59 files changed, 3335 insertions(+), 1429 deletions(-)
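The behavioral change tying these files together: when an upsert has to insert a new document, equality predicates from the query (plain equality, $eq, single-element $all, and equalities nested under $and/$or) are now copied into the inserted document for $op-style updates, and the same extraction logic targets upserts by _id and by shard key through mongos. A minimal shell sketch of the rules the new jstests/core/upsert_fields.js exercises, assuming a fresh collection named db.c (the name is illustrative, not from this commit):

    db.c.update({ x: { $eq: 1 } }, { $set: { a: 1 } }, { upsert: true });
    db.c.findOne();  // { _id: ObjectId(...), x: 1, a: 1 } -- x extracted from the query

    db.c.drop();
    db.c.update({ x: { $gt: 1 } }, { $set: { a: 1 } }, { upsert: true });
    db.c.findOne();  // { _id: ObjectId(...), a: 1 } -- non-equality predicates are never extracted

Replacement-style upserts, by contrast, take every non-_id field from the replacement document and ignore query conflicts, which is why the sharded regex and replacement tests below diverge from the $op-style cases.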
diff --git a/jstests/core/upsert2.js b/jstests/core/upsert2.js deleted file mode 100644 index 7184ed807d1..00000000000 --- a/jstests/core/upsert2.js +++ /dev/null @@ -1,20 +0,0 @@ -// A query field with a $not operator should be excluded when constructing the object to which mods -// will be applied when performing an upsert. SERVER-8178 - -t = db.jstests_upsert2; - -// The a:$not query operator does not cause an 'a' field to be added to the upsert document. -t.drop(); -t.update( { a:{ $not:{ $lt:1 } } }, { $set:{ b:1 } }, true ); -assert( !t.findOne().a ); - -// The a:$not query operator does not cause an 'a' field to be added to the upsert document. -t.drop(); -t.update( { a:{ $not:{ $elemMatch:{ a:1 } } } }, { $set:{ b:1 } }, true ); -assert( !t.findOne().a ); - -// The a:$not query operator does not cause an 'a' field to be added to the upsert document, and as -// a result $push can be applied to the (missing) 'a' field. -t.drop(); -t.update( { a:{ $not:{ $elemMatch:{ a:1 } } } }, { $push:{ a:{ b:1, c:0 } } }, true ); -assert.eq( [ { b:1, c:0 } ], t.findOne().a ); diff --git a/jstests/core/upsert3.js b/jstests/core/upsert3.js deleted file mode 100644 index cfcfa2688d1..00000000000 --- a/jstests/core/upsert3.js +++ /dev/null @@ -1,58 +0,0 @@ -// tests to make sure no dup fields are created when using query to do upsert -var res; -t = db.upsert3; -t.drop(); - -//make sure we validate query -res = t.update( {a: {"a.a": 1}} , {$inc: {y: 1}} , true ); -assert.writeError( res, "a.a.a-1 - " + res.toString() + " doc:" + tojson(t.findOne())); - -res = t.update( {a: {$a: 1}} , {$inc: {y: 1}} , true ); -assert.writeError(res, "a.$a-1 - " + res.toString() + " doc:" + tojson(t.findOne())); - -// make sure the new _id is not duplicated -res = t.update( {"a.b": 1, a: {a: 1, b: 1}} , {$inc: {y: 1}} , true ); -assert.writeError(res, "a.b-1 - " + res.toString() + " doc:" + tojson(t.findOne())); - -res = t.update( {"_id.a": 1, _id: {a: 1, b: 1}} , {$inc : {y: 1}} , true ); -assert.writeError(res, "_id-1 - " + res.toString() + " doc:" + tojson(t.findOne())); - -res = t.update( {_id: {a: 1, b: 1}, "_id.a": 1} , { $inc: {y: 1}} , true ); -assert.writeError(res, "_id-2 - " + res.toString() + " doc:" + tojson(t.findOne())); - -// Should be redundant, but including from SERVER-11363 -res = t.update( {_id: {a: 1, b: 1}, "_id.a": 1} , {$setOnInsert: {y: 1}} , true ); -assert.writeError(res, "_id-3 - " + res.toString() + " doc:" + tojson(t.findOne())); - -//Should be redundant, but including from SERVER-11514 -res = t.update( {"a": {}, "a.c": 2} , {$set: {x: 1}}, true ); -assert.writeError(res, "a.c-1 - " + res.toString() + " doc:" + tojson(t.findOne())); - -// Should be redundant, but including from SERVER-4830 -res = t.update( {'a': {b: 1}, 'a.c': 1}, {$inc: {z: 1}}, true ); -assert.writeError(res, "a-1 - " + res.toString() + " doc:" + tojson(t.findOne())); - -// Should be redundant, but including from SERVER-4830 -res = t.update( {a: 1, "a.b": 1, a: [1, {b: 1}]}, {$inc: {z: 1}}, true ); -assert.writeError(res, "a-2 - " + res.toString() + " doc:" + tojson(t.findOne())); - -// Replacement tests -// Query is ignored for replacements, except _id field. 
-res = t.update( {r: {a: 1, b: 1}, "r.a": 1} , {y: 1} , true ); -assert.writeOK(res); -assert(t.findOne().y, 1, "inserted doc missing field") -var docMinusId = t.findOne(); -delete docMinusId._id -assert.docEq({y: 1}, docMinusId, "r-1") -t.drop() - -res = t.update( {_id: {a:1, b:1}, "_id.a": 1} , {y: 1} , true ); -assert.writeOK(res); -assert.docEq({_id: {a: 1, b: 1}, y: 1}, t.findOne(), "_id-4") -t.drop() - -// make sure query doesn't error when creating doc for insert, -// since it missing the rest of the dbref fields. SERVER-14024 -res = t.update( {_id:1, "foo.$id":1}, {$set : {foo:DBRef("a", 1)}}, {upsert:true} ); -assert.writeOK(res); -assert.docEq({_id: 1, foo:DBRef("a", 1)}, t.findOne()) diff --git a/jstests/core/upsert4.js b/jstests/core/upsert_and.js index 7020837adf6..7020837adf6 100644 --- a/jstests/core/upsert4.js +++ b/jstests/core/upsert_and.js diff --git a/jstests/core/upsert_fields.js b/jstests/core/upsert_fields.js new file mode 100644 index 00000000000..48ba8b595e2 --- /dev/null +++ b/jstests/core/upsert_fields.js @@ -0,0 +1,227 @@ +// +// Upsert behavior tests for field extraction +// + +var coll = db.upsert_field_gen; +coll.drop(); + +var upsertedResult = function(query, expr) { + coll.drop(); + result = coll.update(query, expr, { upsert : true }); + return result; +}; + +var upsertedField = function(query, expr, fieldName) { + assert.writeOK(upsertedResult(query, expr)); + return coll.findOne()[fieldName]; +}; + +var upsertedId = function(query, expr) { + return upsertedField(query, expr, "_id"); +}; + +var upsertedXVal = function(query, expr) { + return upsertedField(query, expr, "x"); +}; + +// +// _id field has special rules +// + +// _id autogenerated +assert.neq(null, upsertedId({}, {})); + +// _id autogenerated with add'l fields +assert.neq(null, upsertedId({}, { a : 1 })); +assert.eq(1, upsertedField({}, { a : 1 }, "a")); +assert.neq(null, upsertedId({}, { $set : { a : 1 } }, "a")); +assert.eq(1, upsertedField({}, { $set : { a : 1 } }, "a")); +assert.neq(null, upsertedId({}, { $setOnInsert : { a : 1 } }, "a")); +assert.eq(1, upsertedField({}, { $setOnInsert : { a : 1 } }, "a")); + +// _id not autogenerated +assert.eq(1, upsertedId({}, { _id : 1 })); +assert.eq(1, upsertedId({}, { $set : { _id : 1 } })); +assert.eq(1, upsertedId({}, { $setOnInsert : { _id : 1 } })); + +// _id type error +assert.writeError(upsertedResult({}, { _id : [1, 2] })); +assert.writeError(upsertedResult({}, { _id : undefined })); +assert.writeError(upsertedResult({}, { $set : { _id : [1, 2] } })); +// Fails in v2.6, no validation +assert.writeError(upsertedResult({}, { $setOnInsert : { _id : undefined } })); + +// Check things that are pretty much the same for replacement and $op style upserts +for (var i = 0; i < 3; i++) { + + // replacement style + var expr = {}; + + // $op style + if (i == 1) + expr = { $set : { a : 1 } }; + if (i == 2) + expr = { $setOnInsert : { a : 1 } }; + + var isReplStyle = i == 0; + + // _id extracted + assert.eq(1, upsertedId({ _id : 1 }, expr)); + // All below fail in v2.6, no $ops for _id and $and/$or not explored + assert.eq(1, upsertedId({ _id : { $eq : 1 } }, expr)); + assert.eq(1, upsertedId({ _id : { $all : [1] } }, expr)); + assert.eq(1, upsertedId({ $and : [{ _id : 1 }] }, expr)); + assert.eq(1, upsertedId({ $and : [{ _id : { $eq : 1 } }] }, expr)); + assert.eq(1, upsertedId({ $or : [{ _id : 1 }] }, expr)); + assert.eq(1, upsertedId({ $or : [{ _id : { $eq : 1 } }] }, expr)); + + // _id not extracted, autogenerated + assert.neq(1, upsertedId({ _id : { 
$gt : 1 } }, expr)); + assert.neq(1, upsertedId({ _id : { $ne : 1 } }, expr)); + assert.neq(1, upsertedId({ _id : { $in : [1] } }, expr)); + assert.neq(1, upsertedId({ _id : { $in : [1, 2] } }, expr)); + assert.neq(1, upsertedId({ _id : { $elemMatch : { $eq : 1 } } }, expr)); + assert.neq(1, upsertedId({ _id : { $exists : true } }, expr)); + assert.neq(1, upsertedId({ _id : { $not : { $eq : 1 } } }, expr)); + assert.neq(1, upsertedId({ $or : [{ _id : 1 }, { _id : 1 }] }, expr)); + assert.neq(1, upsertedId({ $or : [{ _id : { $eq : 1 } }, { _id : 2 }] }, expr)); + assert.neq(1, upsertedId({ $nor : [{ _id : 1 }] }, expr)); + assert.neq(1, upsertedId({ $nor : [{ _id : { $eq : 1 } }] }, expr)); + assert.neq(1, upsertedId({ $nor : [{ _id : { $eq : 1 } }, { _id : 1 }] }, expr)); + + // _id extraction errors + assert.writeError(upsertedResult({ _id : [1, 2] }, expr)); + assert.writeError(upsertedResult({ _id : undefined }, expr)); + assert.writeError(upsertedResult({ _id : { $eq : [1, 2] } }, expr)); + assert.writeError(upsertedResult({ _id : { $eq : undefined } }, expr)); + assert.writeError(upsertedResult({ _id : { $all : [ 1, 2 ] } }, expr)); + // All below fail in v2.6, non-_id fields completely ignored + assert.writeError(upsertedResult({ $and : [{ _id : 1 }, { _id : 1 }] }, expr)); + assert.writeError(upsertedResult({ $and : [{ _id : { $eq : 1 } }, { _id : 2 }] }, expr)); + assert.writeError(upsertedResult({ _id : 1, "_id.x" : 1 }, expr)); + assert.writeError(upsertedResult({ _id : { x : 1 }, "_id.x" : 1 }, expr)); + + // Special case - nested _id fields only used on $op-style updates + if (isReplStyle) { + // Fails in v2.6 + assert.writeError(upsertedResult({ "_id.x" : 1, "_id.y" : 2 }, expr)); + } + else { + // Fails in v2.6 + assert.docEq({ x : 1, y : 2 }, upsertedId({ "_id.x" : 1, "_id.y" : 2 }, expr)); + } +} + +// regex _id in expression is an error, no regex ids allowed +assert.writeError(upsertedResult({}, { _id : /abc/ })); +// Fails in v2.6, no validation +assert.writeError(upsertedResult({}, { $set : { _id : /abc/ } })); + +// no regex _id extraction from query +assert.neq(/abc/, upsertedId({ _id : /abc/ }, {})); + +// +// Regular field extraction +// + +// Check things that are pretty much the same for replacement and $op style upserts +for (var i = 0; i < 3; i++) { + + // replacement style + var expr = {}; + + // $op style + if (i == 1) + expr = { $set : { a : 1 } }; + if (i == 2) + expr = { $setOnInsert : { a : 1 } }; + + var isReplStyle = i == 0; + + // field extracted when replacement style + var value = isReplStyle ? undefined : 1; + assert.eq(value, upsertedXVal({ x : 1 }, expr)); + assert.eq(value, upsertedXVal({ x : { $eq : 1 } }, expr)); + assert.eq(value, upsertedXVal({ x : { $all : [1] } }, expr)); + assert.eq(value, upsertedXVal({ $and : [{ x : 1 }] }, expr)); + assert.eq(value, upsertedXVal({ $and : [{ x : { $eq : 1 } }] }, expr)); + assert.eq(value, upsertedXVal({ $or : [{ x : 1 }] }, expr)); + assert.eq(value, upsertedXVal({ $or : [{ x : { $eq : 1 } }] }, expr)); + // Special types extracted + assert.eq(isReplStyle ? 
undefined : [1, 2], upsertedXVal({ x : [1, 2] }, expr)); + + // field not extracted + assert.eq(undefined, upsertedXVal({ x : { $gt : 1 } }, expr)); + assert.eq(undefined, upsertedXVal({ x : { $ne : 1 } }, expr)); + assert.eq(undefined, upsertedXVal({ x : { $in : [1] } }, expr)); + assert.eq(undefined, upsertedXVal({ x : { $in : [1, 2] } }, expr)); + assert.eq(undefined, upsertedXVal({ x : { $elemMatch : { $eq : 1 } } }, expr)); + assert.eq(undefined, upsertedXVal({ x : { $exists : true } }, expr)); + assert.eq(undefined, upsertedXVal({ x : { $not : { $eq : 1 } } }, expr)); + assert.eq(undefined, upsertedXVal({ $or : [{ x : 1 }, { x : 1 }] }, expr)); + assert.eq(undefined, upsertedXVal({ $or : [{ x : { $eq : 1 } }, { x : 2 }] }, expr)); + assert.eq(undefined, upsertedXVal({ $nor : [{ x : 1 }] }, expr)); + assert.eq(undefined, upsertedXVal({ $nor : [{ x : { $eq : 1 } }] }, expr)); + assert.eq(undefined, upsertedXVal({ $nor : [{ x : { $eq : 1 } }, { x : 1 }] }, expr)); + + // field extraction errors + assert.writeError(upsertedResult({ x : undefined }, expr)); + + if (!isReplStyle) { + assert.writeError(upsertedResult({ x : { 'x.x' : 1 } }, expr)); + assert.writeError(upsertedResult({ x : { $all : [ 1, 2 ] } }, expr)); + assert.writeError(upsertedResult({ $and : [{ x : 1 }, { x : 1 }] }, expr)); + assert.writeError(upsertedResult({ $and : [{ x : { $eq : 1 } }, { x : 2 }] }, expr)); + } + else { + assert.eq(undefined, upsertedXVal({ x : { 'x.x' : 1 } }, expr)); + assert.eq(undefined, upsertedXVal({ x : { $all : [ 1, 2 ] } }, expr)); + assert.eq(undefined, upsertedXVal({ $and : [{ x : 1 }, { x : 1 }] }, expr)); + assert.eq(undefined, upsertedXVal({ $and : [{ x : { $eq : 1 } }, { x : 2 }] }, expr)); + } + + // nested field extraction + var docValue = isReplStyle ? 
undefined : { x : 1 }; + assert.docEq(docValue, upsertedXVal({ "x.x" : 1 }, expr)); + assert.docEq(docValue, upsertedXVal({ "x.x" : { $eq : 1 } }, expr)); + assert.docEq(docValue, upsertedXVal({ "x.x" : { $all : [1] } }, expr)); + assert.docEq(docValue, upsertedXVal({ $and : [{ "x.x" : 1 }] }, expr)); + assert.docEq(docValue, upsertedXVal({ $and : [{ "x.x" : { $eq : 1 } }] }, expr)); + assert.docEq(docValue, upsertedXVal({ $or : [{ "x.x" : 1 }] }, expr)); + assert.docEq(docValue, upsertedXVal({ $or : [{ "x.x" : { $eq : 1 } }] }, expr)); + + // nested field conflicts + if (!isReplStyle) { + assert.writeError(upsertedResult({ x : 1, "x.x" : 1 }, expr)); + assert.writeError(upsertedResult({ x : {}, "x.x" : 1 }, expr)); + assert.writeError(upsertedResult({ x : { x : 1 }, "x.x" : 1 }, expr)); + assert.writeError(upsertedResult({ x : { x : 1 }, "x.y" : 1 }, expr)); + assert.writeError(upsertedResult({ x : [1, { x : 1 }], "x.x" : 1 }, expr)); + } + else { + assert.eq(undefined, upsertedXVal({ x : 1, "x.x" : 1 }, expr)); + assert.eq(undefined, upsertedXVal({ x : {}, "x.x" : 1 }, expr)); + assert.eq(undefined, upsertedXVal({ x : { x : 1 }, "x.x" : 1 }, expr)); + assert.eq(undefined, upsertedXVal({ x : { x : 1 }, "x.y" : 1 }, expr)); + assert.eq(undefined, upsertedXVal({ x : [1, { x : 1 }], "x.x" : 1 }, expr)); + } + +} + +// regex field in expression is a value +assert.eq(/abc/, upsertedXVal({}, { x : /abc/ })); +assert.eq(/abc/, upsertedXVal({}, { $set : { x : /abc/ } })); + +// no regex field extraction from query unless $eq'd +assert.eq(/abc/, upsertedXVal({ x : { $eq : /abc/ } }, { $set : { a : 1 } })); +assert.eq(undefined, upsertedXVal({ x : /abc/ }, { $set : { a : 1 } })); + +// replacement-style updates ignore conflicts *except* on _id field +assert.eq(1, upsertedId({ _id : 1, x : [1, { x : 1 }], "x.x" : 1 }, {})); + +// DBRef special cases +// make sure query doesn't error when creating doc for insert, since it's missing the rest of the +// dbref fields. 
SERVER-14024 +// Fails in 2.6.1->3 +assert.docEq(tojson(DBRef("a", 1)), upsertedXVal({ "x.$id" : 1 }, + { $set : { x : DBRef("a", 1) } })); diff --git a/jstests/core/upsert1.js b/jstests/core/upsert_shell.js index f8c97e41c84..ab502f7ddb5 100644 --- a/jstests/core/upsert1.js +++ b/jstests/core/upsert_shell.js @@ -1,4 +1,4 @@ -// tests to make sure that the new _id is returned after the insert +// tests to make sure that the new _id is returned after the insert in the shell var l; t = db.upsert1; t.drop(); diff --git a/jstests/sharding/regex_targeting.js b/jstests/sharding/regex_targeting.js index 3fad66e2c51..6fffabb1e51 100644 --- a/jstests/sharding/regex_targeting.js +++ b/jstests/sharding/regex_targeting.js @@ -169,20 +169,41 @@ assert.writeOK(collHashed.update({ hash : /abcde.*/ }, { multi : true })); assert.eq(collHashed.find().itcount(), collHashed.find({ updated : true }).itcount()); - // // -// Upsert by regex should fail on sharded collections -// Regex is not targeted in queries, so can't be targeted for updates +// Upsert with op-style regex should fail on sharded collections +// Query clause is targeted, and regex in query clause is ambiguous collSharded.remove({}); -assert.writeError(collSharded.update({ a : /abcde.*/ }, { a : /abcde.*/ }, { upsert : true })); -assert.writeError(collCompound.update({ a : /abcde.*/ }, { a : /abcde.*/ }, { upsert : true })); -assert.writeError(collSharded.update({ a : /abcde.*/ }, { a : /abcde.*/ }, { upsert : true })); -assert.writeError(collNested.update({ a : { b : /abcde.*/ } }, { a : { b : /abcde.*/ } }, +collCompound.remove({}); +collNested.remove({}); +assert.writeError(collSharded.update({ a : /abcde.*/ }, { $set : { a : /abcde.*/ } }, + { upsert : true })); +assert.writeError(collCompound.update({ a : /abcde.*/ }, { $set : { a : /abcde.*/, b : 1 } }, + { upsert : true })); +// Exact regex in query never equality +assert.writeError(collNested.update({ 'a.b' : /abcde.*/ }, { $set : { 'a.b' : /abcde.*/ } }, { upsert : true })); -assert.writeError(collNested.update({ c : 1 }, { a : { b : /abcde.*/ } }, +// Even nested regexes are not extracted in queries +assert.writeError(collNested.update({ a : { b : /abcde.*/ } }, { $set : { 'a.b' : /abcde.*/ } }, + { upsert : true })); +assert.writeError(collNested.update({ c : 1 }, { $set : { 'a.b' : /abcde.*/ } }, { upsert : true })); +// +// +// Upsert by replacement-style regex should succeed on sharded collections +// Replacement clause is targeted, and regex is unambiguously a value +collSharded.remove({}); +collCompound.remove({}); +collNested.remove({}); +assert.writeOK(collSharded.update({ a : /abcde.*/ }, { a : /abcde.*/ }, { upsert : true })); +assert.writeOK(collCompound.update({ a : /abcde.*/ }, { a : /abcde.*/, b : 1 }, { upsert : true })); +assert.writeOK(collNested.update({ 'a.b' : /abcde.*/ }, { a : { b : /abcde.*/ } }, + { upsert : true })); +assert.writeOK(collNested.update({ a : { b : /abcde.*/ } }, { a : { b : /abcde.*/ } }, + { upsert : true })); +assert.writeOK(collNested.update({ c : 1 }, { a : { b : /abcde.*/ } }, + { upsert : true })); // // diff --git a/jstests/sharding/update1.js b/jstests/sharding/update1.js deleted file mode 100644 index d555331bc7a..00000000000 --- a/jstests/sharding/update1.js +++ /dev/null @@ -1,57 +0,0 @@ -// Test simple updates issued through mongos. Updates have different constraints through mongos, -// since shard key is immutable. 
- -s = new ShardingTest( "auto1" , 2 , 1 , 1 ); - -s.adminCommand( { enablesharding : "test" } ); -// repeat same tests with hashed shard key, to ensure identical behavior -s.adminCommand( { shardcollection : "test.update0" , key : { key : 1 } } ); -s.adminCommand( { shardcollection : "test.update1" , key : { key : "hashed" } } ); - -db = s.getDB( "test" ) -for(i=0; i < 2; i++){ - coll = db.getCollection("update" + i); - - coll.insert({_id:1, key:1}); - - // these are both upserts - coll.save({_id:2, key:2}); - coll.update({_id:3, key:3}, {$set: {foo: 'bar'}}, {upsert: true}); - - assert.eq(coll.count(), 3, "count A") - assert.eq(coll.findOne({_id:3}).key, 3 , "findOne 3 key A") - assert.eq(coll.findOne({_id:3}).foo, 'bar' , "findOne 3 foo A") - - // update existing using save() - coll.save({_id:1, key:1, other:1}); - - // update existing using update() - coll.update({_id:2}, {key:2, other:2}); - coll.update({_id:3}, {key:3, other:3}); - - // do a replacement-style update which queries the shard key and keeps it constant - coll.save( {_id:4, key:4} ); - coll.update({key:4}, {key:4, other:4}); - assert.eq( coll.find({key:4, other:4}).count() , 1 , 'replacement update error'); - coll.remove( {_id:4} ) - - assert.eq(coll.count(), 3, "count B") - coll.find().forEach(function(x){ - assert.eq(x._id, x.key, "_id == key"); - assert.eq(x._id, x.other, "_id == other"); - }); - - assert.writeError(coll.update({ _id: 1, key: 1 }, { $set: { key: 2 }})); - assert.eq(coll.findOne({_id:1}).key, 1, 'key unchanged'); - - assert.writeOK(coll.update({ _id: 1, key: 1 }, { $set: { foo: 2 }})); - - coll.update( { key : 17 } , { $inc : { x : 5 } } , true ); - assert.eq( 5 , coll.findOne( { key : 17 } ).x , "up1" ) - - coll.update( { key : 18 } , { $inc : { x : 5 } } , true , true ); - assert.eq( 5 , coll.findOne( { key : 18 } ).x , "up2" ) -} - -s.stop() - diff --git a/jstests/sharding/update_sharded.js b/jstests/sharding/update_sharded.js new file mode 100644 index 00000000000..466e7bfdfa8 --- /dev/null +++ b/jstests/sharding/update_sharded.js @@ -0,0 +1,99 @@ +// Test simple updates issued through mongos. Updates have different constraints through mongos, +// since shard key is immutable. 
+ +s = new ShardingTest( "auto1" , 2 , 1 , 1 ); + +s.adminCommand( { enablesharding : "test" } ); +// repeat same tests with hashed shard key, to ensure identical behavior +s.adminCommand( { shardcollection : "test.update0" , key : { key : 1 } } ); +s.adminCommand( { shardcollection : "test.update1" , key : { key : "hashed" } } ); + +db = s.getDB( "test" ) +for(i=0; i < 2; i++){ + coll = db.getCollection("update" + i); + + coll.insert({_id:1, key:1}); + + // these are both upserts + coll.save({_id:2, key:2}); + coll.update({_id:3, key:3}, {$set: {foo: 'bar'}}, {upsert: true}); + + assert.eq(coll.count(), 3, "count A") + assert.eq(coll.findOne({_id:3}).key, 3 , "findOne 3 key A") + assert.eq(coll.findOne({_id:3}).foo, 'bar' , "findOne 3 foo A") + + // update existing using save() + coll.save({_id:1, key:1, other:1}); + + // update existing using update() + coll.update({_id:2}, {key:2, other:2}); + coll.update({_id:3}, {key:3, other:3}); + + // do a replacement-style update which queries the shard key and keeps it constant + coll.save( {_id:4, key:4} ); + coll.update({key:4}, {key:4, other:4}); + assert.eq( coll.find({key:4, other:4}).count() , 1 , 'replacement update error'); + coll.remove( {_id:4} ) + + assert.eq(coll.count(), 3, "count B") + coll.find().forEach(function(x){ + assert.eq(x._id, x.key, "_id == key"); + assert.eq(x._id, x.other, "_id == other"); + }); + + assert.writeError(coll.update({ _id: 1, key: 1 }, { $set: { key: 2 }})); + assert.eq(coll.findOne({_id:1}).key, 1, 'key unchanged'); + + assert.writeOK(coll.update({ _id: 1, key: 1 }, { $set: { foo: 2 }})); + + coll.update( { key : 17 } , { $inc : { x : 5 } } , true ); + assert.eq( 5 , coll.findOne( { key : 17 } ).x , "up1" ) + + coll.update( { key : 18 } , { $inc : { x : 5 } } , true , true ); + assert.eq( 5 , coll.findOne( { key : 18 } ).x , "up2" ) + + // Make sure we can extract exact _id from certain queries + assert.writeOK(coll.update({_id : ObjectId()}, {$set : {x : 1}}, {multi : false})); + assert.writeOK(coll.update({_id : {$eq : ObjectId()}}, {$set : {x : 1}}, {multi : false})); + assert.writeOK(coll.update({_id : {$all : [ObjectId()]}}, {$set : {x : 1}}, {multi : false})); + assert.writeOK(coll.update({$or : [{_id : ObjectId()}]}, {$set : {x : 1}}, {multi : false})); + assert.writeOK(coll.update({$and : [{_id : ObjectId()}]}, {$set : {x : 1}}, {multi : false})); + + // Invalid extraction of exact _id from query + assert.writeError(coll.update({}, {$set : {x : 1}}, {multi : false})); + assert.writeError(coll.update({_id : {$gt : ObjectId()}}, {$set : {x : 1}}, {multi : false})); + assert.writeError(coll.update({_id : {$in : [ObjectId()]}}, {$set : {x : 1}}, {multi : false})); + assert.writeError(coll.update({$or : [{_id : ObjectId()}, {_id : ObjectId()}]}, + {$set : {x : 1}}, + {multi : false})); + assert.writeError(coll.update({$and : [{_id : ObjectId()}, {_id : ObjectId()}]}, + {$set : {x : 1}}, + {multi : false})); + assert.writeError(coll.update({'_id.x' : ObjectId()}, {$set : {x : 1}}, {multi : false})); + + // Make sure we can extract exact shard key from certain queries + assert.writeOK(coll.update({key : ObjectId()}, {$set : {x : 1}}, {multi : false})); + assert.writeOK(coll.update({key : {$eq : ObjectId()}}, {$set : {x : 1}}, {multi : false})); + assert.writeOK(coll.update({key : {$all : [ObjectId()]}}, {$set : {x : 1}}, {multi : false})); + assert.writeOK(coll.update({$or : [{key : ObjectId()}]}, {$set : {x : 1}}, {multi : false})); + assert.writeOK(coll.update({$and : [{key : ObjectId()}]}, {$set : {x : 
1}}, {multi : false})); + + // Invalid extraction of exact key from query + assert.writeError(coll.update({}, {$set : {x : 1}}, {multi : false})); + assert.writeError(coll.update({key : {$gt : ObjectId()}}, {$set : {x : 1}}, {multi : false})); + assert.writeError(coll.update({key : {$in : [ObjectId()]}}, {$set : {x : 1}}, {multi : false})); + assert.writeError(coll.update({$or : [{key : ObjectId()}, {key : ObjectId()}]}, + {$set : {x : 1}}, + {multi : false})); + assert.writeError(coll.update({$and : [{key : ObjectId()}, {key : ObjectId()}]}, + {$set : {x : 1}}, + {multi : false})); + assert.writeError(coll.update({'key.x' : ObjectId()}, {$set : {x : 1}}, {multi : false})); + + // Make sure failed shard key or _id extraction doesn't affect the other + assert.writeOK(coll.update({'_id.x' : ObjectId(), key : 1}, {$set : {x : 1}}, {multi : false})); + assert.writeOK(coll.update({_id : ObjectId(), 'key.x' : 1}, {$set : {x : 1}}, {multi : false})); +} + +s.stop() + diff --git a/jstests/sharding/upsert_sharded.js b/jstests/sharding/upsert_sharded.js new file mode 100644 index 00000000000..0d270dc1f5e --- /dev/null +++ b/jstests/sharding/upsert_sharded.js @@ -0,0 +1,108 @@ +// +// Upsert behavior tests for sharding +// NOTE: Generic upsert behavior tests belong in the core suite +// + +var options = { separateConfig : true, shardOptions : { verbose : 2 } }; + +var st = new ShardingTest({ shards : 2, mongos : 1, other : options }); +st.stopBalancer(); + +var mongos = st.s0; +var admin = mongos.getDB( "admin" ); +var shards = mongos.getCollection( "config.shards" ).find().toArray(); +var coll = mongos.getCollection( "foo.bar" ); + +assert( admin.runCommand({ enableSharding : coll.getDB() + "" }).ok ); + +var upsertedResult = function(query, expr) { + coll.remove({}); + result = coll.update(query, expr, { upsert : true }); + return result; +}; + +var upsertedField = function(query, expr, fieldName) { + assert.writeOK(upsertedResult(query, expr)); + return coll.findOne()[fieldName]; +}; + +var upsertedId = function(query, expr) { + return upsertedField(query, expr, "_id"); +}; + +var upsertedXVal = function(query, expr) { + return upsertedField(query, expr, "x"); +}; + +printjson( admin.runCommand({ movePrimary : coll.getDB() + "", to : shards[0]._id }) ); +assert( admin.runCommand({ shardCollection : coll + "", key : { x : 1 } }).ok ); +assert( admin.runCommand({ split : coll + "", middle : { x : 0 } }).ok ); +assert( admin.runCommand({ moveChunk : coll + "", + find : { x : 0 }, + to : shards[1]._id, + _waitForDelete : true }).ok ); + +st.printShardingStatus(); + +// upserted update replacement would result in no shard key +assert.writeError(upsertedResult({ x : 1 }, {})); + +// updates with upsert must contain shard key in query when $op style +assert.eq(1, upsertedXVal({ x : 1 }, { $set : { a : 1 } })); +assert.eq(1, upsertedXVal({ x : { $eq : 1 } }, { $set : { a : 1 } })); +assert.eq(1, upsertedXVal({ x : { $all : [1] } }, { $set : { a : 1 } })); +assert.eq(1, upsertedXVal({ $and : [{ x : { $eq : 1 } }] }, { $set : { a : 1 } })); +assert.eq(1, upsertedXVal({ $or : [{ x : { $eq : 1 } }] }, { $set : { a : 1 } })); + +// shard key not extracted +assert.writeError(upsertedResult({}, { $set : { a : 1, x : 1 } })); +assert.writeError(upsertedResult({ x : { $gt : 1 } }, { $set : { a : 1, x : 1 } })); +assert.writeError(upsertedResult({ x : { $in : [1] } }, { $set : { a : 1, x : 1 } })); + +// Shard key type errors +assert.writeError(upsertedResult({ x : undefined }, { $set : { a : 1 } })); 
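+// (Shard keys must be extractable as exact, storable values: BSON undefined,
+// arrays, and object values that parse as operator expressions are all
+// rejected, so these upserts cannot be targeted and fail outright.)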
+assert.writeError(upsertedResult({ x : [1, 2] }, { $set : { a : 1 } })); +assert.writeError(upsertedResult({ x : { $eq : { $gt : 5 } } }, { $set : { a : 1 } })); +// Regex shard key is not extracted from queries, even exact matches +assert.writeError(upsertedResult({ x : { $eq : /abc/ } }, { $set : { a : 1 } })); + +// nested field extraction always fails with non-nested key - like _id, we require setting the +// elements directly +assert.writeError(upsertedResult({ "x.x" : 1 }, { $set : { a : 1 } })); +assert.writeError(upsertedResult({ "x.x" : { $eq : 1 } }, { $set : { a : 1 } })); + +coll.drop(); + +printjson( admin.runCommand({ movePrimary : coll.getDB() + "", to : shards[0]._id }) ); +assert( admin.runCommand({ shardCollection : coll + "", key : { 'x.x' : 1 } }).ok ); +assert( admin.runCommand({ split : coll + "", middle : { 'x.x' : 0 } }).ok ); +assert( admin.runCommand({ moveChunk : coll + "", + find : { 'x.x' : 0 }, + to : shards[1]._id, + _waitForDelete : true }).ok ); + +st.printShardingStatus(); + +// nested field extraction with nested shard key +assert.docEq({ x : 1 }, upsertedXVal({ "x.x" : 1 }, { $set : { a : 1 } })); +assert.docEq({ x : 1 }, upsertedXVal({ "x.x" : { $eq : 1 } }, { $set : { a : 1 } })); +assert.docEq({ x : 1 }, upsertedXVal({ "x.x" : { $all : [1] } }, { $set : { a : 1 } })); +assert.docEq({ x : 1 }, upsertedXVal({ $and : [{ "x.x" : { $eq : 1 } }] }, { $set : { a : 1 } })); +assert.docEq({ x : 1 }, upsertedXVal({ $or : [{ "x.x" : { $eq : 1 } }] }, { $set : { a : 1 } })); + +// Can specify siblings of nested shard keys +assert.docEq({ x : 1, y : 1 }, upsertedXVal({ "x.x" : 1, "x.y" : 1 }, { $set : { a : 1 } })); +assert.docEq({ x : 1, y : { z : 1 } }, + upsertedXVal({ "x.x" : 1, "x.y.z" : 1 }, { $set : { a : 1 } })); + +// No arrays at any level +assert.writeError(upsertedResult({ "x.x" : [] }, { $set : { a : 1 } })); +assert.writeError(upsertedResult({ x : { x : [] } }, { $set : { a : 1 } })); +assert.writeError(upsertedResult({ x : [{ x : 1 }] }, { $set : { a : 1 } })); + +// Can't set sub-fields of nested key +assert.writeError(upsertedResult({ "x.x.x" : { $eq : 1 } }, { $set : { a : 1 } })); + +jsTest.log("DONE!"); +st.stop(); + diff --git a/src/mongo/SConscript b/src/mongo/SConscript index a321e666b99..6d07f4f422d 100644 --- a/src/mongo/SConscript +++ b/src/mongo/SConscript @@ -467,7 +467,6 @@ coredbEnv.Library("coredb", [ "db/commands/write_commands/write_commands_common.cpp", "db/pipeline/pipeline.cpp", "db/dbcommands_generic.cpp", - "db/keypattern.cpp", "db/matcher/matcher.cpp", "db/pipeline/accumulator_add_to_set.cpp", "db/pipeline/accumulator_avg.cpp", @@ -747,11 +746,10 @@ env.Library('coreshard', [# This is only here temporarily for auto-split logic i # No good reason to be here other than chunk.cpp needs this. 
's/config_server_checker_service.cpp', 's/shard.cpp', - 's/shardkey.cpp', 's/shard_key_pattern.cpp'], LIBDEPS=['s/base', 's/cluster_ops_impl']); - + mongosLibraryFiles = [ "s/strategy.cpp", "s/commands_admin.cpp", @@ -784,6 +782,13 @@ env.Library( "mongoscore", 's/upgrade', ] ) +env.CppUnitTest("shard_key_pattern_test", [ "s/shard_key_pattern_test.cpp" ], + LIBDEPS=["mongoscore", + "coreshard", + "mongocommon", + "coreserver", + "coredb"]) + env.CppUnitTest( "balancer_policy_test" , [ "s/balancer_policy_tests.cpp" ] , LIBDEPS=["mongoscore", "coreshard", diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err index 333285ab1a4..99085b86971 100644 --- a/src/mongo/base/error_codes.err +++ b/src/mongo/base/error_codes.err @@ -110,6 +110,7 @@ error_code("LockFailed", 107) error_code("InconsistentReplicaSetNames", 108) error_code("ConfigurationInProgress", 109) error_code("CannotInitializeNodeWithData", 110) +error_code("NotExactValueField", 111) # Non-sequential error codes (for compatibility only) error_code("NotMaster", 10107) #this comes from assert_util.h @@ -122,6 +123,7 @@ error_code("BackgroundOperationInProgressForDatabase", 12586); error_code("BackgroundOperationInProgressForNamespace", 12587); error_code("NotMasterOrSecondaryCode", 13436); error_code("NotMasterNoSlaveOkCode", 13435); +error_code("ShardKeyTooBig", 13334); error_class("NetworkError", ["HostUnreachable", "HostNotFound", "NetworkTimeout"]) error_class("Interruption", ["Interrupted", "InterruptedAtShutdown", "ExceededTimeLimit"]) diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index 83c30a6711d..4b2a739d80c 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -15,11 +15,13 @@ env.Library( 'field_ref_set.cpp', 'field_parser.cpp', 'get_status_from_command_result.cpp', + 'keypattern.cpp', 'write_concern_options.cpp' ], LIBDEPS=[ '$BUILD_DIR/mongo/bson', '$BUILD_DIR/mongo/foundation', + '$BUILD_DIR/mongo/index_names' ], ) @@ -59,6 +61,14 @@ env.CppUnitTest( ) env.CppUnitTest( + target= 'keypattern_test', + source= 'keypattern_test.cpp', + LIBDEPS=[ + 'common' + ], +) + +env.CppUnitTest( target="dbmessage_test", source=[ "dbmessage_test.cpp" diff --git a/src/mongo/db/auth/authorization_manager_test.cpp b/src/mongo/db/auth/authorization_manager_test.cpp index 64ca474ec48..2f29749722a 100644 --- a/src/mongo/db/auth/authorization_manager_test.cpp +++ b/src/mongo/db/auth/authorization_manager_test.cpp @@ -152,7 +152,8 @@ namespace { class AuthorizationManagerTest : public ::mongo::unittest::Test { public: virtual ~AuthorizationManagerTest() { - authzManager->invalidateUserCache(); + if (authzManager) + authzManager->invalidateUserCache(); } void setUp() { diff --git a/src/mongo/db/auth/authz_manager_external_state_mock.cpp b/src/mongo/db/auth/authz_manager_external_state_mock.cpp index 638e6f9465a..377be4b36ea 100644 --- a/src/mongo/db/auth/authz_manager_external_state_mock.cpp +++ b/src/mongo/db/auth/authz_manager_external_state_mock.cpp @@ -204,7 +204,7 @@ namespace { if (query.hasField("_id")) { document.root().appendElement(query["_id"]); } - status = driver.populateDocumentWithQueryFields(query, document); + status = driver.populateDocumentWithQueryFields(query, NULL, document); if (!status.isOK()) { return status; } diff --git a/src/mongo/db/auth/role_graph_update.cpp b/src/mongo/db/auth/role_graph_update.cpp index 6236bcc50ae..71e9370ec61 100644 --- a/src/mongo/db/auth/role_graph_update.cpp +++ b/src/mongo/db/auth/role_graph_update.cpp @@ -181,7 +181,8 @@ namespace { 
status = AuthorizationManager::getBSONForRole( roleGraph, roleToUpdate, roleDocument.root()); if (status == ErrorCodes::RoleNotFound) { - status = driver.populateDocumentWithQueryFields(queryPattern, roleDocument); + // The query pattern will only contain _id, no other immutable fields are present + status = driver.populateDocumentWithQueryFields(queryPattern, NULL, roleDocument); } if (!status.isOK()) return status; diff --git a/src/mongo/db/commands/create_indexes.cpp b/src/mongo/db/commands/create_indexes.cpp index c978b5ec046..353d8979690 100644 --- a/src/mongo/db/commands/create_indexes.cpp +++ b/src/mongo/db/commands/create_indexes.cpp @@ -214,11 +214,12 @@ namespace mongo { shardingState.getCollectionMetadata( ns.toString() )); if ( metadata ) { - BSONObj shardKey(metadata->getKeyPattern()); - if ( !isUniqueIndexCompatible( shardKey, newIdxKey )) { + ShardKeyPattern shardKeyPattern(metadata->getKeyPattern()); + if (!shardKeyPattern.isUniqueIndexCompatible(newIdxKey)) { return Status(ErrorCodes::CannotCreateIndex, - str::stream() << "cannot create unique index over " << newIdxKey - << " with shard key pattern " << shardKey); + str::stream() << "cannot create unique index over " << newIdxKey + << " with shard key pattern " + << shardKeyPattern.toBSON()); } } } diff --git a/src/mongo/db/commands/mr.cpp b/src/mongo/db/commands/mr.cpp index 4717eea89c7..d4ced8c5806 100644 --- a/src/mongo/db/commands/mr.cpp +++ b/src/mongo/db/commands/mr.cpp @@ -58,6 +58,7 @@ #include "mongo/s/collection_metadata.h" #include "mongo/s/d_state.h" #include "mongo/s/grid.h" +#include "mongo/s/shard_key_pattern.h" #include "mongo/s/stale_exception.h" #include "mongo/util/log.h" #include "mongo/util/scopeguard.h" @@ -1371,7 +1372,7 @@ namespace mongo { // check to see if this is a new object we don't own yet // because of a chunk migration if ( collMetadata ) { - KeyPattern kp( collMetadata->getKeyPattern() ); + ShardKeyPattern kp( collMetadata->getKeyPattern() ); if (!collMetadata->keyBelongsToMe(kp.extractShardKeyFromDoc(o))) { continue; } diff --git a/src/mongo/db/commands/write_commands/batch_executor.cpp b/src/mongo/db/commands/write_commands/batch_executor.cpp index b71978dc5a6..4e0eac5a579 100644 --- a/src/mongo/db/commands/write_commands/batch_executor.cpp +++ b/src/mongo/db/commands/write_commands/batch_executor.cpp @@ -486,8 +486,8 @@ namespace mongo { CollectionMetadataPtr metadata = shardingState->getCollectionMetadata( nss.ns() ); if ( metadata ) { - if ( !isUniqueIndexCompatible( metadata->getKeyPattern(), - request.getIndexKeyPattern() ) ) { + ShardKeyPattern shardKeyPattern(metadata->getKeyPattern()); + if (!shardKeyPattern.isUniqueIndexCompatible(request.getIndexKeyPattern())) { result->setError(new WriteErrorDetail); buildUniqueIndexError(metadata->getKeyPattern(), diff --git a/src/mongo/db/dbhelpers.cpp b/src/mongo/db/dbhelpers.cpp index e0c7cfaaa8e..334e87895d8 100644 --- a/src/mongo/db/dbhelpers.cpp +++ b/src/mongo/db/dbhelpers.cpp @@ -43,6 +43,7 @@ #include "mongo/db/db.h" #include "mongo/db/exec/working_set_common.h" #include "mongo/db/json.h" +#include "mongo/db/keypattern.h" #include "mongo/db/index/btree_access_method.h" #include "mongo/db/ops/delete.h" #include "mongo/db/ops/update.h" @@ -60,6 +61,7 @@ #include "mongo/db/storage_options.h" #include "mongo/db/catalog/collection.h" #include "mongo/s/d_state.h" +#include "mongo/s/shard_key_pattern.h" #include "mongo/util/log.h" namespace mongo { @@ -420,7 +422,7 @@ namespace mongo { bool docIsOrphan; if ( metadataNow ) { - KeyPattern kp( 
metadataNow->getKeyPattern() ); + ShardKeyPattern kp( metadataNow->getKeyPattern() ); BSONObj key = kp.extractShardKeyFromDoc(obj); docIsOrphan = !metadataNow->keyBelongsToMe( key ) && !metadataNow->keyIsPending( key ); diff --git a/src/mongo/db/exec/shard_filter.cpp b/src/mongo/db/exec/shard_filter.cpp index 1680c67e1ed..d27cce9ad6b 100644 --- a/src/mongo/db/exec/shard_filter.cpp +++ b/src/mongo/db/exec/shard_filter.cpp @@ -33,8 +33,7 @@ #include "mongo/db/exec/filter.h" #include "mongo/db/exec/scoped_timer.h" #include "mongo/db/exec/working_set_common.h" -#include "mongo/db/keypattern.h" -#include "mongo/s/stale_exception.h" +#include "mongo/s/shard_key_pattern.h" #include "mongo/util/log.h" namespace mongo { @@ -69,7 +68,7 @@ namespace mongo { // aborted migrations if (_metadata) { - KeyPattern shardKeyPattern(_metadata->getKeyPattern()); + ShardKeyPattern shardKeyPattern(_metadata->getKeyPattern()); WorkingSetMember* member = _ws->get(*out); WorkingSetMatchableDocument matchable(member); BSONObj shardKey = shardKeyPattern.extractShardKeyFromMatchable(matchable); diff --git a/src/mongo/db/exec/update.cpp b/src/mongo/db/exec/update.cpp index aa5a895df70..391d09126f0 100644 --- a/src/mongo/db/exec/update.cpp +++ b/src/mongo/db/exec/update.cpp @@ -347,8 +347,7 @@ namespace mongo { return Status(ErrorCodes::ImmutableField, mongoutils::str::stream() << "After applying the update to the document {" - << (oldIdElem.ok() ? oldIdElem.toString() : - newIdElem.toString()) + << oldElem.toString() << " , ...}, the (immutable) field '" << current.dottedField() << "' was found to have been altered to " << newElem.toString()); @@ -592,24 +591,23 @@ namespace mongo { // Reset the document we will be writing to _doc.reset(); - // This remains the empty object in the case of an object replacement, but in the case - // of an upsert where we are creating a base object from the query and applying mods, - // we capture the query as the original so that we can detect immutable field mutations. - BSONObj original = BSONObj(); + // The original document we compare changes to - immutable paths must not change + BSONObj original; + + bool isInternalRequest = request->isFromReplication() || request->isFromMigration(); + + const vector<FieldRef*>* immutablePaths = NULL; + if (!isInternalRequest && lifecycle) + immutablePaths = lifecycle->getImmutableFields(); // Calling populateDocumentWithQueryFields will populate the '_doc' with fields from the // query which creates the base of the update for the inserted doc (because upsert // was true). if (cq) { - uassertStatusOK(driver->populateDocumentWithQueryFields(cq, _doc)); - if (!driver->isDocReplacement()) { + uassertStatusOK(driver->populateDocumentWithQueryFields(cq, immutablePaths, _doc)); + if (driver->isDocReplacement()) _specificStats.fastmodinsert = true; - // We need all the fields from the query to compare against for validation below. - original = _doc.getObject(); - } - else { - original = request->getQuery(); - } + original = _doc.getObject(); } else { fassert(17354, CanonicalQuery::isSimpleIdQuery(request->getQuery())); @@ -630,17 +628,13 @@ namespace mongo { // Validate that the object replacement or modifiers resulted in a document // that contains all the immutable keys and can be stored if it isn't coming // from a migration or via replication. 
- if (!(request->isFromReplication() || request->isFromMigration())){ - const std::vector<FieldRef*>* immutableFields = NULL; - if (lifecycle) - immutableFields = lifecycle->getImmutableFields(); - + if (!isInternalRequest){ FieldRefSet noFields; // This will only validate the modified fields if not a replacement. uassertStatusOK(validate(original, noFields, _doc, - immutableFields, + immutablePaths, driver->modOptions()) ); } diff --git a/src/mongo/db/field_ref.cpp b/src/mongo/db/field_ref.cpp index b6ad89c5938..840a62cdc7b 100644 --- a/src/mongo/db/field_ref.cpp +++ b/src/mongo/db/field_ref.cpp @@ -198,25 +198,36 @@ namespace mongo { } StringData FieldRef::dottedField( size_t offset ) const { - if (_size == 0 || offset >= numParts() ) + return dottedSubstring(offset, numParts()); + } + + StringData FieldRef::dottedSubstring(size_t startPart, size_t endPart) const { + if (_size == 0 || startPart >= endPart || endPart > numParts()) return StringData(); if (!_replacements.empty()) reserialize(); dassert(_replacements.empty()); - // Assume we want the whole thing StringData result(_dotted); - // Strip off any leading parts we were asked to ignore - for (size_t i = 0; i < offset; ++i) { - const StringData part = getPart(i); - result = StringData( - result.rawData() + part.size() + 1, - result.size() - part.size() - 1); + // Fast-path if we want the whole thing + if (startPart == 0 && endPart == numParts()) + return result; + + size_t startChar = 0; + for (size_t i = 0; i < startPart; ++i) { + startChar += getPart(i).size() + 1; // correct for '.' + } + size_t endChar = startChar; + for (size_t i = startPart; i < endPart; ++i) { + endChar += getPart(i).size() + 1; } + // correct for last '.' + if (endPart != numParts()) + --endChar; - return result; + return result.substr(startChar, endChar - startChar); } bool FieldRef::equalsDottedField( const StringData& other ) const { diff --git a/src/mongo/db/field_ref.h b/src/mongo/db/field_ref.h index d35a94d284d..51b3f642985 100644 --- a/src/mongo/db/field_ref.h +++ b/src/mongo/db/field_ref.h @@ -89,12 +89,18 @@ namespace mongo { size_t commonPrefixSize( const FieldRef& other ) const; /** - * Returns a copy of the full dotted field in its current state (i.e., some parts may + * Returns a StringData of the full dotted field in its current state (i.e., some parts may * have been replaced since the parse() call). */ StringData dottedField( size_t offsetFromStart = 0 ) const; /** + * Returns a StringData of parts of the dotted field from startPart to endPart in its + * current state (i.e., some parts may have been replaced since the parse() call). 
+ */ + StringData dottedSubstring(size_t startPart, size_t endPart) const; + + /** * Compares the full dotted path represented by this FieldRef to other */ bool equalsDottedField( const StringData& other ) const; diff --git a/src/mongo/db/field_ref_set.cpp b/src/mongo/db/field_ref_set.cpp index 509de2845fc..efc49f2dd1d 100644 --- a/src/mongo/db/field_ref_set.cpp +++ b/src/mongo/db/field_ref_set.cpp @@ -33,6 +33,8 @@ namespace mongo { + using std::vector; + using std::string; namespace str = mongoutils::str; namespace { @@ -59,6 +61,10 @@ namespace mongo { FieldRefSet::FieldRefSet() { } + FieldRefSet::FieldRefSet(const vector<FieldRef*>& paths) { + fillFrom(paths); + } + bool FieldRefSet::findConflicts(const FieldRef* toCheck, FieldRefSet* conflicts) const { bool foundConflict = false; diff --git a/src/mongo/db/field_ref_set.h b/src/mongo/db/field_ref_set.h index e7258c2a184..0403f9265b2 100644 --- a/src/mongo/db/field_ref_set.h +++ b/src/mongo/db/field_ref_set.h @@ -29,6 +29,7 @@ #pragma once #include <set> +#include <vector> #include "mongo/base/disallow_copying.h" #include "mongo/base/owned_pointer_vector.h" @@ -38,9 +39,14 @@ namespace mongo { /** - * A FieldRefSet holds a set of FieldRefs's that do not conflict with one another, that is, - * they target different subtrees of a given document. Two fieldRef's would conflict if they - * are equal or one is prefix of the other. + * A FieldRefSet holds a number of unique FieldRefs - a set of dotted paths into a document. + * + * The FieldRefSet provides helpful functions for efficiently finding conflicts between field + * ref paths - field ref paths conflict if they are equal to each other or if one is a prefix. + * To maintain a FieldRefSet of non-conflicting paths, always use the insert method which + * returns conflicting FieldRefs. + * + * FieldRefSets do not own the FieldRef paths they contain. */ class FieldRefSet { MONGO_DISALLOW_COPYING(FieldRefSet); @@ -57,6 +63,8 @@ namespace mongo { FieldRefSet(); + FieldRefSet(const std::vector<FieldRef*>& paths); + /** Returns 'true' if the set is empty */ bool empty() const { return _fieldSet.empty(); @@ -71,6 +79,15 @@ namespace mongo { } /** + * Returns true if the path does not already exist in the set, false otherwise. + * + * Note that *no* conflict resolution occurs - any path can be inserted into a set. + */ + inline bool insert(const FieldRef* path) { + return _fieldSet.insert(path).second; + } + + /** * Returns true if the field 'toInsert' can be added in the set without * conflicts. Otherwise returns false and fill in '*conflict' with the field 'toInsert' * clashed with. @@ -83,6 +100,8 @@ namespace mongo { /** * Fills the set with the supplied FieldRef*s + * + * Note that *no* conflict resolution occurs here. 
*/ void fillFrom(const std::vector<FieldRef*>& fields); diff --git a/src/mongo/db/field_ref_test.cpp b/src/mongo/db/field_ref_test.cpp index 5ea2009ac25..02390302190 100644 --- a/src/mongo/db/field_ref_test.cpp +++ b/src/mongo/db/field_ref_test.cpp @@ -277,4 +277,46 @@ namespace { ASSERT_EQUALS( "", a.dottedField(6) ); } + TEST(DottedSubstring, Short) { + FieldRef path("a"); + ASSERT_EQUALS(1u, path.numParts()); + ASSERT_EQUALS("a", path.dottedSubstring(0, path.numParts())); + ASSERT_EQUALS("", path.dottedSubstring(1, path.numParts())); + ASSERT_EQUALS("", path.dottedSubstring(0, 0)); + } + + TEST(DottedSubstring, Empty) { + FieldRef path(""); + ASSERT_EQUALS(0u, path.numParts()); + ASSERT_EQUALS("", path.dottedSubstring(0, path.numParts())); + ASSERT_EQUALS("", path.dottedSubstring(1, path.numParts())); + ASSERT_EQUALS("", path.dottedSubstring(0, 0)); + } + + TEST(DottedSubstring, Nested) { + FieldRef path("a.b.c.d.e"); + ASSERT_EQUALS(5u, path.numParts()); + + ASSERT_EQUALS("b.c.d.e", path.dottedSubstring(1, path.numParts())); + ASSERT_EQUALS("c.d.e", path.dottedSubstring(2, path.numParts())); + ASSERT_EQUALS("d.e", path.dottedSubstring(3, path.numParts())); + ASSERT_EQUALS("e", path.dottedSubstring(4, path.numParts())); + ASSERT_EQUALS("", path.dottedSubstring(5, path.numParts())); + ASSERT_EQUALS("", path.dottedSubstring(6, path.numParts())); + + ASSERT_EQUALS("a.b.c.d.e", path.dottedSubstring(0, path.numParts())); + ASSERT_EQUALS("a.b.c.d", path.dottedSubstring(0, path.numParts() - 1)); + ASSERT_EQUALS("a.b.c", path.dottedSubstring(0, path.numParts() - 2)); + ASSERT_EQUALS("a.b", path.dottedSubstring(0, path.numParts() - 3)); + ASSERT_EQUALS("a", path.dottedSubstring(0, path.numParts() - 4)); + ASSERT_EQUALS("", path.dottedSubstring(0, path.numParts() - 5)); + ASSERT_EQUALS("", path.dottedSubstring(0, path.numParts() - 6)); + + ASSERT_EQUALS("b.c.d", path.dottedSubstring(1, path.numParts() - 1)); + ASSERT_EQUALS("b.c", path.dottedSubstring(1, path.numParts() - 2)); + ASSERT_EQUALS("b", path.dottedSubstring(1, path.numParts() - 3)); + ASSERT_EQUALS("", path.dottedSubstring(1, path.numParts() - 4)); + ASSERT_EQUALS("", path.dottedSubstring(1, path.numParts() - 5)); + } + } // namespace diff --git a/src/mongo/db/keypattern.cpp b/src/mongo/db/keypattern.cpp index e0476f94358..b62885b97d1 100644 --- a/src/mongo/db/keypattern.cpp +++ b/src/mongo/db/keypattern.cpp @@ -30,7 +30,6 @@ #include "mongo/db/keypattern.h" -#include "mongo/db/hasher.h" #include "mongo/db/index_names.h" #include "mongo/util/mongoutils/str.h" @@ -49,64 +48,12 @@ namespace mongo { && i.next().eoo(); } - BSONObj KeyPattern::extractShardKeyFromQuery(const BSONObj& query) const { - - if (_pattern.isEmpty()) - return BSONObj(); - - if (mongoutils::str::equals(_pattern.firstElement().valuestrsafe(), "hashed")) { - BSONElement fieldVal = query.getFieldDotted(_pattern.firstElementFieldName()); - return BSON(_pattern.firstElementFieldName() << - BSONElementHasher::hash64(fieldVal , BSONElementHasher::DEFAULT_HASH_SEED)); - } - - return query.extractFields(_pattern); - } - bool KeyPattern::isOrderedKeyPattern(const BSONObj& pattern) { return IndexNames::BTREE == IndexNames::findPluginName(pattern); } - BSONObj KeyPattern::extractShardKeyFromDoc(const BSONObj& doc) const { - BSONMatchableDocument matchable(doc); - return extractShardKeyFromMatchable(matchable); - } - - BSONObj KeyPattern::extractShardKeyFromMatchable(const MatchableDocument& matchable) const { - - if ( _pattern.isEmpty() ) - return BSONObj(); - - BSONObjBuilder 
keyBuilder; - - BSONObjIterator patternIt(_pattern); - while (patternIt.more()) { - - BSONElement patternEl = patternIt.next(); - ElementPath path; - path.init(patternEl.fieldName()); - - MatchableDocument::IteratorHolder matchIt(&matchable, &path); - if (!matchIt->more()) - return BSONObj(); - BSONElement matchEl = matchIt->next().element(); - // We sometimes get eoo(), apparently - if (matchEl.eoo() || matchIt->more()) - return BSONObj(); - - if (mongoutils::str::equals(patternEl.valuestrsafe(), "hashed")) { - keyBuilder.append(patternEl.fieldName(), - BSONElementHasher::hash64(matchEl, - BSONElementHasher::DEFAULT_HASH_SEED)); - } - else { - // NOTE: The matched element may *not* have the same field name as the path - - // index keys don't contain field names, for example - keyBuilder.appendAs(matchEl, patternEl.fieldName()); - } - } - - return keyBuilder.obj(); + bool KeyPattern::isHashedKeyPattern(const BSONObj& pattern) { + return IndexNames::HASHED == IndexNames::findPluginName(pattern); } BSONObj KeyPattern::extendRangeBound( const BSONObj& bound , bool makeUpperInclusive ) const { @@ -144,95 +91,12 @@ namespace mongo { return newBound.obj(); } - BoundList KeyPattern::flattenBounds( const BSONObj& keyPattern, const IndexBounds& indexBounds ) { - invariant(indexBounds.fields.size() == (size_t)keyPattern.nFields()); - - // If any field is unsatisfied, return empty bound list. - for (vector<OrderedIntervalList>::const_iterator it = indexBounds.fields.begin(); - it != indexBounds.fields.end(); it++) { - if (it->intervals.size() == 0) { - return BoundList(); - } - } - // To construct our bounds we will generate intervals based on bounds for - // the first field, then compound intervals based on constraints for the first - // 2 fields, then compound intervals for the first 3 fields, etc. - // As we loop through the fields, we start generating new intervals that will later - // get extended in another iteration of the loop. We define these partially constructed - // intervals using pairs of BSONObjBuilders (shared_ptrs, since after one iteration of the - // loop they still must exist outside their scope). - typedef vector< pair< shared_ptr<BSONObjBuilder> , - shared_ptr<BSONObjBuilder> > > BoundBuilders; - BoundBuilders builders; - builders.push_back( make_pair( shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ), - shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ) ) ); - BSONObjIterator keyIter( keyPattern ); - // until equalityOnly is false, we are just dealing with equality (no range or $in queries). - bool equalityOnly = true; - - for (size_t i = 0; i < indexBounds.fields.size(); i++) { - BSONElement e = keyIter.next(); - - StringData fieldName = e.fieldNameStringData(); - - // get the relevant intervals for this field, but we may have to transform the - // list of what's relevant according to the expression for this field - const OrderedIntervalList& oil = indexBounds.fields[i]; - const vector<Interval>& intervals = oil.intervals; - - if ( equalityOnly ) { - if ( intervals.size() == 1 && intervals.front().isPoint() ){ - // this field is only a single point-interval - BoundBuilders::const_iterator j; - for( j = builders.begin(); j != builders.end(); ++j ) { - j->first->appendAs( intervals.front().start, fieldName ); - j->second->appendAs( intervals.front().end, fieldName ); - } - } - else { - // This clause is the first to generate more than a single point. - // We only execute this clause once. After that, we simplify the bound - // extensions to prevent combinatorial explosion. 
- equalityOnly = false; - - BoundBuilders newBuilders; - - for(BoundBuilders::const_iterator it = builders.begin(); it != builders.end(); ++it ) { - BSONObj first = it->first->obj(); - BSONObj second = it->second->obj(); + BSONObj KeyPattern::globalMin() const { + return extendRangeBound(BSONObj(), false); + } - for ( vector<Interval>::const_iterator interval = intervals.begin(); - interval != intervals.end(); ++interval ) - { - uassert( 17439, - "combinatorial limit of $in partitioning of results exceeded" , - newBuilders.size() < MAX_IN_COMBINATIONS ); - newBuilders.push_back( - make_pair( shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ), - shared_ptr<BSONObjBuilder>( new BSONObjBuilder()))); - newBuilders.back().first->appendElements( first ); - newBuilders.back().second->appendElements( second ); - newBuilders.back().first->appendAs( interval->start, fieldName ); - newBuilders.back().second->appendAs( interval->end, fieldName ); - } - } - builders = newBuilders; - } - } - else { - // if we've already generated a range or multiple point-intervals - // just extend what we've generated with min/max bounds for this field - BoundBuilders::const_iterator j; - for( j = builders.begin(); j != builders.end(); ++j ) { - j->first->appendAs( intervals.front().start, fieldName ); - j->second->appendAs( intervals.back().end, fieldName ); - } - } - } - BoundList ret; - for( BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i ) - ret.push_back( make_pair( i->first->obj(), i->second->obj() ) ); - return ret; + BSONObj KeyPattern::globalMax() const { + return extendRangeBound(BSONObj(), true); } } // namespace mongo diff --git a/src/mongo/db/keypattern.h b/src/mongo/db/keypattern.h index 77e30b883d2..78e1f3d59bf 100644 --- a/src/mongo/db/keypattern.h +++ b/src/mongo/db/keypattern.h @@ -1,5 +1,3 @@ -// @file keypattern.h - Utilities for manipulating index/shard key patterns. - /** * Copyright (C) 2012 10gen Inc. * @@ -32,27 +30,18 @@ #include "mongo/base/string_data.h" #include "mongo/db/jsobj.h" -#include "mongo/platform/unordered_set.h" -#include "mongo/util/mongoutils/str.h" -#include "mongo/db/query/index_bounds.h" -#include "mongo/db/matcher/matchable.h" namespace mongo { /** - * A BoundList contains intervals specified by inclusive start - * and end bounds. The intervals should be nonoverlapping and occur in - * the specified direction of traversal. For example, given a simple index {i:1} - * and direction +1, one valid BoundList is: (1, 2); (4, 6). The same BoundList - * would be valid for index {i:-1} with direction -1. - */ - typedef std::vector<std::pair<BSONObj,BSONObj> > BoundList; - - /** A KeyPattern is an expression describing a transformation of a document into a - * document key. Document keys are used to store documents in indices and to target - * sharded queries. + * A KeyPattern is an expression describing a transformation of a document into a + * document key. Document keys are used to store documents in indices and to target + * sharded queries. * - * Examples: + * The root field names of KeyPatterns are always (potentially-dotted) paths, and the values of + * the fields describe the type of indexing over the found elements. + * + * Examples: * { a : 1 } * { a : 1 , b : -1 } * { a : "hashed" } @@ -60,19 +49,6 @@ namespace mongo { class KeyPattern { public: - //maximum number of intervals produced by $in queries. 
- static const unsigned MAX_IN_COMBINATIONS = 4000000; - - /* - * We are allowing implicit conversion from BSON - */ - KeyPattern( const BSONObj& pattern ); - - /* - * Returns a BSON representation of this KeyPattern. - */ - BSONObj toBSON() const { return _pattern; } - /** * Is the provided key pattern the index over the ID field? * The always required ID index is always {_id: 1} or {_id: -1}. @@ -84,6 +60,27 @@ namespace mongo { */ static bool isOrderedKeyPattern(const BSONObj& pattern); + /** + * Does the provided key pattern hash its keys? + */ + static bool isHashedKeyPattern(const BSONObj& pattern); + + /** + * Constructs a new key pattern based on a BSON document + */ + KeyPattern(const BSONObj& pattern); + + /** + * Returns a BSON representation of this KeyPattern. + */ + const BSONObj& toBSON() const { return _pattern; } + + /** + * Returns a string representation of this KeyPattern + */ + std::string toString() const{ return toBSON().toString(); } + + /* Takes a BSONObj whose field names are a prefix of the fields in this keyPattern, and * outputs a new bound with MinKey values appended to match the fields in this keyPattern * (or MaxKey values for descending -1 fields). This is useful in sharding for @@ -109,66 +106,9 @@ namespace mongo { */ BSONObj extendRangeBound( const BSONObj& bound , bool makeUpperInclusive ) const; - std::string toString() const{ return toBSON().toString(); } - - /** - * Given a document, extracts the shard key corresponding to the key pattern. - * Warning: assumes that there is a *single* key to be extracted! - * - * Examples: - * If 'this' KeyPattern is { a : 1 } - * { a: "hi" , b : 4} --> returns { a : "hi" } - * { c : 4 , a : 2 } --> returns { a : 2 } - * { b : 2 } (bad input, don't call with this) - * { a : [1,2] } (bad input, don't call with this) - * If 'this' KeyPattern is { a : "hashed" } - * { a: 1 } --> returns { a : NumberLong("5902408780260971510") } - * If 'this' KeyPattern is { 'a.b' : 1 } - * { a : { b : "hi" } } --> returns { a : "hi" } - */ - BSONObj extractShardKeyFromDoc(const BSONObj& doc) const; + BSONObj globalMin() const; - /** - * Given a MatchableDocument, extracts the shard key corresponding to the key pattern. - * See above. - */ - BSONObj extractShardKeyFromMatchable(const MatchableDocument& matchable) const; - - /** - * Given a query expression, extracts the shard key corresponding to the key pattern. - * - * NOTE: This generally is similar to the above, however "a.b" fields in the query (which - * are invalid document fields) may match "a.b" fields in the shard key pattern. - * - * Examples: - * If the key pattern is { a : 1 } - * { a : "hi", b : 4 } --> returns { a : "hi" } - * If the key pattern is { 'a.b' : 1 } - * { a : { b : "hi" } } --> returns { 'a.b' : "hi" } - * { 'a.b' : "hi" } --> returns { 'a.b' : "hi" } - */ - BSONObj extractShardKeyFromQuery(const BSONObj& query) const; - - /** - * Return an ordered list of bounds generated using this KeyPattern and the - * bounds from the IndexBounds. This function is used in sharding to - * determine where to route queries according to the shard key pattern. - * - * Examples: - * - * Key { a: 1 }, Bounds a: [0] => { a: 0 } -> { a: 0 } - * Key { a: 1 }, Bounds a: [2, 3) => { a: 2 } -> { a: 3 } // bound inclusion ignored. - * - * The bounds returned by this function may be a superset of those defined - * by the constraints. 
For instance, if this KeyPattern is {a : 1, b: 1} - * Bounds: { a : {$in : [1,2]} , b : {$in : [3,4,5]} } - * => {a : 1 , b : 3} -> {a : 1 , b : 5}, {a : 2 , b : 3} -> {a : 2 , b : 5} - * - * If the IndexBounds are not defined for all the fields in this keypattern, which - * means some fields are unsatisfied, an empty BoundList could return. - * - */ - static BoundList flattenBounds( const BSONObj& keyPattern, const IndexBounds& indexBounds ); + BSONObj globalMax() const; private: BSONObj _pattern; diff --git a/src/mongo/db/keypattern_test.cpp b/src/mongo/db/keypattern_test.cpp new file mode 100644 index 00000000000..63b9b325d75 --- /dev/null +++ b/src/mongo/db/keypattern_test.cpp @@ -0,0 +1,142 @@ +/* Copyright 2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. 
+ */ + +#include "mongo/db/keypattern.h" + +#include "mongo/unittest/unittest.h" + +namespace { + + using namespace mongo; + + TEST(KeyPattern, ExtendRangeBound) { + + BSONObj bound = BSON("a" << 55); + BSONObj longBound = BSON("a" << 55 << "b" << 66); + + //test keyPattern shorter than bound, should fail + { + KeyPattern keyPat(BSON("a" << 1)); + ASSERT_THROWS(keyPat.extendRangeBound(longBound, false), MsgAssertionException); + } + + //test keyPattern doesn't match bound, should fail + { + KeyPattern keyPat(BSON("b" << 1)); + ASSERT_THROWS(keyPat.extendRangeBound(bound, false), MsgAssertionException); + } + { + KeyPattern keyPat(BSON("a" << 1 << "c" << 1)); + ASSERT_THROWS(keyPat.extendRangeBound(longBound, false), MsgAssertionException); + } + + //test keyPattern same as bound + { + KeyPattern keyPat(BSON("a" << 1)); + BSONObj newB = keyPat.extendRangeBound(bound, false); + ASSERT_EQUALS(newB, BSON("a" << 55)); + } + { + KeyPattern keyPat(BSON("a" << 1)); + BSONObj newB = keyPat.extendRangeBound(bound, false); + ASSERT_EQUALS(newB, BSON("a" << 55)); + } + + //test keyPattern longer than bound, simple + { + KeyPattern keyPat(BSON("a" << 1 << "b" << 1)); + BSONObj newB = keyPat.extendRangeBound(bound, false); + ASSERT_EQUALS(newB, BSON("a" << 55 << "b" << MINKEY)); + } + { + KeyPattern keyPat(BSON("a" << 1 << "b" << 1)); + BSONObj newB = keyPat.extendRangeBound(bound, true); + ASSERT_EQUALS(newB, BSON("a" << 55 << "b" << MAXKEY)); + } + + //test keyPattern longer than bound, more complex pattern directions + { + KeyPattern keyPat(BSON("a" << 1 << "b" << -1)); + BSONObj newB = keyPat.extendRangeBound(bound, false); + ASSERT_EQUALS(newB, BSON("a" << 55 << "b" << MAXKEY)); + } + { + KeyPattern keyPat(BSON("a" << 1 << "b" << -1)); + BSONObj newB = keyPat.extendRangeBound(bound, true); + ASSERT_EQUALS(newB, BSON("a" << 55 << "b" << MINKEY)); + } + { + + KeyPattern keyPat(BSON("a" << 1 << "b" << -1 << "c" << 1)); + BSONObj newB = keyPat.extendRangeBound(bound, false); + ASSERT_EQUALS(newB, BSON("a" << 55 << "b" << MAXKEY << "c" << MINKEY)); + } + { + KeyPattern keyPat(BSON("a" << 1 << "b" << -1 << "c" << 1)); + BSONObj newB = keyPat.extendRangeBound(bound, true); + ASSERT_EQUALS(newB, BSON("a" << 55 << "b" << MINKEY << "c" << MAXKEY)); + } + } + + TEST(KeyPattern, GlobalMinMax) { + + // + // Simple KeyPatterns + // + + ASSERT_EQUALS(KeyPattern(BSON("a" << 1)).globalMin(), BSON("a" << MINKEY)); + ASSERT_EQUALS(KeyPattern(BSON("a" << 1)).globalMax(), BSON("a" << MAXKEY)); + + ASSERT_EQUALS(KeyPattern(BSON("a" << -1)).globalMin(), BSON("a" << MAXKEY)); + ASSERT_EQUALS(KeyPattern(BSON("a" << -1)).globalMax(), BSON("a" << MINKEY)); + + ASSERT_EQUALS(KeyPattern(BSON("a" << 1 << "b" << 1.0)).globalMin(), + BSON("a" << MINKEY << "b" << MINKEY)); + ASSERT_EQUALS(KeyPattern(BSON("a" << 1 << "b" << 1.0)).globalMax(), + BSON("a" << MAXKEY << "b" << MAXKEY)); + + ASSERT_EQUALS(KeyPattern(BSON("a" << 1 << "b" << -1.0f)).globalMin(), + BSON("a" << MINKEY << "b" << MAXKEY)); + ASSERT_EQUALS(KeyPattern(BSON("a" << 1 << "b" << -1.0f)).globalMax(), + BSON("a" << MAXKEY << "b" << MINKEY)); + + ASSERT_EQUALS(KeyPattern(BSON("a" << "hashed")).globalMin(), BSON("a" << MINKEY)); + ASSERT_EQUALS(KeyPattern(BSON("a" << "hashed")).globalMax(), BSON("a" << MAXKEY)); + + // + // Nested KeyPatterns + // + + ASSERT_EQUALS(KeyPattern(BSON("a.b" << 1)).globalMin(), BSON("a.b" << MINKEY)); + ASSERT_EQUALS(KeyPattern(BSON("a.b" << 1)).globalMax(), BSON("a.b" << MAXKEY)); + + ASSERT_EQUALS(KeyPattern(BSON("a.b.c" << 
-1)).globalMin(), BSON("a.b.c" << MAXKEY)); + ASSERT_EQUALS(KeyPattern(BSON("a.b.c" << -1)).globalMax(), BSON("a.b.c" << MINKEY)); + } + +} + diff --git a/src/mongo/db/matcher/path.cpp b/src/mongo/db/matcher/path.cpp index 048a9ae5700..ce9c9fcbeec 100644 --- a/src/mongo/db/matcher/path.cpp +++ b/src/mongo/db/matcher/path.cpp @@ -39,6 +39,7 @@ namespace mongo { Status ElementPath::init( const StringData& path ) { + _shouldTraverseNonleafArrays = true; _shouldTraverseLeafArray = true; _fieldRef.parse( path ); return Status::OK(); @@ -164,6 +165,11 @@ namespace mongo { _subCursorPath.reset( new ElementPath() ); _subCursorPath->init( _arrayIterationState.restOfPath.substr( _arrayIterationState.nextPieceOfPath.size() + 1 ) ); _subCursorPath->setTraverseLeafArray( _path->shouldTraverseLeafArray() ); + + // If we're here, we must be able to traverse nonleaf arrays + dassert(_path->shouldTraverseNonleafArrays()); + dassert(_subCursorPath->shouldTraverseNonleafArrays()); + _subCursor.reset( new BSONElementIterator( _subCursorPath.get(), _arrayIterationState._current.Obj() ) ); _arrayIterationState._current = BSONElement(); return more(); @@ -192,8 +198,14 @@ namespace mongo { _arrayIterationState.reset( _path->fieldRef(), idxPath + 1 ); - if ( !_arrayIterationState.hasMore && !_path->shouldTraverseLeafArray() ) { - _next.reset( e, BSONElement(), true ); + if (_arrayIterationState.hasMore && !_path->shouldTraverseNonleafArrays()) { + // Don't allow traversing the array + _state = DONE; + return false; + } + else if (!_arrayIterationState.hasMore && !_path->shouldTraverseLeafArray()) { + // Return the leaf array + _next.reset(e, BSONElement(), true); _state = DONE; return true; } diff --git a/src/mongo/db/matcher/path.h b/src/mongo/db/matcher/path.h index 8fdf8ed2a11..50c963c97ad 100644 --- a/src/mongo/db/matcher/path.h +++ b/src/mongo/db/matcher/path.h @@ -44,13 +44,16 @@ namespace mongo { public: Status init( const StringData& path ); + void setTraverseNonleafArrays( bool b ) { _shouldTraverseNonleafArrays = b; } void setTraverseLeafArray( bool b ) { _shouldTraverseLeafArray = b; } const FieldRef& fieldRef() const { return _fieldRef; } + bool shouldTraverseNonleafArrays() const { return _shouldTraverseNonleafArrays; } bool shouldTraverseLeafArray() const { return _shouldTraverseLeafArray; } private: FieldRef _fieldRef; + bool _shouldTraverseNonleafArrays; bool _shouldTraverseLeafArray; }; diff --git a/src/mongo/db/ops/SConscript b/src/mongo/db/ops/SConscript index 82f7a214c3c..a56fdcc8441 100644 --- a/src/mongo/db/ops/SConscript +++ b/src/mongo/db/ops/SConscript @@ -44,6 +44,7 @@ env.CppUnitTest( ], LIBDEPS=[ '$BUILD_DIR/mongo/mutable_bson_test_utils', + '$BUILD_DIR/mongo/expressions', 'update_common', ], ) diff --git a/src/mongo/db/ops/path_support.cpp b/src/mongo/db/ops/path_support.cpp index d46ad1aa977..fa72a4aceae 100644 --- a/src/mongo/db/ops/path_support.cpp +++ b/src/mongo/db/ops/path_support.cpp @@ -38,6 +38,8 @@ namespace mongo { namespace pathsupport { + using mongoutils::str::stream; + namespace { bool isNumeric(const StringData& str, size_t* num) { @@ -260,5 +262,199 @@ namespace pathsupport { return Status::OK(); } + Status setElementAtPath(const FieldRef& path, + const BSONElement& value, + mutablebson::Document* doc) { + + size_t deepestElemPathPart; + mutablebson::Element deepestElem(doc->end()); + + // Get the existing parents of this path + Status status = findLongestPrefix(path, + doc->root(), + &deepestElemPathPart, + &deepestElem); + + // TODO: All this is pretty awkward, why 
not return the position immediately after the + // consumed path or use a signed sentinel? Why is it a special case when we've consumed the + // whole path? + + if (!status.isOK() && status.code() != ErrorCodes::NonExistentPath) + return status; + + // Inc the path by one *unless* we matched nothing + if (status.code() != ErrorCodes::NonExistentPath) { + ++deepestElemPathPart; + } + else { + deepestElemPathPart = 0; + deepestElem = doc->root(); + } + + if (deepestElemPathPart == path.numParts()) { + // The full path exists already in the document, so just set a value + return deepestElem.setValueBSONElement(value); + } + else { + // Construct the rest of the path we need with empty documents and set the value + StringData leafFieldName = path.getPart(path.numParts() - 1); + mutablebson::Element leafElem = doc->makeElementWithNewFieldName(leafFieldName, + value); + dassert(leafElem.ok()); + return createPathAt(path, deepestElemPathPart, deepestElem, leafElem); + } + } + + const BSONElement& findParentEqualityElement(const EqualityMatches& equalities, + const FieldRef& path, + int* parentPathParts) { + + // We may have an equality match to an object at a higher point in the pattern path, check + // all path prefixes for equality matches + // ex: path: 'a.b', query : { 'a' : { b : <value> } } + // ex: path: 'a.b.c', query : { 'a.b' : { c : <value> } } + for (int i = static_cast<int>(path.numParts()); i >= 0; --i) { + + // "" element is *not* a parent of anyone but itself + if (i == 0 && path.numParts() != 0) + continue; + + StringData subPathStr = path.dottedSubstring(0, i); + EqualityMatches::const_iterator seenIt = equalities.find(subPathStr); + if (seenIt == equalities.end()) + continue; + + *parentPathParts = i; + return seenIt->second->getData(); + } + + *parentPathParts = -1; + static const BSONElement eooElement; + return eooElement; + } + + /** + * Helper function to check if the current equality match paths conflict with a new path. + */ + static Status checkEqualityConflicts(const EqualityMatches& equalities, const FieldRef& path) { + + int parentPathPart = -1; + const BSONElement& parentEl = findParentEqualityElement(equalities, + path, + &parentPathPart); + + if (parentEl.eoo()) + return Status::OK(); + + string errMsg = "cannot infer query fields to set, "; + + StringData pathStr = path.dottedField(); + StringData prefixStr = path.dottedSubstring(0, parentPathPart); + StringData suffixStr = path.dottedSubstring(parentPathPart, path.numParts()); + + if (suffixStr.size() != 0) + errMsg += stream() << "both paths '" << pathStr << "' and '" << prefixStr + << "' are matched"; + else + errMsg += stream() << "path '" << pathStr << "' is matched twice"; + + return Status(ErrorCodes::NotSingleValueField, errMsg); + } + + /** + * Helper function to check if path conflicts are all prefixes. 
+     */
+    static Status checkPathIsPrefixOf(const FieldRef& path, const FieldRefSet& conflictPaths) {
+
+        for (FieldRefSet::const_iterator it = conflictPaths.begin(); it != conflictPaths.end();
+            ++it) {
+
+            const FieldRef* conflictingPath = *it;
+            // Conflicts are always prefixes of (or equal to) the path, or vice versa
+            if (path.numParts() > conflictingPath->numParts()) {
+
+                string errMsg = stream() << "field at '" << conflictingPath->dottedField()
+                                         << "' must be exactly specified, field at sub-path '"
+                                         << path.dottedField() << "' found";
+                return Status(ErrorCodes::NotExactValueField, errMsg);
+            }
+        }
+
+        return Status::OK();
+    }
+
+    static Status _extractFullEqualityMatches(const MatchExpression& root,
+                                              const FieldRefSet* fullPathsToExtract,
+                                              EqualityMatches* equalities) {
+
+        if (root.matchType() == MatchExpression::EQ) {
+
+            // Extract equality matches
+            const EqualityMatchExpression& eqChild =
+                static_cast<const EqualityMatchExpression&>(root);
+
+            FieldRef path(eqChild.path());
+
+            if (fullPathsToExtract) {
+
+                FieldRefSet conflictPaths;
+                fullPathsToExtract->findConflicts(&path, &conflictPaths);
+
+                // Ignore if this path is unrelated to the full paths
+                if (conflictPaths.empty())
+                    return Status::OK();
+
+                // Make sure we're a prefix of all the conflict paths
+                Status status = checkPathIsPrefixOf(path, conflictPaths);
+                if (!status.isOK())
+                    return status;
+            }
+
+            Status status = checkEqualityConflicts(*equalities, path);
+            if (!status.isOK())
+                return status;
+
+            equalities->insert(make_pair(eqChild.path(), &eqChild));
+        }
+        else if (root.matchType() == MatchExpression::AND) {
+
+            // Further explore $and matches
+            for (size_t i = 0; i < root.numChildren(); ++i) {
+                MatchExpression* child = root.getChild(i);
+                Status status = _extractFullEqualityMatches(*child, fullPathsToExtract, equalities);
+                if (!status.isOK())
+                    return status;
+            }
+        }
+
+        return Status::OK();
+    }
+
+    Status extractFullEqualityMatches(const MatchExpression& root,
+                                      const FieldRefSet& fullPathsToExtract,
+                                      EqualityMatches* equalities) {
+        return _extractFullEqualityMatches(root, &fullPathsToExtract, equalities);
+    }
+
+    Status extractEqualityMatches(const MatchExpression& root, EqualityMatches* equalities) {
+        return _extractFullEqualityMatches(root, NULL, equalities);
+    }
+
+    Status addEqualitiesToDoc(const EqualityMatches& equalities, mutablebson::Document* doc) {
+
+        for (EqualityMatches::const_iterator it = equalities.begin(); it != equalities.end();
+            ++it) {
+
+            FieldRef path(it->first);
+            const BSONElement& data = it->second->getData();
+
+            Status status = setElementAtPath(path, data, doc);
+            if (!status.isOK())
+                return status;
+        }
+
+        return Status::OK();
+    }
+
 } // namespace pathsupport
 } // namespace mongo
diff --git a/src/mongo/db/ops/path_support.h b/src/mongo/db/ops/path_support.h
index 2641b39f658..ce9d2bcf595 100644
--- a/src/mongo/db/ops/path_support.h
+++ b/src/mongo/db/ops/path_support.h
@@ -33,6 +33,9 @@
 #include "mongo/base/status.h"
 #include "mongo/bson/mutable/element.h"
 #include "mongo/db/field_ref.h"
+#include "mongo/db/field_ref_set.h"
+#include "mongo/db/matcher/expression.h"
+#include "mongo/db/matcher/expression_leaf.h"
 #include "mongo/platform/cstdint.h"

 namespace mongo {
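The helpers above compose: addEqualitiesToDoc() delegates each write to setElementAtPath(), which creates any missing parents along a dotted path. A minimal sketch of that behavior, assuming only the pathsupport API added in this patch (the wrapper function and sample values are illustrative):

    #include "mongo/bson/mutable/document.h"
    #include "mongo/db/field_ref.h"
    #include "mongo/db/jsobj.h"
    #include "mongo/db/ops/path_support.h"

    using namespace mongo;

    // Illustrative only: write 5 at "a.b.c" into an empty mutable document.
    // setElementAtPath creates the missing parents, so {} becomes
    // { a: { b: { c: 5 } } }; if an existing element of the wrong type blocks
    // the path, PathNotViable is returned instead.
    Status setAtDottedPathSketch() {
        mutablebson::Document doc;              // starts out as {}
        BSONObj value = BSON("c" << 5);         // owns the BSONElement we write

        return pathsupport::setElementAtPath(FieldRef("a.b.c"),
                                             value.firstElement(),
                                             &doc);
    }
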
@@ -43,6 +46,9 @@ namespace mongo {
     // doesn't exist.
     static const size_t kMaxPaddingAllowed = 1500000;
+    // Convenience type to hold equality matches at particular paths from a MatchExpression
+    typedef map<StringData, const EqualityMatchExpression*> EqualityMatches;
+
     /**
      * Finds the longest portion of 'prefix' that exists in document rooted at 'root' and is
      * "viable." A viable path is one that, if fully created on a given doc, would not
@@ -93,6 +99,93 @@ namespace mongo {
                      mutablebson::Element elemFound,
                      mutablebson::Element newElem);

+    /**
+     * Uses the above methods to set the given value at the specified path in a mutable
+     * Document, creating parents of the path if necessary.
+     *
+     * Returns PathNotViable if the path cannot be created without modifying the type of another
+     * element, see above.
+     */
+    Status setElementAtPath(const FieldRef& path,
+                            const BSONElement& value,
+                            mutablebson::Document* doc);
+
+    /**
+     * Finds and returns-by-path all the equality matches in a particular MatchExpression.
+     *
+     * This method is meant to be used with the methods below, which allow efficient use of the
+     * equality matches without needing to serialize to a BSONObj.
+     *
+     * Returns NotSingleValueField if the match expression has equality operators for
+     * conflicting paths - equality paths conflict if they are the same or one path is a prefix
+     * of the other.
+     *
+     * Ex:
+     *   { a : 1, b : 1 } -> no conflict
+     *   { a : 1, a.b : 1 } -> conflict
+     *   { _id : { x : 1 }, _id.y : 1 } -> conflict
+     *   { a : 1, a : 1 } -> conflict
+     */
+    Status extractEqualityMatches(const MatchExpression& root, EqualityMatches* equalities);
+
+    /**
+     * Same as the above, but ignores all paths except for paths in a specified set.
+     * Equality matches with paths completely distinct from these paths are ignored.
+     *
+     * For a full equality match, the path of an equality found must not extend beyond any of
+     * the specified paths - otherwise it isn't clear how to construct a full value for a field
+     * at that path.
+     *
+     * Generally this is useful for shard keys and _ids which need unambiguous extraction from
+     * queries.
+     *
+     * Ex:
+     *   { a : 1 }, full path 'a' -> a $eq 1 extracted
+     *   { a : 1 }, full path 'a.b' -> a $eq 1 extracted
+     *   { 'a.b' : 1 }, full path 'a' -> NotExactValueField error
+     *                                   ('a.b' doesn't specify 'a' fully)
+     *   { 'a.b' : 1 }, full path 'a.b' -> 'a.b' $eq 1 extracted
+     *   { '_id' : 1 }, full path '_id' -> '_id' $eq 1 extracted
+     *   { '_id.x' : 1 }, full path '_id' -> NotExactValueField error
+     */
+    Status extractFullEqualityMatches(const MatchExpression& root,
+                                      const FieldRefSet& fullPathsToExtract,
+                                      EqualityMatches* equalities);
+
+    /**
+     * Returns the equality match which is at, or a parent of, the specified path string. The
+     * path string must be a valid dotted path.
+     *
+     * If a parent equality is found, returns the BSONElement data from that equality (which
+     * includes the BSON value), the path of the parent element (prefixStr), and the remainder
+     * of the path (which may be empty).
+     *
+     * EOO() is returned if there were no equalities at any point along the path.
+ * + * Ex: + * Given equality matches of: + * 'a.b' : 1, 'c' : 2 + * Path 'a' has no equality match parent (EOO) + * Path 'c' has an eqmatch parent of 'c' : 2 + * Path 'c.d' has an eqmatch parent of 'c' : 2 + * Path 'a.b' has an eqmatch parent of 'a.b' : 1 + * Path 'a.b.c' has an eqmatch parent of 'a.b' : 1 + * + */ + const BSONElement& findParentEqualityElement(const EqualityMatches& equalities, + const FieldRef& path, + int* parentPathParts); + + /** + * Adds the BSON values from equality matches into the given document at the equality match + * paths. + * + * Returns PathNotViable similar to setElementAtPath above. If equality paths do not + * conflict, as is enforced by extractEqualityMatches, this function should return OK. + */ + Status addEqualitiesToDoc(const EqualityMatches& equalities, + mutablebson::Document* doc); + } // namespace pathsupport } // namespace mongo diff --git a/src/mongo/db/ops/path_support_test.cpp b/src/mongo/db/ops/path_support_test.cpp index 34113bc902b..686d4b46094 100644 --- a/src/mongo/db/ops/path_support_test.cpp +++ b/src/mongo/db/ops/path_support_test.cpp @@ -31,6 +31,7 @@ #include <string> #include "mongo/base/error_codes.h" +#include "mongo/base/owned_pointer_vector.h" #include "mongo/base/status.h" #include "mongo/base/string_data.h" #include "mongo/bson/mutable/algorithm.h" @@ -39,27 +40,18 @@ #include "mongo/db/field_ref.h" #include "mongo/db/jsobj.h" #include "mongo/db/json.h" +#include "mongo/db/matcher/expression.h" +#include "mongo/db/matcher/expression_leaf.h" +#include "mongo/db/matcher/expression_parser.h" #include "mongo/platform/cstdint.h" #include "mongo/unittest/unittest.h" #include "mongo/util/mongoutils/str.h" namespace { - using mongo::BSONObj; - using mongo::ErrorCodes; - using mongo::FieldRef; - using mongo::fromjson; - using mongo::jstNULL; - using mongo::NumberInt; - using mongo::Object; - using mongo::pathsupport::findLongestPrefix; - using mongo::pathsupport::createPathAt; - using mongo::Status; - using mongo::StringData; - using mongo::mutablebson::countChildren; - using mongo::mutablebson::getNthChild; - using mongo::mutablebson::Document; - using mongo::mutablebson::Element; + using namespace mongo; + using namespace mutablebson; + using namespace pathsupport; using mongoutils::str::stream; using std::string; @@ -459,4 +451,431 @@ namespace { ASSERT_EQUALS(elemFound.compareWithElement(root()["b"]), 0); } + // + // Tests of equality extraction from MatchExpressions + // NONGOAL: Testing query/match expression parsing and optimization + // + + static MatchExpression* makeExpr(const BSONObj& exprBSON) { + static const WhereCallbackNoop callbackNoop; + return MatchExpressionParser::parse(exprBSON, callbackNoop).getValue(); + } + + static void assertContains(const EqualityMatches& equalities, const BSONObj& wrapped) { + + BSONElement value = wrapped.firstElement(); + StringData path = value.fieldNameStringData(); + + EqualityMatches::const_iterator it = equalities.find(path); + if (it == equalities.end()) { + FAIL(stream() << "Equality matches did not contain path \"" << path << "\""); + } + if (!it->second->getData().valuesEqual(value)) { + FAIL(stream() << "Equality match at path \"" << path << "\" contains value " + << it->second->getData() << ", not value " << value); + } + } + + static void assertContains(const EqualityMatches& equalities, + const StringData& path, + int value) { + assertContains(equalities, BSON(path << value)); + } + + // NOTE: For tests below, BSONObj expr must exist for lifetime of MatchExpression + + 
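The tests that follow pin down this extraction behavior. For orientation, the intended end-to-end flow from a parsed query to an upsert base document (mirroring what the update driver below does with this API) looks roughly like this sketch, which assumes a MatchExpression parsed elsewhere and elides error details:

    #include "mongo/base/status.h"
    #include "mongo/bson/mutable/document.h"
    #include "mongo/db/matcher/expression.h"
    #include "mongo/db/ops/path_support.h"

    using namespace mongo;

    // Sketch: collect the $eq/$and equalities of a parsed query and write
    // them into a fresh document, e.g. {a:1, 'b.c':2} -> { a: 1, b: { c: 2 } }.
    Status buildUpsertBaseSketch(const MatchExpression& root,
                                 mutablebson::Document* doc) {
        pathsupport::EqualityMatches equalities;

        // Fails with NotSingleValueField on conflicting paths such as a / a.b.
        // Note the matched expressions are referenced, not copied, so 'root'
        // must outlive 'equalities'.
        Status status = pathsupport::extractEqualityMatches(root, &equalities);
        if (!status.isOK())
            return status;

        // Each equality is written via setElementAtPath, creating parents.
        return pathsupport::addEqualitiesToDoc(equalities, doc);
    }
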
TEST(ExtractEqualities, Basic) { + BSONObj exprBSON = fromjson("{a:1}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + ASSERT_EQUALS(equalities.size(), 1u); + assertContains(equalities, "a", 1); + } + + TEST(ExtractEqualities, Multiple) { + BSONObj exprBSON = fromjson("{a:1, b:2}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + ASSERT_EQUALS(equalities.size(), 2u); + assertContains(equalities, "a", 1); + assertContains(equalities, "b", 2); + } + + TEST(ExtractEqualities, EqOperator) { + BSONObj exprBSON = fromjson("{a:{$eq:1}}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + ASSERT_EQUALS(equalities.size(), 1u); + assertContains(equalities, "a", 1); + } + + TEST(ExtractEqualities, AndOperator) { + BSONObj exprBSON = fromjson("{$and:[{a:{$eq:1}},{b:2}]}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + ASSERT_EQUALS(equalities.size(), 2u); + assertContains(equalities, "a", 1); + assertContains(equalities, "b", 2); + } + + TEST(ExtractEqualities, NestedAndOperator) { + BSONObj exprBSON = fromjson("{$and:[{$and:[{a:{$eq:1}},{b:2}]},{c:3}]}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + ASSERT_EQUALS(equalities.size(), 3u); + assertContains(equalities, "a", 1); + assertContains(equalities, "b", 2); + assertContains(equalities, "c", 3); + } + + TEST(ExtractEqualities, NestedPaths) { + BSONObj exprBSON = fromjson("{'a.a':1}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + ASSERT_EQUALS(equalities.size(), 1u); + assertContains(equalities, "a.a", 1); + } + + TEST(ExtractEqualities, SiblingPaths) { + BSONObj exprBSON = fromjson("{'a.a':1,'a.b':{$eq:2}}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + ASSERT_EQUALS(equalities.size(), 2u); + assertContains(equalities, "a.a", 1); + assertContains(equalities, "a.b", 2); + } + + TEST(ExtractEqualities, NestedAndNestedPaths) { + BSONObj exprBSON = fromjson("{$and:[{$and:[{'a.a':{$eq:1}},{'a.b':2}]},{'c.c.c':3}]}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + ASSERT_EQUALS(equalities.size(), 3u); + assertContains(equalities, "a.a", 1); + assertContains(equalities, "a.b", 2); + assertContains(equalities, "c.c.c", 3); + } + + TEST(ExtractEqualities, IdOnly) { + BSONObj exprBSON = fromjson("{_id:1}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + ASSERT_EQUALS(equalities.size(), 1u); + assertContains(equalities, "_id", 1); + } + + /** + * Helper class to allow easy construction of immutable paths + */ + class ImmutablePaths { + public: + ImmutablePaths() {} + + void addPath(const string& path) { + _ownedPaths.mutableVector().push_back(new FieldRef(path)); + FieldRef const* conflictPath = NULL; + ASSERT(_immutablePathSet.insert(_ownedPaths.vector().back(), &conflictPath)); + } + + 
const FieldRefSet& getPathSet() { + return _immutablePathSet; + } + + private: + + FieldRefSet _immutablePathSet; + OwnedPointerVector<FieldRef> _ownedPaths; + }; + + TEST(ExtractEqualities, IdOnlyMulti) { + BSONObj exprBSON = fromjson("{_id:{$eq:1},a:1}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + ImmutablePaths immutablePaths; + immutablePaths.addPath("_id"); + + EqualityMatches equalities; + ASSERT_OK(extractFullEqualityMatches(*expr, immutablePaths.getPathSet(), &equalities)); + ASSERT_EQUALS(equalities.size(), 1u); + assertContains(equalities, "_id", 1); + } + + TEST(ExtractEqualities, IdOnlyIgnoreConflict) { + BSONObj exprBSON = fromjson("{_id:1,a:1,'a.b':1}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + ImmutablePaths immutablePaths; + immutablePaths.addPath("_id"); + + EqualityMatches equalities; + ASSERT_OK(extractFullEqualityMatches(*expr, immutablePaths.getPathSet(), &equalities)); + ASSERT_EQUALS(equalities.size(), 1u); + assertContains(equalities, "_id", 1); + } + + TEST(ExtractEqualities, IdOnlyNested) { + BSONObj exprBSON = fromjson("{'_id.a':1,'_id.b':{$eq:2},c:3}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + ImmutablePaths immutablePaths; + immutablePaths.addPath("_id"); + + EqualityMatches equalities; + Status status = extractFullEqualityMatches(*expr, immutablePaths.getPathSet(), &equalities); + ASSERT_EQUALS(status.code(), ErrorCodes::NotExactValueField); + } + + TEST(ExtractEqualities, IdAndOtherImmutable) { + BSONObj exprBSON = fromjson("{_id:1,a:1,b:2}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + ImmutablePaths immutablePaths; + immutablePaths.addPath("_id"); + immutablePaths.addPath("a"); + + EqualityMatches equalities; + ASSERT_OK(extractFullEqualityMatches(*expr, immutablePaths.getPathSet(), &equalities)); + ASSERT_EQUALS(equalities.size(), 2u); + assertContains(equalities, "_id", 1); + assertContains(equalities, "a", 1); + } + + TEST(ExtractEqualities, IdAndNestedImmutable) { + BSONObj exprBSON = fromjson("{_id:1,a:1,'c.d':3}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + ImmutablePaths immutablePaths; + immutablePaths.addPath("_id"); + immutablePaths.addPath("a.b"); + immutablePaths.addPath("c.d"); + + EqualityMatches equalities; + ASSERT_OK(extractFullEqualityMatches(*expr, immutablePaths.getPathSet(), &equalities)); + ASSERT_EQUALS(equalities.size(), 3u); + assertContains(equalities, "_id", 1); + assertContains(equalities, "a", 1); + assertContains(equalities, "c.d", 3); + } + + TEST(ExtractEqualities, NonFullImmutable) { + BSONObj exprBSON = fromjson("{'a.b':1}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + ImmutablePaths immutablePaths; + immutablePaths.addPath("a"); + + EqualityMatches equalities; + Status status = extractFullEqualityMatches(*expr, immutablePaths.getPathSet(), &equalities); + ASSERT_EQUALS(status.code(), ErrorCodes::NotExactValueField); + } + + TEST(ExtractEqualities, Empty) { + BSONObj exprBSON = fromjson("{'':0}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + ASSERT_EQUALS(equalities.size(), 1u); + assertContains(equalities, "", 0); + } + + TEST(ExtractEqualities, EmptyMulti) { + BSONObj exprBSON = fromjson("{'':0,a:{$eq:1}}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + ASSERT_EQUALS(equalities.size(), 2u); + assertContains(equalities, "", 0); + 
assertContains(equalities, "a", 1); + } + + TEST(ExtractEqualities, EqConflict) { + BSONObj exprBSON = fromjson("{a:1,a:1}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + EqualityMatches equalities; + ASSERT_EQUALS(extractEqualityMatches(*expr, &equalities).code(), + ErrorCodes::NotSingleValueField); + } + + TEST(ExtractEqualities, PrefixConflict) { + BSONObj exprBSON = fromjson("{a:1,'a.b':{$eq:1}}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + EqualityMatches equalities; + ASSERT_EQUALS(extractEqualityMatches(*expr, &equalities).code(), + ErrorCodes::NotSingleValueField); + } + + TEST(ExtractEqualities, AndPrefixConflict) { + BSONObj exprBSON = fromjson("{$and:[{a:1},{'a.b':{$eq:1}}]}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + EqualityMatches equalities; + ASSERT_EQUALS(extractEqualityMatches(*expr, &equalities).code(), + ErrorCodes::NotSingleValueField); + } + + TEST(ExtractEqualities, EmptyConflict) { + BSONObj exprBSON = fromjson("{'':0,'':{$eq:0}}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + + EqualityMatches equalities; + ASSERT_EQUALS(extractEqualityMatches(*expr, &equalities).code(), + ErrorCodes::NotSingleValueField); + } + + // + // Tests for finding parent equality from equalities found in expression + // NONGOALS: Testing complex equality match extraction - tested above + // + + static void assertParent(const EqualityMatches& equalities, + const StringData& pathStr, + const BSONObj& wrapped) { + + FieldRef path(pathStr); + BSONElement value = wrapped.firstElement(); + StringData parentPath = value.fieldNameStringData(); + + int parentPathPart; + BSONElement parentEl = findParentEqualityElement(equalities, path, &parentPathPart); + + if (parentEl.eoo()) { + FAIL(stream() << "Equality matches did not contain parent for \"" << pathStr + << "\""); + } + + StringData foundParentPath = path.dottedSubstring(0, parentPathPart); + if (foundParentPath != parentPath) { + FAIL(stream() << "Equality match parent at path \"" << foundParentPath + << "\" does not match \"" << parentPath << "\""); + } + + if (!parentEl.valuesEqual(value)) { + FAIL(stream() << "Equality match parent for \"" << pathStr << "\" at path \"" + << parentPath << "\" contains value " << parentEl << ", not value " + << value); + } + } + + static void assertParent(const EqualityMatches& equalities, + const StringData& path, + const StringData& parentPath, + int value) { + assertParent(equalities, path, BSON(parentPath << value)); + } + + static void assertNoParent(const EqualityMatches& equalities, const StringData& pathStr) { + + FieldRef path(pathStr); + + int parentPathPart; + BSONElement parentEl = findParentEqualityElement(equalities, path, &parentPathPart); + + if (!parentEl.eoo()) { + StringData foundParentPath = path.dottedSubstring(0, parentPathPart); + FAIL(stream() << "Equality matches contained parent for \"" << pathStr << "\" at \"" + << foundParentPath << "\""); + } + } + + + TEST(FindParentEquality, Basic) { + + BSONObj exprBSON = fromjson("{a:1}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + + assertNoParent(equalities, ""); + assertParent(equalities, "a", "a", 1); + assertParent(equalities, "a.b", "a", 1); + } + + TEST(FindParentEquality, Multi) { + + BSONObj exprBSON = fromjson("{a:1,b:2}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + + 
assertNoParent(equalities, ""); + assertParent(equalities, "a", "a", 1); + assertParent(equalities, "a.b", "a", 1); + assertParent(equalities, "b", "b", 2); + assertParent(equalities, "b.b", "b", 2); + } + + TEST(FindParentEquality, Nested) { + + BSONObj exprBSON = fromjson("{'a.a':1}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + + assertNoParent(equalities, ""); + assertNoParent(equalities, "a"); + assertParent(equalities, "a.a", "a.a", 1); + assertParent(equalities, "a.a.b", "a.a", 1); + } + + TEST(FindParentEquality, NestedMulti) { + + BSONObj exprBSON = fromjson("{'a.a':1,'a.b':2,'c.c':3}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + + assertNoParent(equalities, ""); + assertNoParent(equalities, "a"); + assertNoParent(equalities, "c"); + assertParent(equalities, "a.a", "a.a", 1); + assertParent(equalities, "a.a.a", "a.a", 1); + assertParent(equalities, "a.b", "a.b", 2); + assertParent(equalities, "a.b.b", "a.b", 2); + assertParent(equalities, "c.c", "c.c", 3); + assertParent(equalities, "c.c.c", "c.c", 3); + } + + TEST(FindParentEquality, Empty) { + + BSONObj exprBSON = fromjson("{'':0}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + + assertParent(equalities, "", "", 0); + } + + TEST(FindParentEquality, EmptyMulti) { + + BSONObj exprBSON = fromjson("{'':0,a:1}"); + auto_ptr<MatchExpression> expr(makeExpr(exprBSON)); + EqualityMatches equalities; + ASSERT_OK(extractEqualityMatches(*expr, &equalities)); + + assertParent(equalities, "", "", 0); + assertParent(equalities, "a", "a", 1); + assertParent(equalities, "a.b", "a", 1); + } + } // unnamed namespace diff --git a/src/mongo/db/ops/update_driver.cpp b/src/mongo/db/ops/update_driver.cpp index 672aad85d73..df8de62873e 100644 --- a/src/mongo/db/ops/update_driver.cpp +++ b/src/mongo/db/ops/update_driver.cpp @@ -46,6 +46,8 @@ namespace mongo { namespace str = mongoutils::str; namespace mb = mongo::mutablebson; + using pathsupport::EqualityMatches; + UpdateDriver::UpdateDriver(const Options& opts) : _replacementMode(false) , _indexedFields(NULL) @@ -164,6 +166,7 @@ namespace mongo { } Status UpdateDriver::populateDocumentWithQueryFields(const BSONObj& query, + const vector<FieldRef*>* immutablePaths, mutablebson::Document& doc) const { CanonicalQuery* rawCG; // We canonicalize the query to collapse $and/$or, and the first arg (ns) is not needed @@ -173,109 +176,43 @@ namespace mongo { if (!s.isOK()) return s; scoped_ptr<CanonicalQuery> cq(rawCG); - return populateDocumentWithQueryFields(rawCG, doc); + return populateDocumentWithQueryFields(rawCG, immutablePaths, doc); } Status UpdateDriver::populateDocumentWithQueryFields(const CanonicalQuery* query, + const vector<FieldRef*>* immutablePathsPtr, mutablebson::Document& doc) const { - - MatchExpression* root = query->root(); - - MatchExpression::MatchType rootType = root->matchType(); - - // These copies are needed until we apply the modifiers at the end. 
- std::vector<BSONObj> copies; - - // We only care about equality and "and"ed equality fields, everything else is ignored - if (rootType != MatchExpression::EQ && rootType != MatchExpression::AND) - return Status::OK(); + EqualityMatches equalities; + Status status = Status::OK(); if (isDocReplacement()) { - BSONElement idElem = query->getQueryObj().getField("_id"); - // Replacement mods need the _id field copied explicitly. - if (idElem.ok()) { - mb::Element elem = doc.makeElement(idElem); - return doc.root().pushFront(elem); - } + FieldRefSet pathsToExtract; - return Status::OK(); - } + // TODO: Refactor update logic, make _id just another immutable field + static const FieldRef idPath("_id"); + static const vector<FieldRef*> emptyImmutablePaths; + const vector<FieldRef*>& immutablePaths = + immutablePathsPtr ? *immutablePathsPtr : emptyImmutablePaths; - // Create a new UpdateDriver to create the base doc from the query - Options opts; - opts.logOp = false; - opts.modOptions = modOptions(); - - UpdateDriver insertDriver(opts); - insertDriver.setContext(ModifierInterface::ExecInfo::INSERT_CONTEXT); - - // If we are a single equality match query - if (root->matchType() == MatchExpression::EQ) { - EqualityMatchExpression* eqMatch = - static_cast<EqualityMatchExpression*>(root); - - const BSONElement matchData = eqMatch->getData(); - BSONElement childElem = matchData; - - // Make copy to new path if not the same field name (for cases like $all) - if (!root->path().empty() && matchData.fieldNameStringData() != root->path()) { - BSONObjBuilder copyBuilder; - copyBuilder.appendAs(eqMatch->getData(), root->path()); - const BSONObj copy = copyBuilder.obj(); - copies.push_back(copy); - childElem = copy[root->path()]; - } - - // Add this element as a $set modifier - Status s = insertDriver.addAndParse(modifiertable::MOD_SET, - childElem); - if (!s.isOK()) - return s; + pathsToExtract.fillFrom(immutablePaths); + pathsToExtract.insert(&idPath); + // Extract only immutable fields from replacement-style + status = pathsupport::extractFullEqualityMatches(*query->root(), + pathsToExtract, + &equalities); } else { - - // parse query $set mods, including only equality stuff - for (size_t i = 0; i < root->numChildren(); ++i) { - MatchExpression* child = root->getChild(i); - if (child->matchType() == MatchExpression::EQ) { - EqualityMatchExpression* eqMatch = - static_cast<EqualityMatchExpression*>(child); - - const BSONElement matchData = eqMatch->getData(); - BSONElement childElem = matchData; - - // Make copy to new path if not the same field name (for cases like $all) - if (!child->path().empty() && - matchData.fieldNameStringData() != child->path()) { - BSONObjBuilder copyBuilder; - copyBuilder.appendAs(eqMatch->getData(), child->path()); - const BSONObj copy = copyBuilder.obj(); - copies.push_back(copy); - childElem = copy[child->path()]; - } - - // Add this element as a $set modifier - Status s = insertDriver.addAndParse(modifiertable::MOD_SET, - childElem); - if (!s.isOK()) - return s; - } - } + // Extract all fields from op-style + status = pathsupport::extractEqualityMatches(*query->root(), &equalities); } - // update the document with base field - Status s = insertDriver.update(StringData(), &doc); - copies.clear(); - if (!s.isOK()) { - return Status(ErrorCodes::UnsupportedFormat, - str::stream() << "Cannot create base during" - " insert of update. 
Caused by :" - << s.toString()); - } + if (!status.isOK()) + return status; - return Status::OK(); + status = pathsupport::addEqualitiesToDoc(equalities, &doc); + return status; } Status UpdateDriver::update(const StringData& matchedField, diff --git a/src/mongo/db/ops/update_driver.h b/src/mongo/db/ops/update_driver.h index 0b9fd951cb8..4ac88872e04 100644 --- a/src/mongo/db/ops/update_driver.h +++ b/src/mongo/db/ops/update_driver.h @@ -69,9 +69,11 @@ namespace mongo { * conflicts along the way then those errors will be returned. */ Status populateDocumentWithQueryFields(const BSONObj& query, + const vector<FieldRef*>* immutablePaths, mutablebson::Document& doc) const; Status populateDocumentWithQueryFields(const CanonicalQuery* query, + const vector<FieldRef*>* immutablePaths, mutablebson::Document& doc) const; /** diff --git a/src/mongo/db/ops/update_driver_test.cpp b/src/mongo/db/ops/update_driver_test.cpp index 27f26d9dd3a..22140b5389e 100644 --- a/src/mongo/db/ops/update_driver_test.cpp +++ b/src/mongo/db/ops/update_driver_test.cpp @@ -28,21 +28,32 @@ #include "mongo/db/ops/update_driver.h" +#include <boost/scoped_ptr.hpp> + +#include <map> + +#include "mongo/base/owned_pointer_vector.h" #include "mongo/base/string_data.h" #include "mongo/bson/mutable/document.h" #include "mongo/bson/mutable/mutable_bson_test_utils.h" -#include "mongo/db/update_index_data.h" +#include "mongo/db/field_ref.h" #include "mongo/db/json.h" +#include "mongo/db/update_index_data.h" #include "mongo/unittest/unittest.h" namespace { using mongo::BSONObj; + using mongo::BSONElement; + using mongo::BSONObjIterator; + using mongo::FieldRef; using mongo::fromjson; + using mongo::OwnedPointerVector; using mongo::UpdateIndexData; using mongo::mutablebson::Document; using mongo::StringData; using mongo::UpdateDriver; + using mongoutils::str::stream; TEST(Parse, Normal) { UpdateDriver::Options opts; @@ -115,59 +126,279 @@ namespace { ASSERT_FALSE(driver.isDocReplacement()); } + // + // Tests of creating a base for an upsert from a query document + // $or, $and, $all get special handling, as does the _id field + // + // NONGOAL: Testing all query parsing and nesting combinations + // - // Test the upsert case where we copy the query parts into the new doc - TEST(CreateFromQuery, Basic) { - UpdateDriver::Options opts; - UpdateDriver driver(opts); - Document doc; + class CreateFromQueryFixture : public mongo::unittest::Test { + public: + + CreateFromQueryFixture() + : _driverOps(new UpdateDriver(UpdateDriver::Options())), + _driverRepl(new UpdateDriver(UpdateDriver::Options())) { + _driverOps->parse(fromjson("{$set:{'_':1}}")); + _driverRepl->parse(fromjson("{}")); + } + + Document& doc() { + return _doc; + } + + UpdateDriver& driverOps() { + return *_driverOps; + } + + UpdateDriver& driverRepl() { + return *_driverRepl; + } - BSONObj query = fromjson("{a:1, b:1}"); - ASSERT_OK(driver.populateDocumentWithQueryFields(query, doc)); - ASSERT_EQUALS(query, doc); + private: + boost::scoped_ptr<UpdateDriver> _driverOps; + boost::scoped_ptr<UpdateDriver> _driverRepl; + Document _doc; + }; + + // Make name nicer to report + typedef CreateFromQueryFixture CreateFromQuery; + + static void assertSameFields(const BSONObj& docA, const BSONObj& docB); + + /** + * Recursively asserts that two BSONElements contain the same data or sub-elements, + * ignoring element order. 
+ */ + static void assertSameElements(const BSONElement& elA, const BSONElement& elB) { + if (elA.type() != elB.type() || (!elA.isABSONObj() && !elA.valuesEqual(elB))) { + FAIL(stream() << "element " << elA << " not equal to " << elB); + } + else if (elA.type() == mongo::Array) { + std::vector<BSONElement> elsA = elA.Array(); + std::vector<BSONElement> elsB = elB.Array(); + if (elsA.size() != elsB.size()) + FAIL(stream() << "element " << elA << " not equal to " << elB); + + std::vector<BSONElement>::iterator arrItA = elsA.begin(); + std::vector<BSONElement>::iterator arrItB = elsB.begin(); + for (; arrItA != elsA.end(); ++arrItA, ++arrItB) { + assertSameElements(*arrItA, *arrItB); + } + } + else if (elA.type() == mongo::Object) { + assertSameFields(elA.Obj(), elB.Obj()); + } } - TEST(CreateFromQuery, BasicWithId) { - UpdateDriver::Options opts; - UpdateDriver driver(opts); - Document doc; + /** + * Recursively asserts that two BSONObjects contain the same elements, + * ignoring element order. + */ + static void assertSameFields(const BSONObj& docA, const BSONObj& docB) { + + if (docA.nFields() != docB.nFields()) + FAIL(stream() << "document " << docA << " has different fields than " << docB); + + std::map<StringData, BSONElement> docAMap; + BSONObjIterator itA(docA); + while (itA.more()) { + BSONElement elA = itA.next(); + docAMap.insert(std::make_pair(elA.fieldNameStringData(), elA)); + } + + BSONObjIterator itB(docB); + while (itB.more()) { + BSONElement elB = itB.next(); - BSONObj query = fromjson("{_id:1, a:1, b:1}"); - ASSERT_OK(driver.populateDocumentWithQueryFields(query, doc)); - ASSERT_EQUALS(query, doc); + std::map<StringData, BSONElement>::iterator seenIt = docAMap.find(elB + .fieldNameStringData()); + if (seenIt == docAMap.end()) + FAIL(stream() << "element " << elB << " not found in " << docA); + + BSONElement elA = seenIt->second; + assertSameElements(elA, elB); + } } - TEST(CreateFromQuery, NestedSharedRoot) { - UpdateDriver::Options opts; - UpdateDriver driver(opts); - Document doc; + TEST_F(CreateFromQuery, BasicOp) { + BSONObj query = fromjson("{a:1,b:2}"); + ASSERT_OK(driverOps().populateDocumentWithQueryFields(query, NULL, doc())); + assertSameFields(query, doc().getObject()); + } - ASSERT_OK(driver.populateDocumentWithQueryFields(fromjson("{'a.c':1, 'a.b':1}"), doc)); + TEST_F(CreateFromQuery, BasicOpEq) { + BSONObj query = fromjson("{a:{$eq:1}}"); + ASSERT_OK(driverOps().populateDocumentWithQueryFields(query, NULL, doc())); + assertSameFields(fromjson("{a:1}"), doc().getObject()); } - TEST(CreateFromQuery, AllArrayDoesntHaveOrdinalName) { - UpdateDriver::Options opts; - UpdateDriver driver(opts); - Document doc; + TEST_F(CreateFromQuery, BasicOpWithId) { + BSONObj query = fromjson("{_id:1,a:1,b:2}"); + ASSERT_OK(driverOps().populateDocumentWithQueryFields(query, NULL, doc())); + assertSameFields(query, doc().getObject()); + } - ASSERT_OK(driver.populateDocumentWithQueryFields(fromjson("{a:{$all:[1]}}"), doc)); - ASSERT_EQUALS(fromjson("{a:1}"), doc); + TEST_F(CreateFromQuery, BasicRepl) { + BSONObj query = fromjson("{a:1,b:2}"); + ASSERT_OK(driverRepl().populateDocumentWithQueryFields(query, NULL, doc())); + assertSameFields(fromjson("{}"), doc().getObject()); } - // Failures - TEST(CreateFromQuery, DupFieldsFail) { - UpdateDriver::Options opts; - UpdateDriver driver(opts); - Document doc; + TEST_F(CreateFromQuery, BasicReplWithId) { + BSONObj query = fromjson("{_id:1,a:1,b:2}"); + ASSERT_OK(driverRepl().populateDocumentWithQueryFields(query, NULL, doc())); + 
assertSameFields(fromjson("{_id:1}"), doc().getObject()); + } - ASSERT_NOT_OK(driver.populateDocumentWithQueryFields(fromjson("{a:1, 'a.b':1}"), doc)); + TEST_F(CreateFromQuery, BasicReplWithIdEq) { + BSONObj query = fromjson("{_id:{$eq:1},a:1,b:2}"); + ASSERT_OK(driverRepl().populateDocumentWithQueryFields(query, NULL, doc())); + assertSameFields(fromjson("{_id:1}"), doc().getObject()); } - TEST(CreateFromQuery, AllArrayMultipleVals) { - UpdateDriver::Options opts; - UpdateDriver driver(opts); - Document doc; + TEST_F(CreateFromQuery, NoRootIdOp) { + BSONObj query = fromjson("{'_id.a':1,'_id.b':2}"); + ASSERT_OK(driverOps().populateDocumentWithQueryFields(query, NULL, doc())); + assertSameFields(fromjson("{_id:{a:1,b:2}}"), doc().getObject()); + } - ASSERT_NOT_OK(driver.populateDocumentWithQueryFields(fromjson("{a:{$all:[1, 2]}}"), doc)); + TEST_F(CreateFromQuery, NoRootIdRepl) { + BSONObj query = fromjson("{'_id.a':1,'_id.b':2}"); + ASSERT_NOT_OK(driverRepl().populateDocumentWithQueryFields(query, NULL, doc())); } + + TEST_F(CreateFromQuery, NestedSharedRootOp) { + BSONObj query = fromjson("{'a.c':1,'a.b':{$eq:2}}"); + ASSERT_OK(driverOps().populateDocumentWithQueryFields(query, NULL, doc())); + assertSameFields(fromjson("{a:{c:1,b:2}}"), doc().getObject()); + } + + TEST_F(CreateFromQuery, OrQueryOp) { + BSONObj query = fromjson("{$or:[{a:1}]}"); + ASSERT_OK(driverOps().populateDocumentWithQueryFields(query, NULL, doc())); + assertSameFields(fromjson("{a:1}"), doc().getObject()); + } + + TEST_F(CreateFromQuery, OrQueryIdRepl) { + BSONObj query = fromjson("{$or:[{_id:1}]}"); + ASSERT_OK(driverRepl().populateDocumentWithQueryFields(query, NULL, doc())); + assertSameFields(fromjson("{_id:1}"), doc().getObject()); + } + + TEST_F(CreateFromQuery, OrQueryNoExtractOps) { + BSONObj query = fromjson("{$or:[{a:1}, {b:2}]}"); + ASSERT_OK(driverOps().populateDocumentWithQueryFields(query, NULL, doc())); + assertSameFields(BSONObj(), doc().getObject()); + } + + TEST_F(CreateFromQuery, OrQueryNoExtractIdRepl) { + BSONObj query = fromjson("{$or:[{_id:1}, {_id:2}]}"); + ASSERT_OK(driverRepl().populateDocumentWithQueryFields(query, NULL, doc())); + assertSameFields(BSONObj(), doc().getObject()); + } + + TEST_F(CreateFromQuery, AndQueryOp) { + BSONObj query = fromjson("{$and:[{'a.c':1},{'a.b':{$eq:2}}]}"); + ASSERT_OK(driverOps().populateDocumentWithQueryFields(query, NULL, doc())); + assertSameFields(fromjson("{a:{c:1,b:2}}"), doc().getObject()); + } + + TEST_F(CreateFromQuery, AndQueryIdRepl) { + BSONObj query = fromjson("{$and:[{_id:1},{a:{$eq:2}}]}"); + ASSERT_OK(driverRepl().populateDocumentWithQueryFields(query, NULL, doc())); + assertSameFields(fromjson("{_id:1}"), doc().getObject()); + } + + TEST_F(CreateFromQuery, AllArrayOp) { + BSONObj query = fromjson("{a:{$all:[1]}}"); + ASSERT_OK(driverOps().populateDocumentWithQueryFields(query, NULL, doc())); + assertSameFields(fromjson("{a:1}"), doc().getObject()); + } + + TEST_F(CreateFromQuery, AllArrayIdRepl) { + BSONObj query = fromjson("{_id:{$all:[1]}, b:2}"); + ASSERT_OK(driverRepl().populateDocumentWithQueryFields(query, NULL, doc())); + assertSameFields(fromjson("{_id:1}"), doc().getObject()); + } + + TEST_F(CreateFromQuery, ConflictFieldsFailOp) { + BSONObj query = fromjson("{a:1,'a.b':1}"); + ASSERT_NOT_OK(driverOps().populateDocumentWithQueryFields(query, NULL, doc())); + } + + TEST_F(CreateFromQuery, ConflictFieldsFailSameValueOp) { + BSONObj query = fromjson("{a:{b:1},'a.b':1}"); + 
ASSERT_NOT_OK(driverOps().populateDocumentWithQueryFields(query, NULL, doc()));
+    }
+
+    TEST_F(CreateFromQuery, ConflictWithIdRepl) {
+        BSONObj query = fromjson("{_id:1,'_id.a':1}");
+        ASSERT_NOT_OK(driverRepl().populateDocumentWithQueryFields(query, NULL, doc()));
+    }
+
+    TEST_F(CreateFromQuery, ConflictAndQueryOp) {
+        BSONObj query = fromjson("{$and:[{a:{b:1}},{'a.b':{$eq:1}}]}");
+        ASSERT_NOT_OK(driverOps().populateDocumentWithQueryFields(query, NULL, doc()));
+    }
+
+    TEST_F(CreateFromQuery, ConflictAllMultipleValsOp) {
+        BSONObj query = fromjson("{a:{$all:[1, 2]}}");
+        ASSERT_NOT_OK(driverOps().populateDocumentWithQueryFields(query, NULL, doc()));
+    }
+
+    TEST_F(CreateFromQuery, NoConflictOrQueryOp) {
+        BSONObj query = fromjson("{$or:[{a:{b:1}},{'a.b':{$eq:1}}]}");
+        ASSERT_OK(driverOps().populateDocumentWithQueryFields(query, NULL, doc()));
+        assertSameFields(BSONObj(), doc().getObject());
+    }
+
+    TEST_F(CreateFromQuery, ImmutableFieldsOp) {
+        BSONObj query = fromjson("{$or:[{a:{b:1}},{'a.b':{$eq:1}}]}");
+        ASSERT_OK(driverOps().populateDocumentWithQueryFields(query, NULL, doc()));
+        assertSameFields(BSONObj(), doc().getObject());
+    }
+
+    TEST_F(CreateFromQuery, ShardKeyRepl) {
+        BSONObj query = fromjson("{a:{$eq:1}, b:2}");
+        OwnedPointerVector<FieldRef> immutablePaths;
+        immutablePaths.push_back(new FieldRef("a"));
+        ASSERT_OK(driverRepl().populateDocumentWithQueryFields(query,
+                                                               &immutablePaths.vector(),
+                                                               doc()));
+        assertSameFields(fromjson("{a:1}"), doc().getObject());
+    }
+
+    TEST_F(CreateFromQuery, NestedShardKeyRepl) {
+        BSONObj query = fromjson("{a:{$eq:1},'b.c':2,d:2}");
+        OwnedPointerVector<FieldRef> immutablePaths;
+        immutablePaths.push_back(new FieldRef("a"));
+        immutablePaths.push_back(new FieldRef("b.c"));
+        ASSERT_OK(driverRepl().populateDocumentWithQueryFields(query,
+                                                               &immutablePaths.vector(),
+                                                               doc()));
+        assertSameFields(fromjson("{a:1,b:{c:2}}"), doc().getObject());
+    }
+
+    TEST_F(CreateFromQuery, NestedShardKeyOp) {
+        BSONObj query = fromjson("{a:{$eq:1},'b.c':2,d:{$all:[3]}}");
+        OwnedPointerVector<FieldRef> immutablePaths;
+        immutablePaths.push_back(new FieldRef("a"));
+        immutablePaths.push_back(new FieldRef("b.c"));
+        ASSERT_OK(driverOps().populateDocumentWithQueryFields(query,
+                                                              &immutablePaths.vector(),
+                                                              doc()));
+        assertSameFields(fromjson("{a:1,b:{c:2},d:3}"), doc().getObject());
+    }
+
+    TEST_F(CreateFromQuery, NotFullShardKeyRepl) {
+        BSONObj query = fromjson("{a:{$eq:1}, 'b.c':2, d:2}");
+        OwnedPointerVector<FieldRef> immutablePaths;
+        immutablePaths.push_back(new FieldRef("a"));
+        immutablePaths.push_back(new FieldRef("b"));
+        ASSERT_NOT_OK(driverRepl().populateDocumentWithQueryFields(query,
+                                                                   &immutablePaths.vector(),
+                                                                   doc()));
+    }
+
 } // unnamed namespace
diff --git a/src/mongo/dbtests/chunktests.cpp b/src/mongo/dbtests/chunktests.cpp
index 1b87f749bef..3faddbc5305 100644
--- a/src/mongo/dbtests/chunktests.cpp
+++ b/src/mongo/dbtests/chunktests.cpp
@@ -38,17 +38,19 @@ namespace mongo {

 class TestableChunkManager : public ChunkManager {
 public:
-    void setShardKey( const BSONObj &keyPattern ) {
-        const_cast<ShardKeyPattern&>(_key) = ShardKeyPattern( keyPattern );
+
+    TestableChunkManager(const string& ns, const ShardKeyPattern& keyPattern, bool unique)
+        : ChunkManager(ns, keyPattern, unique) {
     }
+
     void setSingleChunkForShards( const vector<BSONObj> &splitPoints ) {
         ChunkMap &chunkMap = const_cast<ChunkMap&>( _chunkMap );
         ChunkRangeManager &chunkRanges = const_cast<ChunkRangeManager&>( _chunkRanges );
         set<Shard> &shards =
const_cast<set<Shard>&>( _shards ); vector<BSONObj> mySplitPoints( splitPoints ); - mySplitPoints.insert( mySplitPoints.begin(), _key.globalMin() ); - mySplitPoints.push_back( _key.globalMax() ); + mySplitPoints.insert( mySplitPoints.begin(), _keyPattern.getKeyPattern().globalMin() ); + mySplitPoints.push_back( _keyPattern.getKeyPattern().globalMax() ); for( unsigned i = 1; i < mySplitPoints.size(); ++i ) { string name = str::stream() << (i-1); @@ -75,20 +77,13 @@ namespace ChunkTests { namespace ChunkManagerTests { typedef mongo::TestableChunkManager ChunkManager; - - class Create { - public: - void run() { - ChunkManager chunkManager; - } - }; - + class Base { public: virtual ~Base() {} void run() { - ChunkManager chunkManager; - chunkManager.setShardKey( shardKey() ); + ShardKeyPattern shardKeyPattern(shardKey()); + ChunkManager chunkManager("", shardKeyPattern, false); chunkManager.setSingleChunkForShards( splitPointsVector() ); set<Shard> shards; @@ -259,7 +254,6 @@ namespace ChunkTests { } void setupTests() { - add<ChunkManagerTests::Create>(); add<ChunkManagerTests::EmptyQuerySingleShard>(); add<ChunkManagerTests::EmptyQueryMultiShard>(); add<ChunkManagerTests::UniversalRangeMultiShard>(); diff --git a/src/mongo/dbtests/keypatterntests.cpp b/src/mongo/dbtests/keypatterntests.cpp deleted file mode 100644 index b4a96cd3a1e..00000000000 --- a/src/mongo/dbtests/keypatterntests.cpp +++ /dev/null @@ -1,120 +0,0 @@ -// keypatterntests.cpp - Tests for the KeyPattern class -// - -/** - * Copyright (C) 2012 10gen Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects - * for all of the code used other than as permitted herein. If you modify - * file(s) with this exception, you may extend this exception to your - * version of the file(s), but you are not obligated to do so. If you do not - * wish to do so, delete this exception statement from your version. If you - * delete this exception statement from all source files in the program, - * then also delete it in the license file. 
- */ - - -#include "mongo/db/keypattern.h" -#include "mongo/dbtests/dbtests.h" - -namespace KeyPatternTests { - - class ExtendRangeBoundTests { - public: - void run() { - - BSONObj bound = BSON( "a" << 55 ); - BSONObj longBound = BSON("a" << 55 << "b" << 66); - - //test keyPattern shorter than bound, should fail - { - KeyPattern keyPat( BSON( "a" << 1 ) ); - ASSERT_THROWS( keyPat.extendRangeBound( longBound, false ), MsgAssertionException ); - } - - //test keyPattern doesn't match bound, should fail - { - KeyPattern keyPat( BSON( "b" << 1 ) ); - ASSERT_THROWS( keyPat.extendRangeBound( bound, false ), MsgAssertionException ); - } - { - KeyPattern keyPat( BSON( "a" << 1 << "c" << 1) ); - ASSERT_THROWS( keyPat.extendRangeBound( longBound, false ), MsgAssertionException ); - } - - //test keyPattern same as bound - { - KeyPattern keyPat( BSON( "a" << 1 ) ); - BSONObj newB = keyPat.extendRangeBound( bound, false ); - ASSERT_EQUALS( newB , BSON("a" << 55) ); - } - { - KeyPattern keyPat( BSON( "a" << 1 ) ); - BSONObj newB = keyPat.extendRangeBound( bound, false ); - ASSERT_EQUALS( newB , BSON("a" << 55) ); - } - - //test keyPattern longer than bound, simple - { - KeyPattern keyPat( BSON( "a" << 1 << "b" << 1) ); - BSONObj newB = keyPat.extendRangeBound( bound, false ); - ASSERT_EQUALS( newB , BSON("a" << 55 << "b" << MINKEY ) ); - } - { - KeyPattern keyPat( BSON( "a" << 1 << "b" << 1) ); - BSONObj newB = keyPat.extendRangeBound( bound, true ); - ASSERT_EQUALS( newB , BSON("a" << 55 << "b" << MAXKEY ) ); - } - - //test keyPattern longer than bound, more complex pattern directions - { - KeyPattern keyPat( BSON( "a" << 1 << "b" << -1) ); - BSONObj newB = keyPat.extendRangeBound( bound, false ); - ASSERT_EQUALS( newB , BSON("a" << 55 << "b" << MAXKEY ) ); - } - { - KeyPattern keyPat( BSON( "a" << 1 << "b" << -1) ); - BSONObj newB = keyPat.extendRangeBound( bound, true ); - ASSERT_EQUALS( newB , BSON("a" << 55 << "b" << MINKEY ) ); - } - { - - KeyPattern keyPat( BSON( "a" << 1 << "b" << -1 << "c" << 1 ) ); - BSONObj newB = keyPat.extendRangeBound( bound, false ); - ASSERT_EQUALS( newB , BSON("a" << 55 << "b" << MAXKEY << "c" << MINKEY ) ); - } - { - KeyPattern keyPat( BSON( "a" << 1 << "b" << -1 << "c" << 1 ) ); - BSONObj newB = keyPat.extendRangeBound( bound, true ); - ASSERT_EQUALS( newB , BSON("a" << 55 << "b" << MINKEY << "c" << MAXKEY ) ); - } - } - }; - - class All : public Suite { - public: - All() : Suite( "keypattern" ) { - } - - void setupTests() { - add< ExtendRangeBoundTests >(); - } - } myall; - -} // namespace KeyPatternTests diff --git a/src/mongo/dbtests/sharding.cpp b/src/mongo/dbtests/sharding.cpp index fb6c5714dc3..b2c5b1a4831 100644 --- a/src/mongo/dbtests/sharding.cpp +++ b/src/mongo/dbtests/sharding.cpp @@ -149,7 +149,8 @@ namespace ShardingTests { void run(){ - ChunkManager manager( collName(), ShardKeyPattern( BSON( "_id" << 1 ) ), false ); + ShardKeyPattern shardKeyPattern(BSON("_id" << 1)); + ChunkManager manager(collName(), shardKeyPattern, false); manager.createFirstChunks( shard().getConnString(), shard(), NULL, NULL ); BSONObj firstChunk = _client.findOne(ChunkType::ConfigNS, BSONObj()).getOwned(); @@ -199,7 +200,8 @@ namespace ShardingTests { vector<BSONObj> splitKeys; genRandomSplitKeys( keyName, &splitKeys ); - ChunkManager manager( collName(), ShardKeyPattern( BSON( keyName << 1 ) ), false ); + ShardKeyPattern shardKeyPattern(BSON(keyName << 1)); + ChunkManager manager(collName(), shardKeyPattern, false); manager.createFirstChunks( shard().getConnString(), shard(), 
&splitKeys, NULL ); } @@ -283,7 +285,9 @@ namespace ShardingTests { _client.update(ChunkType::ConfigNS, BSONObj(), BSON( "$set" << b.obj())); // Make new manager load chunk diff - ChunkManager newManager(manager.getns(), manager.getShardKey(), manager.isUnique()); + ChunkManager newManager(manager.getns(), + manager.getShardKeyPattern(), + manager.isUnique()); newManager.loadExistingRanges(shard().getConnString(), &manager); ASSERT( newManager.getVersion().toLong() == laterVersion.toLong() ); diff --git a/src/mongo/s/balance.cpp b/src/mongo/s/balance.cpp index 8f5467146a0..80067ee02de 100644 --- a/src/mongo/s/balance.cpp +++ b/src/mongo/s/balance.cpp @@ -460,7 +460,7 @@ namespace mongo { for ( unsigned i = 0; i < ranges.size(); i++ ) { BSONObj min = ranges[i].min; - min = cm->getShardKey().extendRangeBound( min, false ); + min = cm->getShardKeyPattern().getKeyPattern().extendRangeBound( min, false ); if ( allChunkMinimums.count( min ) > 0 ) continue; diff --git a/src/mongo/s/chunk.cpp b/src/mongo/s/chunk.cpp index b40a1d92c7d..9ee5ad58cf5 100644 --- a/src/mongo/s/chunk.cpp +++ b/src/mongo/s/chunk.cpp @@ -202,33 +202,35 @@ namespace mongo { return _manager->getns(); } - bool Chunk::containsPoint( const BSONObj& point ) const { - return getMin().woCompare( point ) <= 0 && point.woCompare( getMax() ) < 0; + bool Chunk::containsKey( const BSONObj& shardKey ) const { + return getMin().woCompare( shardKey ) <= 0 && shardKey.woCompare( getMax() ) < 0; } - bool ChunkRange::containsPoint( const BSONObj& point ) const { + bool ChunkRange::containsKey( const BSONObj& shardKey ) const { // same as Chunk method - return getMin().woCompare( point ) <= 0 && point.woCompare( getMax() ) < 0; + return getMin().woCompare( shardKey ) <= 0 && shardKey.woCompare( getMax() ) < 0; } bool Chunk::minIsInf() const { - return _manager->getShardKey().globalMin().woCompare( getMin() ) == 0; + return 0 == + _manager->getShardKeyPattern().getKeyPattern().globalMin().woCompare( getMin() ); } bool Chunk::maxIsInf() const { - return _manager->getShardKey().globalMax().woCompare( getMax() ) == 0; + return 0 == + _manager->getShardKeyPattern().getKeyPattern().globalMax().woCompare( getMax() ); } BSONObj Chunk::_getExtremeKey( int sort ) const { Query q; if ( sort == 1 ) { - q.sort( _manager->getShardKey().key() ); + q.sort( _manager->getShardKeyPattern().toBSON() ); } else { // need to invert shard key pattern to sort backwards // TODO: make a helper in ShardKeyPattern? 
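// A minimal sketch of the helper this TODO suggests (hypothetical, not part of
// this change), assuming an ordered, non-hashed key pattern; inverting
// { a : 1, b : -1 } would yield { a : -1, b : 1 }:
//
//     BSONObj invertOrderedKeyPattern(const BSONObj& keyPattern) {
//         BSONObjBuilder inverted;
//         BSONForEach(elem, keyPattern) {
//             // flip the sort direction of each component
//             inverted.append(elem.fieldName(), -elem.numberInt());
//         }
//         return inverted.obj();
//     }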
- BSONObj k = _manager->getShardKey().key(); + BSONObj k = _manager->getShardKeyPattern().toBSON(); BSONObjBuilder r; BSONObjIterator i(k); @@ -246,7 +248,7 @@ namespace mongo { conn.done(); if ( end.isEmpty() ) return BSONObj(); - return _manager->getShardKey().extractKeyFromQueryOrDoc( end ); + return _manager->getShardKeyPattern().extractShardKeyFromDoc(end); } void Chunk::pickMedianKey( BSONObj& medianKey ) const { @@ -255,7 +257,7 @@ namespace mongo { BSONObj result; BSONObjBuilder cmd; cmd.append( "splitVector" , _manager->getns() ); - cmd.append( "keyPattern" , _manager->getShardKey().key() ); + cmd.append( "keyPattern" , _manager->getShardKeyPattern().toBSON() ); cmd.append( "min" , getMin() ); cmd.append( "max" , getMax() ); cmd.appendBool( "force" , true ); @@ -285,7 +287,7 @@ namespace mongo { BSONObj result; BSONObjBuilder cmd; cmd.append( "splitVector" , _manager->getns() ); - cmd.append( "keyPattern" , _manager->getShardKey().key() ); + cmd.append( "keyPattern" , _manager->getShardKeyPattern().toBSON() ); cmd.append( "min" , getMin() ); cmd.append( "max" , getMax() ); cmd.append( "maxChunkSizeBytes" , chunkSize ); @@ -351,7 +353,7 @@ namespace mongo { // the very first (or last) key as a split point. // This heuristic is skipped for "special" shard key patterns that are not likely to // produce monotonically increasing or decreasing values (e.g. hashed shard keys). - if (KeyPattern::isOrderedKeyPattern(skey().key())) { + if (KeyPattern::isOrderedKeyPattern(_manager->getShardKeyPattern().toBSON())) { if ( minIsInf() ) { BSONObj key = _getExtremeKey( 1 ); if ( ! key.isEmpty() ) { @@ -425,7 +427,7 @@ namespace mongo { BSONObjBuilder cmd; cmd.append( "splitChunk" , _manager->getns() ); - cmd.append( "keyPattern" , _manager->getShardKey().key() ); + cmd.append( "keyPattern" , _manager->getShardKeyPattern().toBSON() ); cmd.append( "min" , getMin() ); cmd.append( "max" , getMax() ); cmd.append( "from" , getShard().getName() ); @@ -621,7 +623,7 @@ namespace mongo { BSONObj result; uassert( 10169 , "datasize failed!" , conn->runCommand( "admin" , BSON( "datasize" << _manager->getns() - << "keyPattern" << _manager->getShardKey().key() + << "keyPattern" << _manager->getShardKeyPattern().toBSON() << "min" << getMin() << "max" << getMax() << "maxSize" << ( MaxChunkSize + 1 ) @@ -686,10 +688,6 @@ namespace mongo { return ss.str(); } - ShardKeyPattern Chunk::skey() const { - return _manager->getShardKey(); - } - void Chunk::markAsJumbo() const { // set this first // even if we can't set it in the db @@ -753,7 +751,7 @@ namespace mongo { ChunkManager::ChunkManager( const string& ns, const ShardKeyPattern& pattern , bool unique ) : _ns( ns ), - _key( pattern ), + _keyPattern( pattern.getKeyPattern() ), _unique( unique ), _chunkRanges(), _mutex("ChunkManager"), @@ -770,7 +768,7 @@ namespace mongo { _ns(collDoc[CollectionType::ns()].type() == String ? collDoc[CollectionType::ns()].String() : ""), - _key(collDoc[CollectionType::keyPattern()].type() == Object ? + _keyPattern(collDoc[CollectionType::keyPattern()].type() == Object ? collDoc[CollectionType::keyPattern()].Obj().getOwned() : BSONObj()), _unique(collDoc[CollectionType::unique()].trueValue()), @@ -787,12 +785,12 @@ namespace mongo { // verify( _ns != "" ); - verify( ! _key.key().isEmpty() ); + verify( ! 
_keyPattern.toBSON().isEmpty() ); _version = ChunkVersion::fromBSON( collDoc ); } - void ChunkManager::loadExistingRanges(const string& config, const ChunkManager* oldManager){ + void ChunkManager::loadExistingRanges( const string& config, const ChunkManager* oldManager ){ int tries = 3; while (tries--) { @@ -1074,14 +1072,6 @@ namespace mongo { } } - bool ChunkManager::hasShardKey(const BSONObj& doc) const { - return _key.hasShardKey(doc); - } - - bool ChunkManager::hasTargetableShardKey(const BSONObj& doc) const { - return _key.hasTargetableShardKey(doc); - } - void ChunkManager::calcInitSplitsAndShards( const Shard& primary, const vector<BSONObj>* initPoints, const vector<Shard>* initShards, @@ -1091,7 +1081,8 @@ namespace mongo { verify( _chunkMap.size() == 0 ); unsigned long long numObjects = 0; - Chunk c(this, _key.globalMin(), _key.globalMax(), primary); + Chunk c(this, _keyPattern.getKeyPattern().globalMin(), + _keyPattern.getKeyPattern().globalMax(), primary); if ( !initPoints || !initPoints->size() ) { // discover split points @@ -1163,8 +1154,9 @@ namespace mongo { conn.done(); for ( unsigned i=0; i<=splitPoints.size(); i++ ) { - BSONObj min = i == 0 ? _key.globalMin() : splitPoints[i-1]; - BSONObj max = i < splitPoints.size() ? splitPoints[i] : _key.globalMax(); + BSONObj min = i == 0 ? _keyPattern.getKeyPattern().globalMin() : splitPoints[i-1]; + BSONObj max = i < splitPoints.size() ? + splitPoints[i] : _keyPattern.getKeyPattern().globalMax(); Chunk temp( this , min , max , shards[ i % shards.size() ], version ); @@ -1193,27 +1185,26 @@ namespace mongo { _version = ChunkVersion( 0, 0, version.epoch() ); } - ChunkPtr ChunkManager::findIntersectingChunk( const BSONObj& point ) const { + ChunkPtr ChunkManager::findIntersectingChunk( const BSONObj& shardKey ) const { { - BSONObj foo; - ChunkPtr c; + BSONObj chunkMin; + ChunkPtr chunk; { - ChunkMap::const_iterator it = _chunkMap.upper_bound( point ); + ChunkMap::const_iterator it = _chunkMap.upper_bound( shardKey ); if (it != _chunkMap.end()) { - foo = it->first; - c = it->second; + chunkMin = it->first; + chunk = it->second; } } - if ( c ) { - if ( c->containsPoint( point ) ){ - dassert( c->containsPoint( point ) ); // doesn't use fast-path in extractKey - return c; + if ( chunk ) { + if ( chunk->containsKey( shardKey ) ){ + return chunk; } - PRINT(foo); - PRINT(*c); - PRINT( point ); + PRINT(chunkMin); + PRINT(*chunk); + PRINT( shardKey ); reload(); massert(13141, "Chunk map pointed to incorrect chunk", false); @@ -1221,27 +1212,12 @@ namespace mongo { } msgasserted( 8070 , - str::stream() << "couldn't find a chunk intersecting: " << point + str::stream() << "couldn't find a chunk intersecting: " << shardKey << " for ns: " << _ns << " at version: " << _version.toString() << ", number of chunks: " << _chunkMap.size() ); } - ChunkPtr ChunkManager::findChunkForDoc( const BSONObj& doc ) const { - BSONObj key = _key.extractKeyFromQueryOrDoc( doc ); - return findIntersectingChunk( key ); - } - - ChunkPtr ChunkManager::findChunkOnServer( const Shard& shard ) const { - for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ) { - ChunkPtr c = i->second; - if ( c->getShard() == shard ) - return c; - } - - return ChunkPtr(); - } - void ChunkManager::getShardsForQuery( set<Shard>& shards , const BSONObj& query ) const { CanonicalQuery* canonicalQuery = NULL; Status status = CanonicalQuery::canonicalize( @@ -1265,17 +1241,19 @@ namespace mongo { // Query { a : { $gte : 1, $lt : 2 }, // b : { $gte : 3, $lt : 4 } } // => 
Bounds { a : [1, 2), b : [3, 4) } - IndexBounds bounds = getIndexBoundsForQuery(_key.key(), canonicalQuery); + IndexBounds bounds = getIndexBoundsForQuery(_keyPattern.toBSON(), canonicalQuery); // Transforms bounds for each shard key field into full shard key ranges // for example : // Key { a : 1, b : 1 } // Bounds { a : [1, 2), b : [3, 4) } // => Ranges { a : 1, b : 3 } => { a : 2, b : 4 } - BoundList ranges = KeyPattern::flattenBounds(_key.key(), bounds); + BoundList ranges = _keyPattern.flattenBounds(bounds); - for ( BoundList::const_iterator it=ranges.begin(); it != ranges.end(); ++it ){ - getShardsForRange( shards, it->first /*min*/, it->second /*max*/ ); + for (BoundList::const_iterator it = ranges.begin(); it != ranges.end(); + ++it) { + + getShardsForRange(shards, it->first /*min*/, it->second /*max*/); // once we know we need to visit all shards no need to keep looping if( shards.size() == _shards.size() ) break; @@ -1417,20 +1395,6 @@ namespace mongo { return other.getVersion(shardName).equals(getVersion(shardName)); } - bool ChunkManager::compatibleWith( const Chunk& other ) const { - - // Do this first, b/c findIntersectingChunk asserts if the key isn't similar - if( ! this->_key.hasShardKey( other.getMin() ) ) return false; - // We assume here that chunks will have consistent fields in min/max - - ChunkPtr myChunk = this->findIntersectingChunk( other.getMin() ); - - if( other.getMin() != myChunk->getMin() ) return false; - if( other.getMax() != myChunk->getMax() ) return false; - if( other.getShard() != myChunk->getShard() ) return false; - return true; - } - void ChunkManager::drop( ChunkManagerPtr me ) const { scoped_lock lk( _mutex ); @@ -1546,14 +1510,14 @@ namespace mongo { } void ChunkManager::getInfo( BSONObjBuilder& b ) const { - b.append(CollectionType::keyPattern(), _key.key()); + b.append(CollectionType::keyPattern(), _keyPattern.toBSON()); b.appendBool(CollectionType::unique(), _unique); _version.addEpochToBSON(b, CollectionType::DEPRECATED_lastmod()); } string ChunkManager::toString() const { stringstream ss; - ss << "ChunkManager: " << _ns << " key:" << _key.toString() << '\n'; + ss << "ChunkManager: " << _ns << " key:" << _keyPattern.toString() << '\n'; for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ) { const ChunkPtr c = i->second; ss << "\t" << c->toString() << '\n'; @@ -1598,8 +1562,8 @@ namespace mongo { verify(max != _ranges.end()); verify(min == max); verify(min->second->getShard() == chunk->getShard()); - verify(min->second->containsPoint( chunk->getMin() )); - verify(min->second->containsPoint( chunk->getMax() ) || (min->second->getMax() == chunk->getMax())); + verify(min->second->containsKey( chunk->getMin() )); + verify(min->second->containsKey( chunk->getMax() ) || (min->second->getMax() == chunk->getMax())); } } @@ -1656,38 +1620,6 @@ namespace mongo { return splitThreshold; } - /** This is for testing only, just setting up minimal basic defaults. 
*/ - ChunkManager::ChunkManager() : - _unique(), - _chunkRanges(), - _mutex( "ChunkManager" ), - _sequenceNumber() - {} - - class ChunkObjUnitTest : public StartupTest { - public: - void runChunkVersion() { - vector<ChunkVersion> all; - all.push_back( ChunkVersion(1,1, OID()) ); - all.push_back( ChunkVersion(1,2, OID()) ); - all.push_back( ChunkVersion(2,1, OID()) ); - all.push_back( ChunkVersion(2,2, OID()) ); - - for ( unsigned i=0; i<all.size(); i++ ) { - for ( unsigned j=i+1; j<all.size(); j++ ) { - verify( all[i] < all[j] ); - } - } - - } - - void run() { - runChunkVersion(); - LOG(1) << "shardObjTest passed" << endl; - } - } shardObjTest; - - // ----- to be removed --- extern OID serverID; diff --git a/src/mongo/s/chunk.h b/src/mongo/s/chunk.h index 73b2f22d14f..c5473df380f 100644 --- a/src/mongo/s/chunk.h +++ b/src/mongo/s/chunk.h @@ -31,13 +31,13 @@ #pragma once #include "mongo/base/string_data.h" +#include "mongo/db/keypattern.h" +#include "mongo/db/query/query_solution.h" #include "mongo/platform/atomic_word.h" #include "mongo/s/chunk_version.h" #include "mongo/s/shard.h" -#include "mongo/s/shardkey.h" +#include "mongo/s/shard_key_pattern.h" #include "mongo/util/concurrency/ticketholder.h" -#include "mongo/db/query/query_solution.h" - namespace mongo { @@ -89,12 +89,12 @@ namespace mongo { bool minIsInf() const; bool maxIsInf() const; - // Returns true if this chunk contains the given point, and false otherwise + // Returns true if this chunk contains the given shard key, and false otherwise // // Note: this function takes an extracted *key*, not an original document // (the point may be computed by, say, hashing a given field or projecting // to a subset of fields). - bool containsPoint( const BSONObj& point ) const; + bool containsKey( const BSONObj& shardKey ) const; std::string genID() const; static std::string genID( const std::string& ns , const BSONObj& min ); @@ -279,8 +279,6 @@ namespace mongo { /** initializes _dataWritten with a random value so that a mongos restart wouldn't cause delay in splitting */ static int mkDataWritten(); - - ShardKeyPattern skey() const; }; class ChunkRange { @@ -292,12 +290,12 @@ namespace mongo { const BSONObj& getMax() const { return _max; } // clones of Chunk methods - // Returns true if this ChunkRange contains the given point, and false otherwise + // Returns true if this ChunkRange contains the given shard key, and false otherwise // // Note: this function takes an extracted *key*, not an original document // (the point may be computed by, say, hashing a given field or projecting // to a subset of fields). - bool containsPoint( const BSONObj& point ) const; + bool containsKey( const BSONObj& shardKey ) const; ChunkRange(ChunkMap::const_iterator begin, const ChunkMap::const_iterator end) : _manager(begin->second->getManager()) @@ -375,11 +373,7 @@ namespace mongo { std::string getns() const { return _ns; } - const ShardKeyPattern& getShardKey() const { return _key; } - - bool hasShardKey(const BSONObj& doc) const; - - bool hasTargetableShardKey(const BSONObj& doc) const; + const ShardKeyPattern& getShardKeyPattern() const { return _keyPattern; } bool isUnique() const { return _unique; } @@ -416,26 +410,15 @@ namespace mongo { int numChunks() const { return _chunkMap.size(); } - /** Given a document, returns the chunk which contains that document. - * This works by extracting the shard key part of the given document, then - * calling findIntersectingChunk() on the extracted key. 
- * - * See also the description for findIntersectingChunk(). - */ - ChunkPtr findChunkForDoc( const BSONObj& doc ) const; - - /** Given a key that has been extracted from a document, returns the - * chunk that contains that key. + /** + * Given a key that has been extracted from a document, returns the + * chunk that contains that key. * - * For instance, to locate the chunk for document {a : "foo" , b : "bar"} - * when the shard key is {a : "hashed"}, you can call - * findChunkForDoc() on {a : "foo" , b : "bar"}, or - * findIntersectingChunk() on {a : hash("foo") } + * For instance, to locate the chunk for document {a : "foo" , b : "bar"} + * when the shard key is {a : "hashed"}, you can call + * findIntersectingChunk() on {a : hash("foo") } */ - ChunkPtr findIntersectingChunk( const BSONObj& point ) const; - - - ChunkPtr findChunkOnServer( const Shard& shard ) const; + ChunkPtr findIntersectingChunk( const BSONObj& shardKey ) const; void getShardsForQuery( std::set<Shard>& shards , const BSONObj& query ) const; void getAllShards( std::set<Shard>& all ) const; @@ -467,9 +450,6 @@ namespace mongo { */ bool compatibleWith(const ChunkManager& other, const std::string& shard) const; - bool compatibleWith( const Chunk& other ) const; - bool compatibleWith( ChunkPtr other ) const { if( ! other ) return false; return compatibleWith( *other ); } - std::string toString() const; ChunkVersion getVersion(const std::string& shardName) const; @@ -507,7 +487,7 @@ namespace mongo { // All members should be const for thread-safety const std::string _ns; - const ShardKeyPattern _key; + const ShardKeyPattern _keyPattern; const bool _unique; const ChunkMap _chunkMap; @@ -571,10 +551,8 @@ namespace mongo { friend class Chunk; friend class ChunkRangeManager; // only needed for CRM::assertValid() static AtomicUInt32 NextSequenceNumber; - - /** Just for testing */ + friend class TestableChunkManager; - ChunkManager(); }; // like BSONObjCmp. for use as an STL comparison functor diff --git a/src/mongo/s/chunk_manager_targeter.cpp b/src/mongo/s/chunk_manager_targeter.cpp index 1f3d24ab8c3..11945efd0fd 100644 --- a/src/mongo/s/chunk_manager_targeter.cpp +++ b/src/mongo/s/chunk_manager_targeter.cpp @@ -86,18 +86,9 @@ namespace mongo { Status ChunkManagerTargeter::targetInsert( const BSONObj& doc, ShardEndpoint** endpoint ) const { - if ( !_primary && !_manager ) { - return Status( ErrorCodes::NamespaceNotFound, - str::stream() << "could not target insert in collection " - << getNS().ns() - << "; no metadata found" ); - } + BSONObj shardKey; - if ( _primary ) { - *endpoint = new ShardEndpoint( _primary->getName(), - ChunkVersion::UNSHARDED() ); - } - else { + if ( _manager ) { // // Sharded collections have the following requirements for targeting: @@ -105,22 +96,37 @@ namespace mongo { // Inserts must contain the exact shard key. 
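// For illustration (hypothetical values, using the extraction semantics this
// patch adds in shard_key_pattern.cpp): with pattern { a : 1, "b.c" : 1 },
//
//     extractShardKeyFromDoc(fromjson("{a: 1, b: {c: 2}, d: 3}"))
//         -> { a : 1, b.c : 2 }
//     extractShardKeyFromDoc(fromjson("{a: 1, d: 3}"))
//         -> {} (empty key, so the insert cannot be targeted and is rejected)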
// - if ( !_manager->hasShardKey( doc ) ) { - return Status( ErrorCodes::ShardKeyNotFound, - stream() << "document " << doc - << " does not contain shard key for pattern " - << _manager->getShardKey().key() ); + shardKey = _manager->getShardKeyPattern().extractShardKeyFromDoc(doc); + + // Check shard key exists + if (shardKey.isEmpty()) { + return Status(ErrorCodes::ShardKeyNotFound, + stream() << "document " << doc + << " does not contain shard key for pattern " + << _manager->getShardKeyPattern().toString()); } - ChunkPtr chunk = _manager->findChunkForDoc( doc ); - *endpoint = new ShardEndpoint(chunk->getShard().getName(), - _manager->getVersion(chunk->getShard().getName())); + // Check shard key size on insert + Status status = ShardKeyPattern::checkShardKeySize(shardKey); + if (!status.isOK()) + return status; + } - // Track autosplit stats for sharded collections - _stats->chunkSizeDelta[chunk->getMin()] += doc.objsize(); + // Target the shard key or database primary + if (!shardKey.isEmpty()) { + return targetShardKey(shardKey, doc.objsize(), endpoint); } + else { - return Status::OK(); + if (!_primary) { + return Status(ErrorCodes::NamespaceNotFound, + str::stream() << "could not target insert in collection " + << getNS().ns() << "; no metadata found"); + } + + *endpoint = new ShardEndpoint(_primary->getName(), ChunkVersion::UNSHARDED()); + return Status::OK(); + } } namespace { @@ -177,8 +183,12 @@ namespace mongo { * { _id : { $lt : 30 } } => false * { foo : <anything> } => false */ - bool isExactIdQuery( const BSONObj& query ) { - return query.hasField( "_id" ) && getGtLtOp( query["_id"] ) == BSONObj::Equality; + bool isExactIdQuery(const BSONObj& query) { + static const ShardKeyPattern virtualIdShardKey(BSON("_id" << 1)); + StatusWith<BSONObj> status = virtualIdShardKey.extractShardKeyFromQuery(query); + if (!status.isOK()) + return false; + return !status.getValue()["_id"].eoo(); + } } @@ -197,6 +207,9 @@ namespace mongo { // The rule is simple - If the update is replacement style (no '$set'), we target using the // update. If the update is op style ('$set' etc.), we target using the query. // + // If we have the exact shard key in either the query or replacement doc, we target using + // that extracted key. + // BSONObj query = updateDoc.getQuery(); BSONObj updateExpr = updateDoc.getUpdateExpr(); @@ -209,9 +222,7 @@ namespace mongo { << " has mixed $operator and non-$operator style fields" ); } - BSONObj targetedDoc = updateType == UpdateType_OpStyle ? query : updateExpr; - - bool exactShardKeyQuery = false; + BSONObj shardKey; if ( _manager ) { @@ -222,54 +233,76 @@ namespace mongo { // Non-multi updates must be targeted exactly by shard key *or* exact _id.
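// For illustration (hypothetical values): only equalities on the full key
// pattern paths extract a usable shard key; ranges do not. With pattern { a : 1 },
//
//     extractShardKeyFromQuery(fromjson("{a: 5, b: 2}"))   -> { a : 5 }
//     extractShardKeyFromQuery(fromjson("{a: {$eq: 5}}"))  -> { a : 5 }
//     extractShardKeyFromQuery(fromjson("{a: {$gt: 5}}"))  -> {} (not exact)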
// - exactShardKeyQuery = _manager->hasTargetableShardKey(targetedDoc); + // Get the shard key + if (updateType == UpdateType_OpStyle) { + + // Target using the query + StatusWith<BSONObj> status = + _manager->getShardKeyPattern().extractShardKeyFromQuery(query); - if ( updateDoc.getUpsert() && !exactShardKeyQuery ) { - return Status( ErrorCodes::ShardKeyNotFound, - stream() << "upsert " << updateDoc.toBSON() - << " does not contain shard key for pattern " - << _manager->getShardKey().key() ); + // Bad query + if (!status.isOK()) + return status.getStatus(); + + shardKey = status.getValue(); + } + else { + // Target using the replacement document + shardKey = _manager->getShardKeyPattern().extractShardKeyFromDoc(updateExpr); } - bool exactIdQuery = isExactIdQuery( updateDoc.getQuery() ); + // + // Extra sharded update validation + // - if ( !updateDoc.getMulti() && !exactShardKeyQuery && !exactIdQuery ) { - return Status( ErrorCodes::ShardKeyNotFound, - stream() << "update " << updateDoc.toBSON() - << " does not contain _id or shard key for pattern " - << _manager->getShardKey().key() ); + if (updateDoc.getUpsert()) { + + // Sharded upserts *always* need to be exactly targeted by shard key + if (shardKey.isEmpty()) { + return Status(ErrorCodes::ShardKeyNotFound, + stream() << "upsert " << updateDoc.toBSON() + << " does not contain shard key for pattern " + << _manager->getShardKeyPattern().toString()); + } + + // Also check shard key size on upsert + Status status = ShardKeyPattern::checkShardKeySize(shardKey); + if (!status.isOK()) + return status; } - // Track autosplit stats for sharded collections - // Note: this is only best effort accounting and is not accurate. - if ( exactShardKeyQuery ) { - ChunkPtr chunk = _manager->findChunkForDoc(targetedDoc); - _stats->chunkSizeDelta[chunk->getMin()] += - ( query.objsize() + updateExpr.objsize() ); + // Validate that single (non-multi) sharded updates are targeted by shard key or _id + if (!updateDoc.getMulti() && shardKey.isEmpty() + && !isExactIdQuery(updateDoc.getQuery())) { + return Status(ErrorCodes::ShardKeyNotFound, + stream() << "update " << updateDoc.toBSON() + << " does not contain _id or shard key for pattern " + << _manager->getShardKeyPattern().toString()); } } - Status result = Status::OK(); - if (exactShardKeyQuery) { + // Target the shard key, query, or replacement doc + if (!shardKey.isEmpty()) { // We can't rely on our query targeting to be exact ShardEndpoint* endpoint = NULL; - result = targetShardKey(targetedDoc, &endpoint); + Status result = targetShardKey(shardKey, + (query.objsize() + updateExpr.objsize()), + &endpoint); endpoints->push_back(endpoint); - - invariant(result.isOK()); - invariant(NULL != endpoint); + return result; + } + else if (updateType == UpdateType_OpStyle) { + return targetQuery(query, endpoints); } else { - result = targetQuery(targetedDoc, endpoints); + return targetDoc(updateExpr, endpoints); } - - return result; } Status ChunkManagerTargeter::targetDelete( const BatchedDeleteDocument& deleteDoc, vector<ShardEndpoint*>* endpoints ) const { - bool exactShardKeyQuery = false; + BSONObj shardKey; if ( _manager ) { @@ -279,32 +312,44 @@ namespace mongo { // Limit-1 deletes must be targeted exactly by shard key *or* exact _id // - exactShardKeyQuery = _manager->hasTargetableShardKey(deleteDoc.getQuery()); - bool exactIdQuery = isExactIdQuery( deleteDoc.getQuery() ); + // Get the shard key + StatusWith<BSONObj> status = + _manager->getShardKeyPattern().extractShardKeyFromQuery(deleteDoc.getQuery()); + 
+ // Bad query + if (!status.isOK()) + return status.getStatus(); + + shardKey = status.getValue(); - if ( deleteDoc.getLimit() == 1 && !exactShardKeyQuery && !exactIdQuery ) { - return Status( ErrorCodes::ShardKeyNotFound, - stream() << "delete " << deleteDoc.toBSON() - << " does not contain _id or shard key for pattern " - << _manager->getShardKey().key() ); + // Validate that single (limit-1) sharded deletes are targeted by shard key or _id + if (deleteDoc.getLimit() == 1 && shardKey.isEmpty() + && !isExactIdQuery(deleteDoc.getQuery())) { + return Status(ErrorCodes::ShardKeyNotFound, + stream() << "delete " << deleteDoc.toBSON() + << " does not contain _id or shard key for pattern " + << _manager->getShardKeyPattern().toString()); } } - Status result = Status::OK(); - if (exactShardKeyQuery) { + // Target the shard key or delete query + if (!shardKey.isEmpty()) { // We can't rely on our query targeting to be exact ShardEndpoint* endpoint = NULL; - result = targetShardKey(deleteDoc.getQuery(), &endpoint); + Status result = targetShardKey(shardKey, 0, &endpoint); endpoints->push_back(endpoint); - - invariant(result.isOK()); - invariant(NULL != endpoint); + return result; } else { - result = targetQuery(deleteDoc.getQuery(), endpoints); + return targetQuery(deleteDoc.getQuery(), endpoints); } + } - return result; + Status ChunkManagerTargeter::targetDoc(const BSONObj& doc, + vector<ShardEndpoint*>* endpoints) const { + // NOTE: This is weird and fragile, but it's the way our language works right now - + // documents are either A) invalid or B) valid equality queries over themselves. + return targetQuery(doc, endpoints); } Status ChunkManagerTargeter::targetQuery( const BSONObj& query, @@ -339,13 +384,17 @@ namespace mongo { return Status::OK(); } - Status ChunkManagerTargeter::targetShardKey(const BSONObj& doc, + Status ChunkManagerTargeter::targetShardKey(const BSONObj& shardKey, + long long estDataSize, ShardEndpoint** endpoint) const { - invariant(NULL != _manager); - dassert(_manager->hasShardKey(doc)); - ChunkPtr chunk = _manager->findChunkForDoc(doc); + ChunkPtr chunk = _manager->findIntersectingChunk(shardKey); + + // Track autosplit stats for sharded collections + // Note: this is only best effort accounting and is not accurate. + if (estDataSize > 0) + _stats->chunkSizeDelta[chunk->getMin()] += estDataSize; Shard shard = chunk->getShard(); *endpoint = new ShardEndpoint(shard.getName(), diff --git a/src/mongo/s/chunk_manager_targeter.h b/src/mongo/s/chunk_manager_targeter.h index 3d3d4ea8995..cf3c6e3fa38 100644 --- a/src/mongo/s/chunk_manager_targeter.h +++ b/src/mongo/s/chunk_manager_targeter.h @@ -116,6 +116,13 @@ namespace mongo { Status refreshNow( RefreshType refreshType ); /** + * Returns a vector of ShardEndpoints where a document might need to be placed. + * + * Returns !OK with message if replacement could not be targeted + */ + Status targetDoc(const BSONObj& doc, vector<ShardEndpoint*>* endpoints) const; + + /** * Returns a vector of ShardEndpoints for a potentially multi-shard query. * * Returns !OK with message if query could not be targeted. @@ -124,8 +131,13 @@ namespace mongo { /** * Returns a ShardEndpoint for an exact shard key query. + * + * Also has the side effect of updating the chunks stats with an estimate of the amount of + * data targeted at this shard key. 
*/ - Status targetShardKey( const BSONObj& doc, ShardEndpoint** endpoint ) const; + Status targetShardKey(const BSONObj& doc, + long long estDataSize, + ShardEndpoint** endpoint) const; NamespaceString _nss; diff --git a/src/mongo/s/chunk_manager_targeter_test.cpp b/src/mongo/s/chunk_manager_targeter_test.cpp index 02bebd82f8a..0a023ef15bf 100644 --- a/src/mongo/s/chunk_manager_targeter_test.cpp +++ b/src/mongo/s/chunk_manager_targeter_test.cpp @@ -32,6 +32,7 @@ #include "mongo/db/namespace_string.h" #include "mongo/db/query/interval.h" #include "mongo/s/chunk.h" +#include "mongo/s/shard_key_pattern.h" #include "mongo/unittest/unittest.h" #include "mongo/util/log.h" @@ -412,7 +413,8 @@ namespace { BoundList expectedList; expectedList.push_back(make_pair(fromjson("{a: 0}"), fromjson("{a: 0}"))); - BoundList list = KeyPattern::flattenBounds(fromjson("{a: 1}"), indexBounds); + ShardKeyPattern skeyPattern(fromjson("{a: 1}")); + BoundList list = skeyPattern.flattenBounds(indexBounds); CheckBoundList(list, expectedList); } @@ -427,7 +429,8 @@ namespace { BoundList expectedList; expectedList.push_back(make_pair(fromjson("{a: 2}"), fromjson("{a: 3}"))); - BoundList list = KeyPattern::flattenBounds(fromjson("{a: 1}"), indexBounds); + ShardKeyPattern skeyPattern(fromjson("{a: 1}")); + BoundList list = skeyPattern.flattenBounds(indexBounds); CheckBoundList(list, expectedList); } @@ -450,7 +453,8 @@ namespace { fromjson("{ a: 2, b: 2, c: 2 }"), fromjson("{ a: 3, b: 3, c: 3 }"))); - BoundList list = KeyPattern::flattenBounds(fromjson("{a: 1, b: 1, c: 1}"), indexBounds); + ShardKeyPattern skeyPattern(fromjson("{a: 1, b: 1, c: 1}")); + BoundList list = skeyPattern.flattenBounds(indexBounds); CheckBoundList(list, expectedList); } @@ -488,7 +492,8 @@ namespace { fromjson("{ a: 0, b: 6, c: 2 }"), fromjson("{ a: 0, b: 6, c: 3 }"))); - BoundList list = KeyPattern::flattenBounds(fromjson("{a: 1, b: 1, c: 1}"), indexBounds); + ShardKeyPattern skeyPattern(fromjson("{a: 1, b: 1, c: 1}")); + BoundList list = skeyPattern.flattenBounds(indexBounds); CheckBoundList(list, expectedList); } @@ -518,7 +523,9 @@ namespace { expectedList.push_back(make_pair( fromjson("{ a: 0, b: 4, c: 2 }"), fromjson("{ a: 1, b: 6, c: 3 }"))); - BoundList list = KeyPattern::flattenBounds(fromjson("{a: 1, b: 1, c: 1}"), indexBounds); + + ShardKeyPattern skeyPattern(fromjson("{a: 1, b: 1, c: 1}")); + BoundList list = skeyPattern.flattenBounds(indexBounds); CheckBoundList(list, expectedList); } diff --git a/src/mongo/s/commands/cluster_merge_chunks_cmd.cpp b/src/mongo/s/commands/cluster_merge_chunks_cmd.cpp index 2e79c431563..57bca2b1fb5 100644 --- a/src/mongo/s/commands/cluster_merge_chunks_cmd.cpp +++ b/src/mongo/s/commands/cluster_merge_chunks_cmd.cpp @@ -79,33 +79,15 @@ namespace mongo { static BSONField<string> shardNameField; static BSONField<string> configField; - // TODO: Same limitations as other mongos metadata commands, sometimes we'll be stale here - // and fail. Need to better integrate targeting with commands. 
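// The rewritten command below validates user-provided chunk bounds against the
// shard key pattern and normalizes them before targeting. For illustration
// (hypothetical values): with pattern { a : 1, b : 1 },
//
//     normalizeShardKey(fromjson("{b: 2, a: 1}"))  -> { a : 1, b : 2 }
//     normalizeShardKey(fromjson("{a: 1}"))        -> {} (incomplete key)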
- ShardPtr guessMergeShard( const NamespaceString& nss, const BSONObj& minKey ) { - - DBConfigPtr config = grid.getDBConfig( nss.ns() ); - if ( !config->isSharded( nss ) ) { - config->reload(); - if ( !config->isSharded( nss ) ) { - return ShardPtr(); - } - } - - ChunkManagerPtr manager = config->getChunkManager( nss ); - if ( !manager ) return ShardPtr(); - ChunkPtr chunk = manager->findChunkForDoc( minKey ); - if ( !chunk ) return ShardPtr(); - return ShardPtr( new Shard( chunk->getShard() ) ); - } - // TODO: This refresh logic should be consolidated - void refreshChunkCache( const NamespaceString& nss ) { + ChunkManagerPtr refreshChunkCache(const NamespaceString& nss) { - DBConfigPtr config = grid.getDBConfig( nss.ns() ); - if ( !config->isSharded( nss ) ) return; + DBConfigPtr config = grid.getDBConfig(nss.ns()); + if (!config->isSharded(nss)) + return ChunkManagerPtr(); // Refreshes chunks as a side-effect - config->getChunkManagerIfExists( nss, true ); + return config->getChunkManagerIfExists(nss, true); } @@ -152,28 +134,40 @@ namespace mongo { } // This refreshes the chunk metadata if stale. - refreshChunkCache( NamespaceString( ns ) ); + ChunkManagerPtr manager = refreshChunkCache(NamespaceString(ns)); - ShardPtr mergeShard = guessMergeShard( NamespaceString( ns ), minKey ); + if (!manager) { + errmsg = (string) "collection " + ns + " is not sharded, cannot merge chunks"; + return false; + } - if ( !mergeShard ) { - errmsg = (string)"could not find shard for merge range starting at " - + minKey.toString(); + if (!manager->getShardKeyPattern().isShardKey(minKey) + || !manager->getShardKeyPattern().isShardKey(maxKey)) { + errmsg = stream() << "shard key bounds " << "[" << minKey << "," << maxKey << ")" + << " are not valid for shard key pattern " + << manager->getShardKeyPattern().toBSON(); return false; } + minKey = manager->getShardKeyPattern().normalizeShardKey(minKey); + maxKey = manager->getShardKeyPattern().normalizeShardKey(maxKey); + + ChunkPtr firstChunk = manager->findIntersectingChunk(minKey); + verify(firstChunk); + Shard shard = firstChunk->getShard(); + BSONObjBuilder remoteCmdObjB; remoteCmdObjB.append( cmdObj[ ClusterMergeChunksCommand::nsField() ] ); remoteCmdObjB.append( cmdObj[ ClusterMergeChunksCommand::boundsField() ] ); remoteCmdObjB.append( ClusterMergeChunksCommand::configField(), configServer.getPrimary().getAddress().toString() ); remoteCmdObjB.append( ClusterMergeChunksCommand::shardNameField(), - mergeShard->getName() ); + shard.getName() ); BSONObj remoteResult; // Throws, but handled at level above. Don't want to rewrap to preserve exception // formatting. - ScopedDbConnection conn( mergeShard->getAddress() ); + ScopedDbConnection conn( shard.getAddress() ); bool ok = conn->runCommand( "admin", remoteCmdObjB.obj(), remoteResult ); conn.done(); diff --git a/src/mongo/s/commands_admin.cpp b/src/mongo/s/commands_admin.cpp index 72ee9a02ddb..03e1f706f55 100644 --- a/src/mongo/s/commands_admin.cpp +++ b/src/mongo/s/commands_admin.cpp @@ -494,35 +494,24 @@ namespace mongo { return false; } - bool isHashedShardKey = // br - ( IndexNames::findPluginName( proposedKey ) == IndexNames::HASHED ); - - // Currently the allowable shard keys are either - // i) a hashed single field, e.g. { a : "hashed" }, or - // ii) a compound list of ascending fields, e.g. 
{ a : 1 , b : 1 } - if ( isHashedShardKey ) { - // case i) - if ( proposedKey.nFields() > 1 ) { - errmsg = "hashed shard keys currently only support single field keys"; - return false; - } - if ( cmdObj["unique"].trueValue() ) { - // it's possible to ensure uniqueness on the hashed field by - // declaring an additional (non-hashed) unique index on the field, - // but the hashed shard key itself should not be declared unique - errmsg = "hashed shard keys cannot be declared unique."; - return false; - } - } else { - // case ii) - BSONForEach(e, proposedKey) { - if (!e.isNumber() || e.number() != 1.0) { - errmsg = str::stream() << "Unsupported shard key pattern. Pattern must" - << " either be a single hashed field, or a list" - << " of ascending fields."; - return false; - } - } + ShardKeyPattern proposedKeyPattern(proposedKey); + if (!proposedKeyPattern.isValid()) { + errmsg = str::stream() << "Unsupported shard key pattern. Pattern must" + << " either be a single hashed field, or a list" + << " of ascending fields."; + return false; + } + + bool isHashedShardKey = proposedKeyPattern.isHashedPattern(); + + if (isHashedShardKey && cmdObj["unique"].trueValue()) { + dassert(proposedKey.nFields() == 1); + + // it's possible to ensure uniqueness on the hashed field by + // declaring an additional (non-hashed) unique index on the field, + // but the hashed shard key itself should not be declared unique + errmsg = "hashed shard keys cannot be declared unique."; + return false; } if ( ns.find( ".system." ) != string::npos ) { errmsg = "can't shard system namespaces"; return false; } @@ -756,8 +745,15 @@ namespace mongo { LOG(0) << "CMD: shardcollection: " << cmdObj << endl; - audit::logShardCollection(ClientBasic::getCurrent(), ns, proposedKey, careAboutUnique); - config->shardCollection( ns , proposedKey , careAboutUnique , &initSplits ); + audit::logShardCollection(ClientBasic::getCurrent(), + ns, + proposedKey, + careAboutUnique); + + config->shardCollection(ns, + proposedKeyPattern, + careAboutUnique, + &initSplits); result << "collectionsharded" << ns; @@ -802,7 +798,7 @@ namespace mongo { vector<BSONObj> subSplits; for ( unsigned i = 0 ; i <= allSplits.size(); i++){ if ( i == allSplits.size() || - ! currentChunk->containsPoint( allSplits[i] ) ) { + ! currentChunk->containsKey( allSplits[i] ) ) { if ( !
subSplits.empty() ){ Status status = currentChunk->multiSplit(subSplits, NULL); if ( !status.isOK() ){ @@ -986,20 +982,74 @@ namespace mongo { ChunkPtr chunk; if (!find.isEmpty()) { - chunk = info->findChunkForDoc(find); + + StatusWith<BSONObj> status = + info->getShardKeyPattern().extractShardKeyFromQuery(find); + + // Bad query + if (!status.isOK()) + return appendCommandStatus(result, status.getStatus()); + + BSONObj shardKey = status.getValue(); + + if (shardKey.isEmpty()) { + errmsg = stream() << "no shard key found in chunk query " << find; + return false; + } + + chunk = info->findIntersectingChunk(shardKey); + verify(chunk.get()); } else if (!bounds.isEmpty()) { - chunk = info->findIntersectingChunk(bounds[0].Obj()); + + if (!info->getShardKeyPattern().isShardKey(bounds[0].Obj()) + || !info->getShardKeyPattern().isShardKey(bounds[1].Obj())) { + errmsg = stream() << "shard key bounds " << "[" << bounds[0].Obj() << "," + << bounds[1].Obj() << ")" + << " are not valid for shard key pattern " + << info->getShardKeyPattern().toBSON(); + return false; + } + + BSONObj minKey = info->getShardKeyPattern().normalizeShardKey(bounds[0].Obj()); + BSONObj maxKey = info->getShardKeyPattern().normalizeShardKey(bounds[1].Obj()); + + chunk = info->findIntersectingChunk(minKey); verify(chunk.get()); - if (chunk->getMin() != bounds[0].Obj() || - chunk->getMax() != bounds[1].Obj()) { - errmsg = "no chunk found from the given upper and lower bounds"; + if (chunk->getMin().woCompare(minKey) != 0 + || chunk->getMax().woCompare(maxKey) != 0) { + errmsg = stream() << "no chunk found with the shard key bounds " << "[" + << minKey << "," << maxKey << ")"; return false; } } else { // middle + + if (!info->getShardKeyPattern().isShardKey(middle)) { + errmsg = stream() << "new split key " << middle + << " is not valid for shard key pattern " + << info->getShardKeyPattern().toBSON(); + return false; + } + + middle = info->getShardKeyPattern().normalizeShardKey(middle); + + // Check shard key size when manually provided + Status status = ShardKeyPattern::checkShardKeySize(middle); + if (!status.isOK()) + return appendCommandStatus(result, status); + chunk = info->findIntersectingChunk(middle); + verify(chunk.get()); + + if (chunk->getMin().woCompare(middle) == 0 + || chunk->getMax().woCompare(middle) == 0) { + errmsg = stream() << "new split key " << middle + << " is a boundary key of existing chunk " << "[" + << chunk->getMin() << "," << chunk->getMax() << ")"; + return false; + } } verify(chunk.get()); @@ -1019,17 +1069,6 @@ namespace mongo { } } else { - // sanity check if the key provided is a valid split point - if ( ( middle == chunk->getMin() ) || ( middle == chunk->getMax() ) ) { - errmsg = "cannot split on initial or final chunk's key"; - return false; - } - - if (!fieldsMatch(middle, info->getShardKey().key())){ - errmsg = "middle has different fields (or different order) than shard key"; - return false; - } - vector<BSONObj> splitPoints; splitPoints.push_back( middle ); Status status = chunk->multiSplit(splitPoints, NULL); @@ -1116,17 +1155,53 @@ namespace mongo { // This refreshes the chunk metadata if stale. ChunkManagerPtr info = config->getChunkManager( ns, true ); - ChunkPtr c = find.isEmpty() ? - info->findIntersectingChunk( bounds[0].Obj() ) : - info->findChunkForDoc( find ); + ChunkPtr chunk; - if ( ! 
bounds.isEmpty() && ( c->getMin() != bounds[0].Obj() || - c->getMax() != bounds[1].Obj() ) ) { - errmsg = "no chunk found with those upper and lower bounds"; - return false; + if (!find.isEmpty()) { + + StatusWith<BSONObj> status = + info->getShardKeyPattern().extractShardKeyFromQuery(find); + + // Bad query + if (!status.isOK()) + return appendCommandStatus(result, status.getStatus()); + + BSONObj shardKey = status.getValue(); + + if (shardKey.isEmpty()) { + errmsg = stream() << "no shard key found in chunk query " << find; + return false; + } + + chunk = info->findIntersectingChunk(shardKey); + verify(chunk.get()); + } + else { // bounds + + if (!info->getShardKeyPattern().isShardKey(bounds[0].Obj()) + || !info->getShardKeyPattern().isShardKey(bounds[1].Obj())) { + errmsg = stream() << "shard key bounds " << "[" << bounds[0].Obj() << "," + << bounds[1].Obj() << ")" + << " are not valid for shard key pattern " + << info->getShardKeyPattern().toBSON(); + return false; + } + + BSONObj minKey = info->getShardKeyPattern().normalizeShardKey(bounds[0].Obj()); + BSONObj maxKey = info->getShardKeyPattern().normalizeShardKey(bounds[1].Obj()); + + chunk = info->findIntersectingChunk(minKey); + verify(chunk.get()); + + if (chunk->getMin().woCompare(minKey) != 0 + || chunk->getMax().woCompare(maxKey) != 0) { + errmsg = stream() << "no chunk found with the shard key bounds " << "[" + << minKey << "," << maxKey << ")"; + return false; + } } - const Shard& from = c->getShard(); + const Shard& from = chunk->getShard(); if ( from == to ) { errmsg = "that chunk is already on that shard"; @@ -1156,12 +1231,12 @@ namespace mongo { } BSONObj res; - if (!c->moveAndCommit(to, - maxChunkSizeBytes, - writeConcern.get(), - cmdObj["_waitForDelete"].trueValue(), - maxTimeMS.getValue(), - res)) { + if (!chunk->moveAndCommit(to, + maxChunkSizeBytes, + writeConcern.get(), + cmdObj["_waitForDelete"].trueValue(), + maxTimeMS.getValue(), + res)) { errmsg = "move failed"; result.append( "cause" , res ); if ( !res["code"].eoo() ) { diff --git a/src/mongo/s/commands_public.cpp b/src/mongo/s/commands_public.cpp index a4602543f07..859cd1ca1b0 100644 --- a/src/mongo/s/commands_public.cpp +++ b/src/mongo/s/commands_public.cpp @@ -1182,9 +1182,19 @@ namespace mongo { massert( 13002 , "shard internal error chunk manager should never be null" , cm ); BSONObj filter = cmdObj.getObjectField("query"); - uassert(13343, "query for sharded findAndModify must have shardkey", cm->hasShardKey(filter)); - ChunkPtr chunk = cm->findChunkForDoc(filter); + StatusWith<BSONObj> status = + cm->getShardKeyPattern().extractShardKeyFromQuery(filter); + + // Bad query + if (!status.isOK()) + return appendCommandStatus(result, status.getStatus()); + + BSONObj shardKey = status.getValue(); + uassert(13343, "query for sharded findAndModify must have shardkey", + !shardKey.isEmpty()); + + ChunkPtr chunk = cm->findIntersectingChunk(shardKey); ShardConnection conn( chunk->getShard() , fullns ); BSONObj res; bool ok = conn->runCommand( conf->getName() , cmdObj , res ); @@ -1236,11 +1246,14 @@ namespace mongo { BSONObj keyPattern = cmdObj.getObjectField( "keyPattern" ); uassert( 13408, "keyPattern must equal shard key", - cm->getShardKey().key() == keyPattern ); + cm->getShardKeyPattern().toBSON() == keyPattern ); uassert( 13405, str::stream() << "min value " << min << " does not have shard key", - cm->hasShardKey(min) ); + cm->getShardKeyPattern().isShardKey(min) ); uassert( 13406, str::stream() << "max value " << max << " does not have shard key", - 
cm->hasShardKey(max) ); + cm->getShardKeyPattern().isShardKey(max) ); + + min = cm->getShardKeyPattern().normalizeShardKey(min); + max = cm->getShardKeyPattern().normalizeShardKey(max); // yes these are doubles... double size = 0; @@ -1478,7 +1491,7 @@ namespace mongo { ChunkManagerPtr cm = conf->getChunkManager( fullns ); massert( 13091 , "how could chunk manager be null!" , cm ); - if(cm->getShardKey().key() == BSON("files_id" << 1)) { + if(cm->getShardKeyPattern().toBSON() == BSON("files_id" << 1)) { BSONObj finder = BSON("files_id" << cmdObj.firstElement()); vector<Strategy::CommandResult> results; @@ -1489,7 +1502,7 @@ namespace mongo { result.appendElements(res); return res["ok"].trueValue(); } - else if (cm->getShardKey().key() == BSON("files_id" << 1 << "n" << 1)) { + else if (cm->getShardKeyPattern().toBSON() == BSON("files_id" << 1 << "n" << 1)) { int n = 0; BSONObj lastResult; @@ -2025,11 +2038,12 @@ namespace mongo { confOut->getAllShards( shardSet ); vector<Shard> outShards( shardSet.begin() , shardSet.end() ); - confOut->shardCollection( finalColLong , - sortKey , - true , - &sortedSplitPts , - &outShards ); + ShardKeyPattern sortKeyPattern(sortKey); + confOut->shardCollection(finalColLong, + sortKeyPattern, + true, + &sortedSplitPts, + &outShards); } diff --git a/src/mongo/s/config.cpp b/src/mongo/s/config.cpp index 5e3e12c6255..078a5de0bb1 100644 --- a/src/mongo/s/config.cpp +++ b/src/mongo/s/config.cpp @@ -90,7 +90,7 @@ namespace mongo { if( manager->numChunks() != 0 ){ _cm = ChunkManagerPtr( manager ); - _key = manager->getShardKey().key().getOwned(); + _key = manager->getShardKeyPattern().toBSON().getOwned(); _unqiue = manager->isUnique(); _dirty = true; _dropped = false; @@ -191,7 +191,12 @@ namespace mongo { /** * */ - ChunkManagerPtr DBConfig::shardCollection( const string& ns , ShardKeyPattern fieldsAndOrder , bool unique , vector<BSONObj>* initPoints, vector<Shard>* initShards ) { + ChunkManagerPtr DBConfig::shardCollection(const string& ns, + const ShardKeyPattern& fieldsAndOrder, + bool unique, + vector<BSONObj>* initPoints, + vector<Shard>* initShards) { + uassert( 8042 , "db doesn't have sharding enabled" , _shardingEnabled ); uassert( 13648 , str::stream() << "can't shard collection because not all config servers are up" , configServer.allUp() ); @@ -207,7 +212,7 @@ namespace mongo { // Record start in changelog BSONObjBuilder collectionDetail; - collectionDetail.append("shardKey", fieldsAndOrder.key()); + collectionDetail.append("shardKey", fieldsAndOrder.toBSON()); collectionDetail.append("collection", ns); collectionDetail.append("primary", getPrimary().toString()); BSONArray a; @@ -436,7 +441,7 @@ namespace mongo { } temp.reset(new ChunkManager(oldManager->getns(), - oldManager->getShardKey(), + oldManager->getShardKeyPattern(), oldManager->isUnique())); temp->loadExistingRanges(configServer.getPrimary().getConnString(), oldManager.get()); diff --git a/src/mongo/s/config.h b/src/mongo/s/config.h index 6dba0f3b9aa..fceb3f6b219 100644 --- a/src/mongo/s/config.h +++ b/src/mongo/s/config.h @@ -38,7 +38,7 @@ #include "mongo/client/dbclient_rs.h" #include "mongo/s/chunk.h" #include "mongo/s/shard.h" -#include "mongo/s/shardkey.h" +#include "mongo/s/shard_key_pattern.h" namespace mongo { @@ -134,11 +134,11 @@ namespace mongo { * WARNING: It's not safe to place initial chunks onto non-primary shards using this method. * The initShards parameter allows legacy behavior expected by map-reduce. 
*/ - ChunkManagerPtr shardCollection( const std::string& ns , - ShardKeyPattern fieldsAndOrder , - bool unique , - std::vector<BSONObj>* initPoints = 0, - std::vector<Shard>* initShards = 0 ); + ChunkManagerPtr shardCollection(const std::string& ns, + const ShardKeyPattern& fieldsAndOrder, + bool unique, + std::vector<BSONObj>* initPoints = 0, + std::vector<Shard>* initShards = 0); /** @return true if there was sharding info to remove diff --git a/src/mongo/s/d_migrate.cpp b/src/mongo/s/d_migrate.cpp index 4465879479c..6c50878f183 100644 --- a/src/mongo/s/d_migrate.cpp +++ b/src/mongo/s/d_migrate.cpp @@ -227,7 +227,7 @@ namespace mongo { const BSONObj& max , const BSONObj& shardKeyPattern ) { ShardKeyPattern shardKey( shardKeyPattern ); - BSONObj k = shardKey.extractKeyFromQueryOrDoc( obj ); + BSONObj k = shardKey.extractShardKeyFromDoc( obj ); return k.woCompare( min ) >= 0 && k.woCompare( max ) < 0; } diff --git a/src/mongo/s/d_split.cpp b/src/mongo/s/d_split.cpp index aa9e1ca3958..f879cc74202 100644 --- a/src/mongo/s/d_split.cpp +++ b/src/mongo/s/d_split.cpp @@ -699,8 +699,11 @@ namespace mongo { return false; } - if (!isShardKeySizeValid(endKey, &errmsg)) { - warning() << errmsg << endl; + // Make sure splits don't create too-big shard keys + Status status = ShardKeyPattern::checkShardKeySize(endKey); + if (!status.isOK()) { + errmsg = status.reason(); + warning() << errmsg; return false; } diff --git a/src/mongo/s/shard_key_pattern.cpp b/src/mongo/s/shard_key_pattern.cpp index d1dbf9cd1ab..35404e9ea0d 100644 --- a/src/mongo/s/shard_key_pattern.cpp +++ b/src/mongo/s/shard_key_pattern.cpp @@ -28,34 +28,411 @@ #include "mongo/s/shard_key_pattern.h" -#include "mongo/db/keypattern.h" +#include <boost/shared_ptr.hpp> +#include <vector> + +#include "mongo/db/field_ref.h" +#include "mongo/db/field_ref_set.h" +#include "mongo/db/hasher.h" +#include "mongo/db/index_names.h" +#include "mongo/db/ops/path_support.h" +#include "mongo/db/query/canonical_query.h" #include "mongo/util/mongoutils/str.h" namespace mongo { - bool isUniqueIndexCompatible(const BSONObj& shardKeyPattern, - const BSONObj& uIndexKeyPattern) { + using boost::shared_ptr; + using std::auto_ptr; + using std::pair; + using std::make_pair; + using std::vector; + using pathsupport::EqualityMatches; + using mongoutils::str::stream; - if ( !uIndexKeyPattern.isEmpty() - && string( "_id" ) == uIndexKeyPattern.firstElementFieldName() ) { - return true; - } + const int ShardKeyPattern::kMaxShardKeySizeBytes = 512; + const unsigned int ShardKeyPattern::kMaxFlattenedInCombinations = 4000000; + + Status ShardKeyPattern::checkShardKeySize(const BSONObj& shardKey) { + if (shardKey.objsize() <= kMaxShardKeySizeBytes) + return Status::OK(); - return shardKeyPattern.isFieldNamePrefixOf( uIndexKeyPattern ); + return Status(ErrorCodes::ShardKeyTooBig, + stream() << "shard keys must be less than " << kMaxShardKeySizeBytes + << " bytes, but key " << shardKey << " is " << shardKey.objsize() + << " bytes"); } - bool isShardKeySizeValid(const BSONObj& shardKey, string* errMsg) { - string dummy; - if (errMsg == NULL) { - errMsg = &dummy; + static bool isHashedPatternEl(const BSONElement& el) { + return el.type() == String && el.String() == IndexNames::HASHED; + } + + /** + * Currently the allowable shard keys are either + * i) a hashed single field, e.g. { a : "hashed" }, or + * ii) a compound list of ascending, potentially-nested field paths, e.g. 
{ a : 1 , b.c : 1 } + */ + static vector<FieldRef*> parseShardKeyPattern(const BSONObj& keyPattern) { + + OwnedPointerVector<FieldRef> parsedPaths; + static const vector<FieldRef*> empty; + + BSONObjIterator patternIt(keyPattern); + while (patternIt.more()) { + + BSONElement patternEl = patternIt.next(); + parsedPaths.push_back(new FieldRef(patternEl.fieldNameStringData())); + const FieldRef& patternPath = *parsedPaths.back(); + + // Empty path + if (patternPath.numParts() == 0) + return empty; + + // Extra "." in path? + if (patternPath.dottedField() != patternEl.fieldNameStringData()) + return empty; + + // Empty parts of the path, ".."? + for (size_t i = 0; i < patternPath.numParts(); ++i) { + if (patternPath.getPart(i).size() == 0) + return empty; + } + + // Numeric and ascending (1.0), or "hashed" and single field + if (!patternEl.isNumber()) { + if (keyPattern.nFields() != 1 || !isHashedPatternEl(patternEl)) + return empty; + } + else if (patternEl.numberInt() != 1) { + return empty; + } } - if (shardKey.objsize() > kMaxShardKeySize) { - *errMsg = str::stream() << "shard key is larger than " << kMaxShardKeySize << " bytes: " - << shardKey.objsize(); + return parsedPaths.release(); + } + + ShardKeyPattern::ShardKeyPattern(const BSONObj& keyPattern) + : _keyPatternPaths(parseShardKeyPattern(keyPattern)), + _keyPattern(_keyPatternPaths.empty() ? BSONObj() : keyPattern) { + } + + ShardKeyPattern::ShardKeyPattern(const KeyPattern& keyPattern) + : _keyPatternPaths(parseShardKeyPattern(keyPattern.toBSON())), + _keyPattern(_keyPatternPaths.empty() ? KeyPattern(BSONObj()) : keyPattern) { + } + + bool ShardKeyPattern::isValid() const { + return !_keyPattern.toBSON().isEmpty(); + } + + bool ShardKeyPattern::isHashedPattern() const { + return isHashedPatternEl(_keyPattern.toBSON().firstElement()); + } + + const KeyPattern& ShardKeyPattern::getKeyPattern() const { + return _keyPattern; + } + + const BSONObj& ShardKeyPattern::toBSON() const { + return _keyPattern.toBSON(); + } + + string ShardKeyPattern::toString() const { + return toBSON().toString(); + } + + static bool isShardKeyElement(const BSONElement& element, bool allowRegex) { + // TODO: Disallow regex all the time + if (element.eoo() || element.type() == Array || (!allowRegex && element.type() == RegEx) + || (element.type() == Object && !element.embeddedObject().okForStorage())) + return false; + return true; + } + + bool ShardKeyPattern::isShardKey(const BSONObj& shardKey) const { + + // Shard keys are always of the form: { 'nested.path' : value, 'nested.path2' : value } + + if (!isValid()) return false; + + BSONObjIterator patternIt(_keyPattern.toBSON()); + while (patternIt.more()) { + + BSONElement patternEl = patternIt.next(); + + BSONElement keyEl = shardKey[patternEl.fieldNameStringData()]; + if (!isShardKeyElement(keyEl, true)) + return false; } return true; } + + BSONObj ShardKeyPattern::normalizeShardKey(const BSONObj& shardKey) const { + + // Shard keys are always of the form: { 'nested.path' : value, 'nested.path2' : value } + // and in the same order as the key pattern + + if (!isValid()) + return BSONObj(); + + // We want to return an empty key if users pass us something that's not a shard key + if (shardKey.nFields() > _keyPattern.toBSON().nFields()) + return BSONObj(); + + BSONObjBuilder keyBuilder; + BSONObjIterator patternIt(_keyPattern.toBSON()); + while (patternIt.more()) { + + BSONElement patternEl = patternIt.next(); + + BSONElement keyEl = shardKey[patternEl.fieldNameStringData()]; + + if (!isShardKeyElement(keyEl, 
true)) + return BSONObj(); + + keyBuilder.appendAs(keyEl, patternEl.fieldName()); + } + + dassert(isShardKey(keyBuilder.asTempObj())); + return keyBuilder.obj(); + } + + static BSONElement extractKeyElementFromMatchable(const MatchableDocument& matchable, + const StringData& pathStr) { + ElementPath path; + path.init(pathStr); + path.setTraverseNonleafArrays(false); + path.setTraverseLeafArray(false); + + MatchableDocument::IteratorHolder matchIt(&matchable, &path); + if (!matchIt->more()) + return BSONElement(); + + BSONElement matchEl = matchIt->next().element(); + // We shouldn't have more than one element - we don't expand arrays + dassert(!matchIt->more()); + + return matchEl; + } + + BSONObj // + ShardKeyPattern::extractShardKeyFromMatchable(const MatchableDocument& matchable) const { + + if (!isValid()) + return BSONObj(); + + BSONObjBuilder keyBuilder; + + BSONObjIterator patternIt(_keyPattern.toBSON()); + while (patternIt.more()) { + + BSONElement patternEl = patternIt.next(); + BSONElement matchEl = extractKeyElementFromMatchable(matchable, + patternEl.fieldNameStringData()); + + if (!isShardKeyElement(matchEl, true)) + return BSONObj(); + + if (isHashedPatternEl(patternEl)) { + keyBuilder.append(patternEl.fieldName(), + BSONElementHasher::hash64(matchEl, + BSONElementHasher::DEFAULT_HASH_SEED)); + } + else { + // NOTE: The matched element may *not* have the same field name as the path - + // index keys don't contain field names, for example + keyBuilder.appendAs(matchEl, patternEl.fieldName()); + } + } + + dassert(isShardKey(keyBuilder.asTempObj())); + return keyBuilder.obj(); + } + + BSONObj ShardKeyPattern::extractShardKeyFromDoc(const BSONObj& doc) const { + BSONMatchableDocument matchable(doc); + return extractShardKeyFromMatchable(matchable); + } + + static BSONElement findEqualityElement(const EqualityMatches& equalities, + const FieldRef& path) { + + int parentPathPart; + const BSONElement& parentEl = pathsupport::findParentEqualityElement(equalities, + path, + &parentPathPart); + + if (parentPathPart == static_cast<int>(path.numParts())) + return parentEl; + + if (parentEl.type() != Object) + return BSONElement(); + + StringData suffixStr = path.dottedSubstring(parentPathPart, path.numParts()); + BSONMatchableDocument matchable(parentEl.Obj()); + return extractKeyElementFromMatchable(matchable, suffixStr); + } + + StatusWith<BSONObj> ShardKeyPattern::extractShardKeyFromQuery(const BSONObj& basicQuery) const { + + if (!isValid()) + return StatusWith<BSONObj>(BSONObj()); + + // Extract equalities from query + CanonicalQuery* rawQuery; + Status queryStatus = + CanonicalQuery::canonicalize("", basicQuery, &rawQuery, WhereCallbackNoop()); + if (!queryStatus.isOK()) + return StatusWith<BSONObj>(queryStatus); + scoped_ptr<CanonicalQuery> query(rawQuery); + + EqualityMatches equalities; + // TODO: Build the path set initially? + FieldRefSet keyPatternPathSet(_keyPatternPaths.vector()); + // We only care about extracting the full key pattern paths - if they don't exist (or are + // conflicting), we don't contain the shard key. 
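// For illustration (hypothetical values): an equality may be written on the
// dotted path itself or on a parent object, and both resolve to the same key.
// With pattern { "a.b" : 1 },
//
//     extractShardKeyFromQuery(fromjson("{'a.b': 5}"))   -> { a.b : 5 }
//     extractShardKeyFromQuery(fromjson("{a: {b: 5}}"))  -> { a.b : 5 }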
+ Status eqStatus = pathsupport::extractFullEqualityMatches(*query->root(), + keyPatternPathSet, + &equalities); + // NOTE: Failure to extract equality matches just means we return no shard key - it's not + // an error we propagate + if (!eqStatus.isOK()) + return StatusWith<BSONObj>(BSONObj()); + + // Extract key from equalities + // NOTE: The method below is equivalent to constructing a BSONObj and running + // extractShardKeyFromMatchable, but doesn't require creating the doc. + + BSONObjBuilder keyBuilder; + // Iterate the parsed paths to avoid re-parsing + for (OwnedPointerVector<FieldRef>::const_iterator it = _keyPatternPaths.begin(); + it != _keyPatternPaths.end(); ++it) { + + const FieldRef& patternPath = **it; + BSONElement equalEl = findEqualityElement(equalities, patternPath); + + if (!isShardKeyElement(equalEl, false)) + return StatusWith<BSONObj>(BSONObj()); + + if (isHashedPattern()) { + keyBuilder.append(patternPath.dottedField(), + BSONElementHasher::hash64(equalEl, + BSONElementHasher::DEFAULT_HASH_SEED)); + } + else { + // NOTE: The equal element may *not* have the same field name as the path - + // nested $and, $eq, for example + keyBuilder.appendAs(equalEl, patternPath.dottedField()); + } + } + + dassert(isShardKey(keyBuilder.asTempObj())); + return StatusWith<BSONObj>(keyBuilder.obj()); + } + + bool ShardKeyPattern::isUniqueIndexCompatible(const BSONObj& uniqueIndexPattern) const { + + dassert(!KeyPattern::isHashedKeyPattern(uniqueIndexPattern)); + + if (!uniqueIndexPattern.isEmpty() + && string("_id") == uniqueIndexPattern.firstElementFieldName()) { + return true; + } + + return _keyPattern.toBSON().isFieldNamePrefixOf(uniqueIndexPattern); + } + + BoundList ShardKeyPattern::flattenBounds(const IndexBounds& indexBounds) const { + + invariant(indexBounds.fields.size() == (size_t)_keyPattern.toBSON().nFields()); + + // If any field is unsatisfied, return empty bound list. + for (vector<OrderedIntervalList>::const_iterator it = indexBounds.fields.begin(); + it != indexBounds.fields.end(); it++) { + if (it->intervals.size() == 0) { + return BoundList(); + } + } + // To construct our bounds we will generate intervals based on bounds for + // the first field, then compound intervals based on constraints for the first + // 2 fields, then compound intervals for the first 3 fields, etc. + // As we loop through the fields, we start generating new intervals that will later + // get extended in another iteration of the loop. We define these partially constructed + // intervals using pairs of BSONObjBuilders (shared_ptrs, since after one iteration of the + // loop they still must exist outside their scope). + typedef vector<pair<shared_ptr<BSONObjBuilder>, shared_ptr<BSONObjBuilder> > > BoundBuilders; + + BoundBuilders builders; + builders.push_back(make_pair(shared_ptr<BSONObjBuilder>(new BSONObjBuilder()), + shared_ptr<BSONObjBuilder>(new BSONObjBuilder()))); + BSONObjIterator keyIter(_keyPattern.toBSON()); + // While equalityOnly remains true, we are dealing only with equality predicates (no range + // or $in queries yet). 
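// Worked example (a sketch; the values come from the flattenBounds comment in
// shard_key_pattern.h): for key pattern { a : 1, b : 1 } with bounds
// a : { $in : [1, 2] } and b : { $in : [3, 4, 5] }, field 'a' produces the two
// point intervals a=1 and a=2 (flipping equalityOnly to false), and field 'b'
// then extends each builder pair with its min and max, yielding
// { a : 1, b : 3 } -> { a : 1, b : 5 } and { a : 2, b : 3 } -> { a : 2, b : 5 }.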
+ bool equalityOnly = true; + + for (size_t i = 0; i < indexBounds.fields.size(); i++) { + BSONElement e = keyIter.next(); + + StringData fieldName = e.fieldNameStringData(); + + // get the relevant intervals for this field, but we may have to transform the + // list of what's relevant according to the expression for this field + const OrderedIntervalList& oil = indexBounds.fields[i]; + const vector<Interval>& intervals = oil.intervals; + + if (equalityOnly) { + if (intervals.size() == 1 && intervals.front().isPoint()) { + // this field is only a single point-interval + BoundBuilders::const_iterator j; + for (j = builders.begin(); j != builders.end(); ++j) { + j->first->appendAs(intervals.front().start, fieldName); + j->second->appendAs(intervals.front().end, fieldName); + } + } + else { + // This clause is the first to generate more than a single point. + // We only execute this clause once. After that, we simplify the bound + // extensions to prevent combinatorial explosion. + equalityOnly = false; + + BoundBuilders newBuilders; + + for (BoundBuilders::const_iterator it = builders.begin(); it != builders.end(); + ++it) { + BSONObj first = it->first->obj(); + BSONObj second = it->second->obj(); + + for (vector<Interval>::const_iterator interval = intervals.begin(); + interval != intervals.end(); ++interval) { + uassert( 17439, + "combinatorial limit of $in partitioning of results exceeded" , + newBuilders.size() < kMaxFlattenedInCombinations ); + newBuilders.push_back( // + make_pair(shared_ptr<BSONObjBuilder>(new BSONObjBuilder()), + shared_ptr<BSONObjBuilder>(new BSONObjBuilder()))); + newBuilders.back().first->appendElements(first); + newBuilders.back().second->appendElements(second); + newBuilders.back().first->appendAs(interval->start, fieldName); + newBuilders.back().second->appendAs(interval->end, fieldName); + } + } + builders = newBuilders; + } + } + else { + // if we've already generated a range or multiple point-intervals + // just extend what we've generated with min/max bounds for this field + BoundBuilders::const_iterator j; + for (j = builders.begin(); j != builders.end(); ++j) { + j->first->appendAs(intervals.front().start, fieldName); + j->second->appendAs(intervals.back().end, fieldName); + } + } + } + BoundList ret; + for (BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i) + ret.push_back(make_pair(i->first->obj(), i->second->obj())); + return ret; + } + } diff --git a/src/mongo/s/shard_key_pattern.h b/src/mongo/s/shard_key_pattern.h index 3f2c034a5e9..d6cc67f56df 100644 --- a/src/mongo/s/shard_key_pattern.h +++ b/src/mongo/s/shard_key_pattern.h @@ -28,20 +28,202 @@ #pragma once +#include "mongo/base/owned_pointer_vector.h" +#include "mongo/base/status.h" +#include "mongo/base/status_with.h" #include "mongo/db/jsobj.h" +#include "mongo/db/keypattern.h" +#include "mongo/db/matcher/matchable.h" +#include "mongo/db/query/index_bounds.h" namespace mongo { + class FieldRef; + + /** + * Helper struct when generating flattened bounds below + * + * A BoundList contains intervals specified by inclusive start + * and end bounds. The intervals should be nonoverlapping and occur in + * the specified direction of traversal. For example, given a simple index {i:1} + * and direction +1, one valid BoundList is: (1, 2); (4, 6). The same BoundList + * would be valid for index {i:-1} with direction -1. + */ + typedef std::vector<std::pair<BSONObj, BSONObj> > BoundList; + /** - * This file contains BSON comparison and manipulation of a shard key pattern. 
+ * A ShardKeyPattern represents the key pattern used to partition data in a collection between + * shards. Shard keys are extracted from documents, simple queries, or Matchable objects based + * on the paths within the key pattern. * - * No higher-level abstractions should be placed here - in particular, anything that requires - * metadata or index information aside from the key patterns themselves. + * Shard key pattern paths may be nested, but are not traversable through arrays - this means + * a shard key pattern path always yields a single value. */ + class ShardKeyPattern { + public: + + // Maximum size of shard key + static const int kMaxShardKeySizeBytes; + + // Maximum number of intervals produced by $in queries. + static const unsigned int kMaxFlattenedInCombinations; + + /** + * Helper to check shard key size and generate an appropriate error message. + */ + static Status checkShardKeySize(const BSONObj& shardKey); + + /** + * Constructs a shard key pattern from a BSON pattern document. If the document is not a + * valid shard key pattern, !isValid() will be true and key extraction will fail. + */ + explicit ShardKeyPattern(const BSONObj& keyPattern); + + /** + * Constructs a shard key pattern from a key pattern, see above. + */ + explicit ShardKeyPattern(const KeyPattern& keyPattern); + + bool isValid() const; + + bool isHashedPattern() const; + + const KeyPattern& getKeyPattern() const; + + const BSONObj& toBSON() const; + + std::string toString() const; + + /** + * Returns true if the provided document is a shard key - i.e. has the same fields as the + * shard key pattern and valid shard key values. + */ + bool isShardKey(const BSONObj& shardKey) const; + + /** + * Given a shard key, return it in normal form where the fields are in the same order as + * the shard key pattern fields. + * + * If the shard key is invalid, returns BSONObj() + */ + BSONObj normalizeShardKey(const BSONObj& shardKey) const; + + /** + * Given a MatchableDocument, extracts the shard key corresponding to the key pattern. + * For each path in the shard key pattern, extracts a value from the matchable document. + * + * Paths to shard key fields must not contain arrays at any level, and shard keys may not + * be array fields, undefined, or non-storable sub-documents. If the shard key pattern is + * a hashed key pattern, this method performs the hashing. + * + * If a shard key cannot be extracted, returns an empty BSONObj(). + * + * Examples: + * If 'this' KeyPattern is { a : 1 } + * { a: "hi" , b : 4} --> returns { a : "hi" } + * { c : 4 , a : 2 } --> returns { a : 2 } + * { b : 2 } -> returns {} + * { a : [1,2] } -> returns {} + * If 'this' KeyPattern is { a : "hashed" } + * { a: 1 } --> returns { a : NumberLong("5902408780260971510") } + * If 'this' KeyPattern is { 'a.b' : 1 } + * { a : { b : "hi" } } --> returns { 'a.b' : "hi" } + * { a : [{ b : "hi" }] } --> returns {} + */ + BSONObj extractShardKeyFromMatchable(const MatchableDocument& matchable) const; + + /** + * Given a document, extracts the shard key corresponding to the key pattern. + * See above. + */ + BSONObj extractShardKeyFromDoc(const BSONObj& doc) const; + + /** + * Given a simple BSON query, extracts the shard key corresponding to the key pattern + * from equality matches in the query. The query expression *must not* be a complex query + * with sorts or other attributes. + * + * Logically, the equalities in the BSON query can be serialized into a BSON document and + * then a shard key is extracted from this equality document. 
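* For instance (a sketch mirroring the unit tests): for key pattern { a : 1 }, the
* query { $and : [ { a : { $eq : 10 } }, { b : "20" } ] } logically serializes to
* the equality document { a : 10, b : "20" }, from which { a : 10 } is extracted.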
+ * + * NOTE: BSON queries and BSON documents look similar but are different languages. Use the + * correct shard key extraction function. + * + * Returns !OK status if the query cannot be parsed. Returns an empty BSONObj() if there is + * no shard key found in the query equalities. + * + * Examples: + * If the key pattern is { a : 1 } + * { a : "hi", b : 4 } --> returns { a : "hi" } + * { a : { $eq : "hi" }, b : 4 } --> returns { a : "hi" } + * { $and : [{a : { $eq : "hi" }}, { b : 4 }] } --> returns { a : "hi" } + * If the key pattern is { 'a.b' : 1 } + * { a : { b : "hi" } } --> returns { 'a.b' : "hi" } + * { 'a.b' : "hi" } --> returns { 'a.b' : "hi" } + * { a : { b : { $eq : "hi" } } } --> returns {} because the query language treats this as + * a : { $eq : { b : ... } } + */ + StatusWith<BSONObj> extractShardKeyFromQuery(const BSONObj& basicQuery) const; + + /** + * Returns true if the shard key pattern can ensure that the unique index pattern is + * respected across all shards. + * + * Primarily this just checks whether the shard key pattern field names are equal to or a + * prefix of the unique index pattern field names. Since documents with the same fields in + * the shard key pattern are guaranteed to go to the same shard, and all documents must + * contain the full shard key, a unique index with a shard key pattern prefix can be sure + * when resolving duplicates that documents on other shards will have different shard keys, + * and so are not duplicates. + * + * Hashed shard key patterns are similar to ordinary patterns in that they guarantee that + * identical shard keys go to the same shard. + * + * Examples: + * shard key {a : 1} is compatible with a unique index on {_id : 1} + * shard key {a : 1} is compatible with a unique index on {a : 1 , b : 1} + * shard key {a : 1} is compatible with a unique index on {a : -1 , b : 1 } + * shard key {a : "hashed"} is compatible with a unique index on {a : 1} + * shard key {a : 1} is not compatible with a unique index on {b : 1} + * shard key {a : "hashed" , b : 1 } is not compatible with a unique index on { b : 1 } + * + * All unique index patterns starting with _id are assumed to be enforceable by the fact + * that _ids must be unique, and so all unique _id prefixed indexes are compatible with + * any shard key pattern. + * + * NOTE: We assume 'uniqueIndexPattern' is a valid unique index pattern - a pattern like + * { k : "hashed" } is not capable of being a unique index and is an invalid argument to + * this method. + */ + bool isUniqueIndexCompatible(const BSONObj& uniqueIndexPattern) const; + + /** + * Return an ordered list of bounds generated using this KeyPattern and the + * bounds from the IndexBounds. This function is used in sharding to + * determine where to route queries according to the shard key pattern. + * + * Examples: + * + * Key { a: 1 }, Bounds a: [0] => { a: 0 } -> { a: 0 } + * Key { a: 1 }, Bounds a: [2, 3) => { a: 2 } -> { a: 3 } // bound inclusion ignored. + * + * The bounds returned by this function may be a superset of those defined + * by the constraints. For instance, if this KeyPattern is {a : 1, b: 1} + * Bounds: { a : {$in : [1,2]} , b : {$in : [3,4,5]} } + * => {a : 1 , b : 3} -> {a : 1 , b : 5}, {a : 2 , b : 3} -> {a : 2 , b : 5} + * + * If the IndexBounds are not defined for all the fields in this key pattern, + * meaning some fields are unsatisfied, an empty BoundList may be returned. 
+ * + */ + BoundList flattenBounds(const IndexBounds& indexBounds) const; + + private: - const int kMaxShardKeySize = 512; + // Ordered, parsed paths + const OwnedPointerVector<FieldRef> _keyPatternPaths; - bool isUniqueIndexCompatible(const BSONObj& shardKeyPattern, const BSONObj& uIndexKeyPattern); + const KeyPattern _keyPattern; + }; - bool isShardKeySizeValid(const BSONObj& shardKey, std::string* errMsg); } diff --git a/src/mongo/s/shard_key_pattern_test.cpp b/src/mongo/s/shard_key_pattern_test.cpp new file mode 100644 index 00000000000..7410cc3dd60 --- /dev/null +++ b/src/mongo/s/shard_key_pattern_test.cpp @@ -0,0 +1,487 @@ +/* Copyright 2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/s/shard_key_pattern.h" + +#include "mongo/db/hasher.h" +#include "mongo/db/json.h" +#include "mongo/unittest/unittest.h" + +namespace { + + using namespace mongo; + + TEST(ShardKeyPattern, ValidShardKeyPatternSingle) { + + BSONObj empty; + ASSERT(!ShardKeyPattern(empty).isValid()); + + // + // Single field ShardKeyPatterns + // + + ASSERT(ShardKeyPattern(BSON("a" << 1)).isValid()); + ASSERT(ShardKeyPattern(BSON("a" << 1)).isValid()); + ASSERT(ShardKeyPattern(BSON("a" << 1.0f)).isValid()); + ASSERT(ShardKeyPattern(BSON("a" << (long long)1L)).isValid()); + + ASSERT(!ShardKeyPattern(BSON("a" << -1)).isValid()); + ASSERT(!ShardKeyPattern(BSON("a" << -1.0)).isValid()); + ASSERT(!ShardKeyPattern(BSON("a" << "1")).isValid()); + + ASSERT(ShardKeyPattern(BSON("a" << "hashed")).isValid()); + ASSERT(!ShardKeyPattern(BSON("a" << "hash")).isValid()); + ASSERT(!ShardKeyPattern(BSON("" << 1)).isValid()); + ASSERT(!ShardKeyPattern(BSON("." 
<< 1)).isValid()); + + } + + TEST(ShardKeyPattern, ValidShardKeyPatternComposite) { + + // + // Composite ShardKeyPatterns + // + + ASSERT(ShardKeyPattern(BSON("a" << 1 << "b" << 1)).isValid()); + ASSERT(ShardKeyPattern(BSON("a" << 1.0f << "b" << 1.0)).isValid()); + ASSERT(!ShardKeyPattern(BSON("a" << 1 << "b" << -1)).isValid()); + ASSERT(!ShardKeyPattern(BSON("a" << 1 << "b" << "1")).isValid()); + + ASSERT(ShardKeyPattern(BSON("a" << 1 << "b" << 1.0 << "c" << 1.0f)).isValid()); + ASSERT(!ShardKeyPattern(BSON("a" << 1 << "b." << 1.0)).isValid()); + ASSERT(!ShardKeyPattern(BSON("a" << 1 << "" << 1.0)).isValid()); + + } + + TEST(ShardKeyPattern, ValidShardKeyPatternNested) { + + // + // Nested ShardKeyPatterns + // + + ASSERT(ShardKeyPattern(BSON("a.b" << 1)).isValid()); + ASSERT(!ShardKeyPattern(BSON("a.b" << -1)).isValid()); + ASSERT(ShardKeyPattern(BSON("a.b.c.d" << 1.0)).isValid()); + + ASSERT(!ShardKeyPattern(BSON("a" << BSON( "b" << 1 ))).isValid()); + + ASSERT(!ShardKeyPattern(BSON("a.b." << 1)).isValid()); + ASSERT(!ShardKeyPattern(BSON("a.b.." << 1)).isValid()); + ASSERT(!ShardKeyPattern(BSON("a..b" << 1)).isValid()); + + ASSERT(ShardKeyPattern(BSON("a" << 1 << "c.d" << 1.0 << "e.f.g" << 1.0f)).isValid()); + ASSERT(ShardKeyPattern(BSON("a" << 1 << "a.b" << 1.0 << "a.b.c" << 1.0f)).isValid()); + + ASSERT(!ShardKeyPattern(BSON("a" << 1 << "a.b." << 1.0)).isValid()); + ASSERT(!ShardKeyPattern(BSON("a" << BSON( "b" << 1 ) << "c.d" << 1.0)).isValid()); + + } + + TEST(ShardKeyPattern, IsShardKey) { + + ShardKeyPattern pattern(BSON("a.b" << 1 << "c" << 1.0f)); + + ASSERT(pattern.isShardKey(BSON("a.b" << 10 << "c" << 30))); + ASSERT(pattern.isShardKey(BSON("c" << 30 << "a.b" << 10))); + + ASSERT(!pattern.isShardKey(BSON("b" << 10))); + ASSERT(!pattern.isShardKey(BSON("a" << 10 << "c" << 30))); + ASSERT(!pattern.isShardKey(BSON("a" << BSON("b" << 10) << "c" << 30))); + } + + static BSONObj normKey(const ShardKeyPattern& pattern, const BSONObj& doc) { + return pattern.normalizeShardKey(doc); + } + + TEST(ShardKeyPattern, NormalizeShardKey) { + + ShardKeyPattern pattern(BSON("a.b" << 1 << "c" << 1.0f)); + + ASSERT_EQUALS(normKey(pattern, BSON("a.b" << 10 << "c" << 30)), + BSON("a.b" << 10 << "c" << 30)); + ASSERT_EQUALS(normKey(pattern, BSON("c" << 30 << "a.b" << 10)), + BSON("a.b" << 10 << "c" << 30)); + + ASSERT_EQUALS(normKey(pattern, BSON("b" << 10)), BSONObj()); + ASSERT_EQUALS(normKey(pattern, BSON("a" << 10 << "c" << 30)), BSONObj()); + ASSERT_EQUALS(normKey(pattern, BSON("a.b" << BSON("$gt" << 10) << "c" << 30)), BSONObj()); + } + + static BSONObj docKey(const ShardKeyPattern& pattern, const BSONObj& doc) { + return pattern.extractShardKeyFromDoc(doc); + } + + TEST(ShardKeyPattern, ExtractDocShardKeySingle) { + + // + // Single field ShardKeyPatterns + // + + ShardKeyPattern pattern(BSON("a" << 1)); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:10}")), fromjson("{a:10}")); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:10, b:'20'}")), fromjson("{a:10}")); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:{b:10}, c:30}")), fromjson("{a:{b:10}}")); + const BSONRegEx regex("abc"); + ASSERT_EQUALS(docKey(pattern, BSON("a" << regex << "b" << "20")), BSON("a" << regex)); + const BSONObj ref = BSON("$ref" << "coll" << "$id" << 1); + ASSERT_EQUALS(docKey(pattern, BSON("a" << ref)), BSON("a" << ref)); + + ASSERT_EQUALS(docKey(pattern, BSONObj()), BSONObj()); + ASSERT_EQUALS(docKey(pattern, fromjson("{b:10}")), BSONObj()); + ASSERT_EQUALS(docKey(pattern, BSON("" << 10)), BSONObj()); + 
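// Arrays, operator-like sub-documents ($-prefixed field names), and query
// expressions are not storable shard key values, so extraction returns an
// empty object: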
ASSERT_EQUALS(docKey(pattern, fromjson("{a:[1, 2]}")), BSONObj()); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:{$invalid:true}}")), BSONObj()); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:{$gt:10}}")), BSONObj()); + // BSONObjIterator breaks this for now + //ASSERT_EQUALS(docKey(pattern, BSON("a" << 10 << "a" << 20)), BSONObj()); + } + + TEST(ShardKeyPattern, ExtractDocShardKeyCompound) { + + // + // Compound ShardKeyPatterns + // + + ShardKeyPattern pattern(BSON("a" << 1 << "b" << 1.0)); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:10, b:'20'}")), fromjson("{a:10, b:'20'}")); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:10, b:'20', c:30}")), + fromjson("{a:10, b:'20'}")); + ASSERT_EQUALS(docKey(pattern, BSON("c" << 30 << "b" << "20" << "a" << 10)), + fromjson("{a:10, b:'20'}")); + + ASSERT_EQUALS(docKey(pattern, fromjson("{a:10, b:[1, 2]}")), BSONObj()); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:10, b:{$invalid:true}}")), BSONObj()); + ASSERT_EQUALS(docKey(pattern, fromjson("{b:20}")), BSONObj()); + ASSERT_EQUALS(docKey(pattern, BSON("" << 10 << "b" << "20")), BSONObj()); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:10, b:{$gt:20}}")), BSONObj()); + + // Ordering + ASSERT_EQUALS(docKey(pattern, BSON("b" << 20 << "a" << 10)).firstElement().numberInt(), 10); + } + + TEST(ShardKeyPattern, ExtractDocShardKeyNested) { + + // + // Nested ShardKeyPatterns + // + + ShardKeyPattern pattern(BSON("a.b" << 1 << "c" << 1.0f)); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:{b:10}, c:30}")), fromjson("{'a.b':10, c:30}")); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:{d:[1,2],b:10},c:30,d:40}")), + fromjson("{'a.b':10, c:30}")); + const BSONObj ref = BSON("$ref" << "coll" << "$id" << 1); + ASSERT_EQUALS(docKey(pattern, BSON("a" << BSON( "b" << ref) << "c" << 30)), + BSON("a.b" << ref << "c" << 30)); + + ASSERT_EQUALS(docKey(pattern, fromjson("{a:10, c:30}")), BSONObj()); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:{d:40}, c:30}")), BSONObj()); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:[{b:10}, {b:20}], c:30}")), BSONObj()); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:{b:[10, 20]}, c:30}")), BSONObj()); + } + + TEST(ShardKeyPattern, ExtractDocShardKeyDeepNested) { + + // + // Deeply nested ShardKeyPatterns + // + + ShardKeyPattern pattern(BSON("a.b.c" << 1)); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:{b:{c:10}}}")), fromjson("{'a.b.c':10}")); + + ASSERT_EQUALS(docKey(pattern, fromjson("{a:[{b:{c:10}}]}")), BSONObj()); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:{b:[{c:10}]}}")), BSONObj()); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:{b:{c:[10, 20]}}}")), BSONObj()); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:{b:[{c:10}, {c:20}]}}")), BSONObj()); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:[{b:{c:10}},{b:{c:20}}]}")), BSONObj()); + ASSERT_EQUALS(docKey(pattern, fromjson("{a:[{b:[{c:10},{c:20}]},{b:[{c:30},{c:40}]}]}}")), + BSONObj()); + } + + TEST(ShardKeyPattern, ExtractDocShardKeyHashed) { + + // + // Hashed ShardKeyPattern + // + + const string value = "12345"; + const BSONObj bsonValue = BSON("" << value); + const long long hashValue = BSONElementHasher::hash64(bsonValue.firstElement(), + BSONElementHasher::DEFAULT_HASH_SEED); + + ShardKeyPattern pattern(BSON("a.b" << "hashed")); + ASSERT_EQUALS(docKey(pattern, BSON("a" << BSON("b" << value))), BSON("a.b" << hashValue)); + ASSERT_EQUALS(docKey(pattern, BSON("a" << BSON("b" << value) << "c" << 30)), + BSON("a.b" << hashValue)); + ASSERT_EQUALS(docKey(pattern, BSON("a" << BSON("c" << 30 << "b" << value))), + BSON("a.b" << hashValue)); + 
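// A field missing from the document, an array value, or an array anywhere
// along the 'a.b' path cannot be hashed into a shard key: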
+ ASSERT_EQUALS(docKey(pattern, BSON("a" << BSON("c" << value))), BSONObj()); + ASSERT_EQUALS(docKey(pattern, BSON("a" << BSON("b" << BSON_ARRAY(value)))), BSONObj()); + ASSERT_EQUALS(docKey(pattern, BSON("a" << BSON_ARRAY(BSON("b" << value)))), BSONObj()); + } + + static BSONObj queryKey(const ShardKeyPattern& pattern, const BSONObj& query) { + StatusWith<BSONObj> status = pattern.extractShardKeyFromQuery(query); + if (!status.isOK()) + return BSONObj(); + return status.getValue(); + } + + TEST(ShardKeyPattern, ExtractQueryShardKeySingle) { + + // + // Single field ShardKeyPatterns + // + + ShardKeyPattern pattern(BSON("a" << 1)); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:10}")), fromjson("{a:10}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:10, b:'20'}")), fromjson("{a:10}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:{b:10}, c:30}")), fromjson("{a:{b:10}}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:10, b:{$gt:20}}")), fromjson("{a:10}")); + + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:{$gt:10}}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:10, b:{$invalid:'20'}}")), BSONObj()); + + // Doc key extraction shouldn't work with query + ASSERT_EQUALS(docKey(pattern, fromjson("{a:{$eq:[10, 20]}, c:30}")), BSONObj()); + + // $eq/$or/$and/$all + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:{$eq:10}}")), fromjson("{a:10}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{$or:[{a:{$eq:10}}]}")), fromjson("{a:10}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{$and:[{a:{$eq:10}},{b:'20'}]}")), + fromjson("{a:10}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:{$all:[10]}}")), fromjson("{a:10}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{$or:[{a:{$eq:10}},{a:10}]}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{$and:[{a:10},{a:10}]}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:{$all:[10,10]}}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{$or:[{a:{$eq:10}},{b:'20'}]}")), BSONObj()); + + // Regex can't be extracted from query + const BSONRegEx regex("abc"); + ASSERT_EQUALS(queryKey(pattern, BSON("a" << regex << "b" << "20")), BSONObj()); + } + + TEST(ShardKeyPattern, ExtractQueryShardKeyCompound) { + + // + // Compound ShardKeyPatterns + // + + ShardKeyPattern pattern(BSON("a" << 1 << "b" << 1.0)); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:10, b:'20'}")), fromjson("{a:10, b:'20'}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:10, b:'20', c:30}")), + fromjson("{a:10, b:'20'}")); + ASSERT_EQUALS(queryKey(pattern, BSON("c" << 30 << "b" << "20" << "a" << 10)), + fromjson("{a:10, b:'20'}")); + + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:10, b:[1, 2]}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:10, b:{$invalid:true}}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{b:20}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, BSON("" << 10 << "b" << "20")), BSONObj()); + + // $eq/$or/$and/$all + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:{$eq:10}, b:{$all:['20']}}")), + fromjson("{a:10, b:'20'}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{$and:[{a:{$eq:10},b:{$eq:'20'}}]}")), + fromjson("{a:10, b:'20'}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{$and:[{a:{$eq:10}},{b:{$eq:'20'}}]}")), + fromjson("{a:10, b:'20'}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:10, b:{$gt:20}}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{$or:[{a:{$eq:10}},{b:'20'}]}")), BSONObj()); + + // Ordering + ASSERT_EQUALS(queryKey(pattern, BSON("b" << 20 << "a" << 
10)).firstElement().numberInt(), + 10); + + } + + TEST(ShardKeyPattern, ExtractQueryShardKeyNested) { + + // + // Nested ShardKeyPatterns + // + + ShardKeyPattern pattern(BSON("a.b" << 1 << "c" << 1.0f)); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:{b:10}, c:30}")), + fromjson("{'a.b':10, c:30}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{'a.b':{$eq:10}, c:30, d:40}")), + fromjson("{'a.b':10, c:30}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{$or:[{'a.b':10, c:30, d:40}]}")), + fromjson("{'a.b':10, c:30}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{'a.b':{$all:[10]}, c:30, d:40}")), + fromjson("{'a.b':10, c:30}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:{b:10,d:40}, c:30}")), + fromjson("{'a.b':10, c:30}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{$and:[{'a.b':{$eq:10}}, {c:30}]}")), + fromjson("{'a.b':10, c:30}")); + + // Nested $eq is actually a document element + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:{b:{$eq:10}}, c:30}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{$and:[{a:{b:{$eq:10}}}, {c:30}]}")), BSONObj()); + + ASSERT_EQUALS(queryKey(pattern, fromjson("{$or:[{a:{b:{$eq:10}}}, {c:30}]}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:10, c:30}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:{b:10}, c:{$gt:30}}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:{d:40}, c:30}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:[{b:10}, {b:20}], c:30}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:{b:{$eq:[10, 20]}}, c:30}")), BSONObj()); + } + + TEST(ShardKeyPattern, ExtractQueryShardKeyDeepNested) { + + // + // Deeply nested ShardKeyPatterns + // + + ShardKeyPattern pattern(BSON("a.b.c" << 1)); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:{b:{c:10}}}")), fromjson("{'a.b.c':10}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{'a.b.c':10}")), fromjson("{'a.b.c':10}")); + ASSERT_EQUALS(queryKey(pattern, fromjson("{'a.b.c':{$eq:10}}")), fromjson("{'a.b.c':10}")); + + // Arrays at any nesting level means we can't extract a shard key + ASSERT_EQUALS(queryKey(pattern, fromjson("{'a.b.c':[10]}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{'a.b':[{c:10}]}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:[{b:{c:10}}]}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:{b:[{c:10}]}}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:{b:{c:[10, 20]}}}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:{b:[{c:10}, {c:20}]}}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:[{b:{c:10}},{b:{c:20}}]}")), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, fromjson("{a:[{b:[{c:10},{c:20}]},{b:[{c:30},{c:40}]}]}}")), + BSONObj()); + + } + + TEST(ShardKeyPattern, ExtractQueryShardKeyHashed) { + + // + // Hashed ShardKeyPattern + // + + const string value = "12345"; + const BSONObj bsonValue = BSON("" << value); + const long long hashValue = BSONElementHasher::hash64(bsonValue.firstElement(), + BSONElementHasher::DEFAULT_HASH_SEED); + + // Hashed works basically the same as non-hashed, but applies the hash function at the end + ShardKeyPattern pattern(BSON("a.b" << "hashed")); + ASSERT_EQUALS(queryKey(pattern, BSON("a.b" << value)), BSON("a.b" << hashValue)); + ASSERT_EQUALS(queryKey(pattern, BSON("a" << BSON("b" << value))), BSON("a.b" << hashValue)); + ASSERT_EQUALS(queryKey(pattern, BSON("a.b" << BSON("$eq" << value))), + BSON("a.b" << hashValue)); + ASSERT_EQUALS(queryKey(pattern, BSON("a" << BSON("b" << 
value) << "c" << 30)), + BSON("a.b" << hashValue)); + ASSERT_EQUALS(queryKey(pattern, BSON("a" << BSON("c" << 30 << "b" << value))), + BSON("a.b" << hashValue)); + ASSERT_EQUALS(queryKey(pattern, // + BSON("$and" << BSON_ARRAY(BSON("a.b" << BSON("$eq" << value))))), + BSON("a.b" << hashValue)); + + ASSERT_EQUALS(queryKey(pattern, BSON("a" << BSON("b" << BSON("$eq" << value)))), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, BSON("a.b" << BSON("$gt" << value))), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, BSON("a" << BSON("c" << value))), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, BSON("a" << BSON("b" << BSON_ARRAY(value)))), BSONObj()); + ASSERT_EQUALS(queryKey(pattern, BSON("a" << BSON_ARRAY(BSON("b" << value)))), BSONObj()); + } + + static bool indexComp(const ShardKeyPattern& pattern, const BSONObj& indexPattern) { + return pattern.isUniqueIndexCompatible(indexPattern); + } + + TEST(ShardKeyPattern, UniqueIndexCompatibleSingle) { + + // + // Single field ShardKeyPatterns + // + + ShardKeyPattern pattern(BSON("a" << 1)); + ASSERT(indexComp(pattern, BSON("a" << 1))); + ASSERT(indexComp(pattern, BSON("a" << -1))); + ASSERT(indexComp(pattern, BSON("a" << 1 << "b" << 1))); + ASSERT(indexComp(pattern, BSON("a" << -1 << "b" << 1))); + + ASSERT(indexComp(pattern, BSON("_id" << 1))); + ASSERT(indexComp(pattern, BSON("_id" << -1 << "b" << 1))); + + ASSERT(!indexComp(pattern, BSON("b" << 1))); + ASSERT(!indexComp(pattern, BSON("b" << -1 << "a" << 1))); + } + + TEST(ShardKeyPattern, UniqueIndexCompatibleCompound) { + + // + // Compound ShardKeyPatterns + // + + ShardKeyPattern pattern(BSON("a" << 1 << "b" << 1.0)); + ASSERT(indexComp(pattern, BSON("a" << 1 << "b" << 1))); + ASSERT(indexComp(pattern, BSON("a" << 1 << "b" << -1.0))); + ASSERT(indexComp(pattern, BSON("a" << 1 << "b" << -1.0 << "c" << 1))); + + ASSERT(indexComp(pattern, BSON("_id" << 1))); + ASSERT(indexComp(pattern, BSON("_id" << -1 << "c" << 1))); + + ASSERT(!indexComp(pattern, BSON("a" << 1))); + ASSERT(!indexComp(pattern, BSON("b" << 1))); + ASSERT(!indexComp(pattern, BSON("a" << 1 << "c" << 1.0f))); + ASSERT(!indexComp(pattern, BSON("b" << -1 << "a" << 1 << "c" << 1))); + } + + TEST(ShardKeyPattern, UniqueIndexCompatibleNested) { + + // + // Nested ShardKeyPatterns + // + + ShardKeyPattern pattern(BSON("a.b" << 1 << "c" << 1.0)); + ASSERT(indexComp(pattern, BSON("a.b" << 1 << "c" << 1.0f))); + + ASSERT(!indexComp(pattern, BSON("a.b" << 1))); + ASSERT(!indexComp(pattern, BSON("a" << 1 << "c" << -1.0))); + ASSERT(!indexComp(pattern, BSON("c" << -1 << "a.b" << 1))); + } + + TEST(ShardKeyPattern, UniqueIndexCompatibleHashed) { + + // + // Hashed ShardKeyPatterns + // + + ShardKeyPattern pattern(BSON("a.b" << "hashed")); + + ASSERT(indexComp(pattern, BSON("a.b" << 1))); + ASSERT(indexComp(pattern, BSON("a.b" << -1))); + ASSERT(indexComp(pattern, BSON("a.b" << 1 << "c" << 1))); + ASSERT(indexComp(pattern, BSON("a.b" << -1 << "c" << 1))); + + ASSERT(indexComp(pattern, BSON("_id" << 1))); + ASSERT(indexComp(pattern, BSON("_id" << -1 << "c" << 1))); + + ASSERT(!indexComp(pattern, BSON("c" << 1))); + ASSERT(!indexComp(pattern, BSON("c" << -1 << "a.b" << 1))); + } +} + diff --git a/src/mongo/s/shardkey.cpp b/src/mongo/s/shardkey.cpp deleted file mode 100644 index 6c70878cdd9..00000000000 --- a/src/mongo/s/shardkey.cpp +++ /dev/null @@ -1,207 +0,0 @@ -// shardkey.cpp - -/** -* Copyright (C) 2008 10gen Inc. 
-* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see <http://www.gnu.org/licenses/>. -* -* As a special exception, the copyright holders give permission to link the -* code of portions of this program with the OpenSSL library under certain -* conditions as described in each individual source file and distribute -* linked combinations including the program with the OpenSSL library. You -* must comply with the GNU Affero General Public License in all respects -* for all of the code used other than as permitted herein. If you modify -* file(s) with this exception, you may extend this exception to your -* version of the file(s), but you are not obligated to do so. If you do not -* wish to do so, delete this exception statement from your version. If you -* delete this exception statement from all source files in the program, -* then also delete it in the license file. -*/ - -#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding - -#include "mongo/platform/basic.h" - -#include "mongo/s/chunk.h" -#include "mongo/s/shard_key_pattern.h" -#include "mongo/db/jsobj.h" -#include "mongo/db/json.h" -#include "mongo/util/log.h" -#include "mongo/util/startup_test.h" -#include "mongo/util/timer.h" - -namespace mongo { - - ShardKeyPattern::ShardKeyPattern( BSONObj p ) : pattern( p.getOwned() ) { - pattern.toBSON().getFieldNames( patternfields ); - - BSONObjBuilder min; - BSONObjBuilder max; - - BSONObjIterator it(p); - while (it.more()) { - BSONElement e (it.next()); - min.appendMinKey(e.fieldName()); - max.appendMaxKey(e.fieldName()); - } - - gMin = min.obj(); - gMax = max.obj(); - } - - static bool _hasShardKey(const BSONObj& doc, - const set<string>& patternFields, - bool allowRegex) { - - // this is written s.t. if doc has lots of fields, if the shard key fields are early, - // it is fast. so a bit more work to try to be semi-fast. - - for (set<string>::const_iterator it = patternFields.begin(); it != patternFields.end(); - ++it) { - BSONElement shardKeyField = doc.getFieldDotted(it->c_str()); - if (shardKeyField.eoo() - || shardKeyField.type() == Array - || (!allowRegex && shardKeyField.type() == RegEx) - || (shardKeyField.type() == Object && - !shardKeyField.embeddedObject().okForStorage())) { - // Don't allow anything for a shard key we can't store -- like $gt/$lt ops - return false; - } - } - return true; - } - - bool ShardKeyPattern::hasShardKey(const BSONObj& doc) const { - return _hasShardKey(doc, patternfields, true); - } - - bool ShardKeyPattern::hasTargetableShardKey(const BSONObj& doc) const { - return _hasShardKey(doc, patternfields, false); - } - - bool ShardKeyPattern::isUniqueIndexCompatible( const KeyPattern& uniqueIndexPattern ) const { - return mongo::isUniqueIndexCompatible( pattern.toBSON(), uniqueIndexPattern.toBSON() ); - } - - string ShardKeyPattern::toString() const { - return pattern.toString(); - } - - /* things to test for compound : - \ middle (deprecating?) 
- */ - class ShardKeyUnitTest : public StartupTest { - public: - - void hasshardkeytest() { - ShardKeyPattern k( BSON( "num" << 1 ) ); - - BSONObj x = fromjson("{ zid : \"abcdefg\", num: 1.0, name: \"eliot\" }"); - verify( k.hasShardKey(x) ); - verify( !k.hasShardKey( fromjson("{foo:'a'}") ) ); - verify( !k.hasShardKey( fromjson("{x: {$gt: 1}}") ) ); - verify( !k.hasShardKey( fromjson("{num: {$gt: 1}}") ) ); - BSONObj obj = BSON( "num" << BSON( "$ref" << "coll" << "$id" << 1)); - verify( k.hasShardKey(obj)); - - // try compound key - { - ShardKeyPattern k( fromjson("{a:1,b:-1,c:1}") ); - verify( k.hasShardKey( fromjson("{foo:'a',a:'b',c:'z',b:9,k:99}") ) ); - BSONObj obj = BSON( "foo" << "a" << - "a" << BSON("$ref" << "coll" << "$id" << 1) << - "c" << 1 << "b" << 9 << "k" << 99 ); - verify( k.hasShardKey( obj ) ); - verify( !k.hasShardKey( fromjson("{foo:'a',a:[1,2],c:'z',b:9,k:99}") ) ); - verify( !k.hasShardKey( fromjson("{foo:'a',a:{$gt:1},c:'z',b:9,k:99}") ) ); - verify( !k.hasShardKey( fromjson("{foo:'a',a:'b',c:'z',bb:9,k:99}") ) ); - verify( !k.hasShardKey( fromjson("{k:99}") ) ); - } - - // try dotted key - { - ShardKeyPattern k( fromjson("{'a.b':1}") ); - verify( k.hasShardKey( fromjson("{a:{b:1,c:1},d:1}") ) ); - verify( k.hasShardKey( fromjson("{'a.b':1}") ) ); - BSONObj obj = BSON( "c" << "a" << - "a" << BSON("$ref" << "coll" << "$id" << 1) ); - verify( !k.hasShardKey( obj ) ); - obj = BSON( "c" << "a" << - "a" << BSON( "b" << BSON("$ref" << "coll" << "$id" << 1) << - "c" << 1)); - verify( k.hasShardKey( obj ) ); - verify( !k.hasShardKey( fromjson("{'a.c':1}") ) ); - verify( !k.hasShardKey( fromjson("{'a':[{b:1}, {c:1}]}") ) ); - verify( !k.hasShardKey( fromjson("{a:{b:[1,2]},d:1}") ) ); - verify( !k.hasShardKey( fromjson("{a:{c:1},d:1}") ) ); - verify( !k.hasShardKey( fromjson("{a:1}") ) ); - verify( !k.hasShardKey( fromjson("{b:1}") ) ); - } - - } - - void extractkeytest() { - ShardKeyPattern k( fromjson("{a:1,'sub.b':-1,'sub.c':1}") ); - - BSONObj x = fromjson("{a:1,'sub.b':2,'sub.c':3}"); - verify( k.extractKeyFromQueryOrDoc( fromjson("{a:1,sub:{b:2,c:3}}") ).binaryEqual(x) ); - verify( k.extractKeyFromQueryOrDoc( fromjson("{sub:{b:2,c:3},a:1}") ).binaryEqual(x) ); - } - - void uniqueIndexCompatibleTest() { - ShardKeyPattern k1( BSON( "a" << 1 ) ); - verify( k1.isUniqueIndexCompatible( BSON( "_id" << 1 ) ) ); - verify( k1.isUniqueIndexCompatible( BSON( "a" << 1 << "b" << 1 ) ) ); - verify( k1.isUniqueIndexCompatible( BSON( "a" << -1 ) ) ); - verify( ! k1.isUniqueIndexCompatible( BSON( "b" << 1 ) ) ); - - ShardKeyPattern k2( BSON( "a" << "hashed") ); - verify( k2.isUniqueIndexCompatible( BSON( "a" << 1 ) ) ); - verify( ! k2.isUniqueIndexCompatible( BSON( "b" << 1 ) ) ); - } - - void run() { - extractkeytest(); - - ShardKeyPattern k( BSON( "key" << 1 ) ); - - BSONObj min = k.globalMin(); - -// cout << min.jsonString(TenGen) << endl; - - BSONObj max = k.globalMax(); - - BSONObj k1 = BSON( "key" << 5 ); - - verify( min < max ); - verify( min < k.extractKeyFromQueryOrDoc( k1 ) ); - verify( max > min ); - - hasshardkeytest(); - verify( k.hasShardKey( k1 ) ); - verify( ! 
k.hasShardKey( BSON( "key2" << 1 ) ) ); - - BSONObj a = k1; - BSONObj b = BSON( "key" << 999 ); - - verify( k.extractKeyFromQueryOrDoc( a ) < k.extractKeyFromQueryOrDoc( b ) ); - - // add middle multitype tests - - uniqueIndexCompatibleTest(); - - LOG(1) << "shardKeyTest passed" << endl; - } - } shardKeyTest; - -} // namespace mongo diff --git a/src/mongo/s/shardkey.h b/src/mongo/s/shardkey.h deleted file mode 100644 index a9f8eb2925c..00000000000 --- a/src/mongo/s/shardkey.h +++ /dev/null @@ -1,140 +0,0 @@ -// shardkey.h - -/** -* Copyright (C) 2008 10gen Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see <http://www.gnu.org/licenses/>. -* -* As a special exception, the copyright holders give permission to link the -* code of portions of this program with the OpenSSL library under certain -* conditions as described in each individual source file and distribute -* linked combinations including the program with the OpenSSL library. You -* must comply with the GNU Affero General Public License in all respects -* for all of the code used other than as permitted herein. If you modify -* file(s) with this exception, you may extend this exception to your -* version of the file(s), but you are not obligated to do so. If you do not -* wish to do so, delete this exception statement from your version. If you -* delete this exception statement from all source files in the program, -* then also delete it in the license file. -*/ - -#pragma once - -#include "mongo/base/string_data.h" -#include "mongo/db/keypattern.h" -#include "mongo/s/shard_key_pattern.h" - -namespace mongo { - - /** - * THIS FUNCTIONALITY IS DEPRECATED - * Everything BSON related in this file should migrate gradually to s/shard_key_pattern.h, new - * functionality should not go here. - */ - - class Chunk; - class FieldRangeSet; - - /* A ShardKeyPattern is a pattern indicating what data to extract from the object to make the shard key from. - Analogous to an index key pattern. - */ - class ShardKeyPattern { - public: - ShardKeyPattern( BSONObj p = BSONObj() ); - - /** - global min is the lowest possible value for this key - e.g. { num : MinKey } - */ - BSONObj globalMin() const { return gMin; } - - /** - global max is the highest possible value for this key - */ - BSONObj globalMax() const { return gMax; } - - /** - @return whether or not obj has all fields in this shard key pattern - e.g. - ShardKey({num:1}).hasShardKey({ name:"joe", num:3 }) is true - ShardKey({"a.b":1}).hasShardKey({ "a.b":"joe"}) is true - ShardKey({"a.b":1}).hasShardKey({ "a": {"b":"joe"}}) is true - - ShardKey({num:1}).hasShardKey({ name:"joe"}) is false - ShardKey({num:1}).hasShardKey({ name:"joe", num:{$gt:3} }) is false - - see unit test for more examples - */ - bool hasShardKey( const BSONObj& doc ) const; - - /** - * Same as the above, but disallow certain shard key values which are interpreted for - * targeting as a multi-shard query (i.e. 
RegExes) - */ - bool hasTargetableShardKey( const BSONObj& doc ) const; - - BSONObj key() const { return pattern.toBSON(); } - - std::string toString() const; - - /** - * DEPRECATED function to return a shard key from either a document or a query expression. - * Always prefer the more specific keypattern.h extractKeyFromXXX functions instead. - * TODO: Eliminate completely. - */ - BSONObj extractKeyFromQueryOrDoc(const BSONObj& from) const; - - BSONObj extendRangeBound( const BSONObj& bound , bool makeUpperInclusive ) const { - return pattern.extendRangeBound( bound , makeUpperInclusive ); - } - - /** - * @return - * true if this shard key is compatible with a unique index on 'uniqueIndexPattern'. - * Primarily this just checks whether 'this' is a prefix of 'uniqueIndexPattern', - * However it does not need to be an exact syntactic prefix due to "hashed" - * indexes or mismatches in ascending/descending order. Also, uniqueness of the - * _id field is guaranteed by the generation process (or by the user) so every - * index that begins with _id is unique index compatible with any shard key. - * Examples: - * shard key {a : 1} is compatible with a unique index on {_id : 1} - * shard key {a : 1} is compatible with a unique index on {a : 1 , b : 1} - * shard key {a : 1} is compatible with a unique index on {a : -1 , b : 1 } - * shard key {a : "hashed"} is compatible with a unique index on {a : 1} - * shard key {a : 1} is not compatible with a unique index on {b : 1} - * shard key {a : "hashed" , b : 1 } is not compatible with unique index on { b : 1 } - * Note: - * this method assumes that 'uniqueIndexPattern' is a valid index pattern, - * and is capable of being a unique index. A pattern like { k : "hashed" } - * is never capable of being a unique index, and thus is an invalid setting - * for the 'uniqueIndexPattern' argument. - */ - bool isUniqueIndexCompatible( const KeyPattern& uniqueIndexPattern ) const; - - private: - KeyPattern pattern; - BSONObj gMin; - BSONObj gMax; - - /* question: better to have patternfields precomputed or not? depends on if we use copy constructor often. */ - std::set<std::string> patternfields; - }; - - // See note above - do not use in new code - inline BSONObj ShardKeyPattern::extractKeyFromQueryOrDoc(const BSONObj& from) const { - BSONObj k = pattern.extractShardKeyFromQuery( from ); - uassert(13334, "Shard Key must be less than 512 bytes", k.objsize() < kMaxShardKeySize); - return k; - } - -}
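For orientation, a minimal usage sketch of the replacement API introduced in this diff; the key pattern and values are illustrative (they mirror shard_key_pattern_test.cpp) rather than taken from any call site:

    // Sketch only: the new ShardKeyPattern subsumes the deleted shardkey.h class.
    ShardKeyPattern pattern(BSON("a.b" << 1));  // one nested, ascending key field
    BSONObj docKey = pattern.extractShardKeyFromDoc(fromjson("{a:{b:10}, c:30}"));
    // docKey == { 'a.b' : 10 }, and it round-trips through validation:
    invariant(pattern.isShardKey(docKey));
    StatusWith<BSONObj> queryKey =
        pattern.extractShardKeyFromQuery(fromjson("{'a.b':{$eq:10}}"));
    // queryKey.getValue() == { 'a.b' : 10 }; together these replace the old
    // hasShardKey() / extractKeyFromQueryOrDoc() pair.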