#include "mongo/db/pipeline/document_source.h"
namespace mongo {
using boost::intrusive_ptr;
using std::vector;
// Passes the name `bucket` and the parser DocumentSourceBucket::createFromBson to the
// registration macro.
// NOTE(review): REGISTER_MULTI_STAGE_ALIAS is defined outside this file; presumably it makes
// "$bucket" resolve to a parser that yields several stages — confirm against the macro.
REGISTER_MULTI_STAGE_ALIAS(bucket, DocumentSourceBucket::createFromBson);
namespace {
// Parses `expressionElem` as an expression operand using the parse state `vps`, optimizes the
// parsed expression, and returns the result of a dynamic_cast applied to the optimized
// expression's raw pointer. A dynamic_cast on a pointer yields null when the object is not of the
// requested type, so the result can be null.
//
// NOTE(review): the template arguments of the return type and of the dynamic_cast are not visible
// in this text; the function name suggests the target is a constant-expression type — confirm
// against the original file.
intrusive_ptr getExpressionConstant(BSONElement expressionElem,
VariablesParseState vps) {
// Optimize first; the cast below is applied to the optimized expression, not the parsed one.
auto expr = Expression::parseOperand(expressionElem, vps)->optimize();
return dynamic_cast(expr.get());
// NOTE(review): only one closing brace is visible here and it is labelled as the namespace's;
// the function's own closing brace appears to be missing from this text.
} // namespace
// Parses the argument of a $bucket stage (`elem`) and expands it into two stages that are
// returned together: a $group whose _id is a $switch over the bucket boundaries, followed by a
// $sort on _id ascending. Both stages are built by delegating to DocumentSourceGroup and
// DocumentSourceSort with the same `pExpCtx`.
//
// Options handled in the stage object: 'groupBy', 'boundaries', 'default' and 'output'. Any other
// field name is reported through uasserted with code 40197.
//
// NOTE(review): this text appears truncated in several places — message/condition pairs end in
// ");" without a visible call opening them, template arguments are absent, and most closing
// braces are missing. Confirm against the original file before relying on the control flow
// exactly as shown here.
vector> DocumentSourceBucket::createFromBson(
BSONElement elem, const intrusive_ptr& pExpCtx) {
// Message and condition for the check that the stage argument itself is an object.
str::stream() << "Argument to $bucket stage must be an object, but found type: "
<< typeName(elem.type())
<< ".",
elem.type() == BSONType::Object);
const BSONObj bucketObj = elem.embeddedObject();
// groupObjBuilder accumulates the body of the generated $group; switchObjBuilder accumulates the
// body of the $switch expression that becomes the group's _id.
BSONObjBuilder groupObjBuilder;
BSONObjBuilder switchObjBuilder;
VariablesIdGenerator idGenerator;
VariablesParseState vps(&idGenerator);
// State collected while walking the options; validated and consumed after the loop.
vector boundaryValues;
BSONElement groupByField;
Value defaultValue;
bool outputFieldSpecified = false;
// Walk every field of the stage specification and dispatch on its name.
for (auto&& argument : bucketObj) {
const auto argName = argument.fieldNameStringData();
if ("groupBy" == argName) {
// Keep the raw element; it is embedded into each $switch branch further down.
groupByField = argument;
// Two accepted shapes: an object whose first field name starts with '$', or a string that
// starts with '$'.
const bool groupByIsExpressionInObject = groupByField.type() == BSONType::Object &&
groupByField.embeddedObject().firstElementFieldName()[0] == '$';
const bool groupByIsPrefixedPath =
groupByField.type() == BSONType::String && groupByField.valueStringData()[0] == '$';
str::stream() << "The $bucket 'groupBy' field must be defined as a $-prefixed "
"path or an expression, but found: "
<< groupByField.toString(false, false)
<< ".",
groupByIsExpressionInObject || groupByIsPrefixedPath);
} else if ("boundaries" == argName) {
// 'boundaries' must be an array.
str::stream() << "The $bucket 'boundaries' field must be an array, but found type: "
<< typeName(argument.type())
<< ".",
argument.type() == BSONType::Array);
// Each array element is parsed and optimized through getExpressionConstant.
for (auto&& boundaryElem : argument.embeddedObject()) {
auto exprConst = getExpressionConstant(boundaryElem, vps);
// NOTE(review): the condition for this message, the statement that stores the value into
// boundaryValues, and the end of this loop are not visible in this text.
str::stream() << "The $bucket 'boundaries' field must be an array of "
"constant values, but found value: "
<< boundaryElem.toString(false, false)
<< ".",
// Message and condition requiring at least two boundary values.
<< "The $bucket 'boundaries' field must have at least 2 values, but found "
<< boundaryValues.size()
<< " value(s).",
boundaryValues.size() >= 2);
// Make sure that the boundaries are unique, sorted in ascending order, and have the
// same canonical type.
// Starts at index 1 so that each iteration compares an element with its predecessor.
for (size_t i = 1; i < boundaryValues.size(); ++i) {
Value lower = boundaryValues[i - 1];
Value upper = boundaryValues[i];
int lowerCanonicalType = canonicalizeBSONType(lower.getType());
int upperCanonicalType = canonicalizeBSONType(upper.getType());
// NOTE(review): the message below contains a duplicated word ("the the"). It is a runtime
// string, so it is left untouched here.
str::stream() << "All values in the the 'boundaries' option to $bucket "
"must have the same type. Found conflicting types "
<< typeName(lower.getType())
<< " and "
<< typeName(upper.getType())
<< ".",
lowerCanonicalType == upperCanonicalType);
// TODO SERVER-25038: This check must be deferred so that it respects the final
// collator, which is not necessarily the same as the collator at parse time.
// The condition is a strict less-than, so equal neighbours fail it as well.
<< "The 'boundaries' option to $bucket must be sorted, but elements "
<< i - 1
<< " and "
<< i
<< " are not in ascending order ("
<< lower.toString()
<< " is not less than "
<< upper.toString()
<< ").",
pExpCtx->getValueComparator().evaluate(lower < upper));
} else if ("default" == argName) {
// If there is a default, make sure that it parses to a constant expression then add
// default to switch.
auto exprConst = getExpressionConstant(argument, vps);
// NOTE(review): the condition that belongs to this message is not visible in this text.
<< "The $bucket 'default' field must be a constant expression, but found: "
<< argument.toString(false, false)
<< ".",
defaultValue = exprConst->getValue();
defaultValue.addToBsonObj(&switchObjBuilder, "default");
} else if ("output" == argName) {
// Recording this suppresses the default 'count' accumulator added after the loop.
outputFieldSpecified = true;
str::stream() << "The $bucket 'output' field must be an object, but found type: "
<< typeName(argument.type())
<< ".",
argument.type() == BSONType::Object);
// NOTE(review): the body of this loop is not visible in this text.
for (auto&& outputElem : argument.embeddedObject()) {
} else {
uasserted(40197, str::stream() << "Unrecognized option to $bucket: " << argName << ".");
// 'groupBy' is missing if its element was never assigned (still EOO); 'boundaries' is missing if
// no values were collected.
const bool isMissingRequiredField = groupByField.eoo() || boundaryValues.empty();
"$bucket requires 'groupBy' and 'boundaries' to be specified.",
// The first and last boundaries delimit the overall range covered by the buckets.
Value lowerValue = boundaryValues.front();
Value upperValue = boundaryValues.back();
// The range check on the default applies only when its canonical type matches the boundaries'.
if (canonicalizeBSONType(defaultValue.getType()) ==
canonicalizeBSONType(lowerValue.getType())) {
// If the default has the same canonical type as the bucket's boundaries, then make sure the
// default is less than the lowest boundary or greater than or equal to the highest
// boundary.
// TODO SERVER-25038: This check must be deferred so that it respects the final collator,
// which is not necessarily the same as the collator at parse time.
const auto& valueCmp = pExpCtx->getValueComparator();
const bool hasValidDefault = valueCmp.evaluate(defaultValue < lowerValue) ||
valueCmp.evaluate(defaultValue >= upperValue);
"The $bucket 'default' field must be less than the lowest boundary or greater than "
"or equal to the highest boundary.",
// Make the branches for the $switch expression.
// One branch per adjacent pair of boundaries: the case is
// {$and: [{$gte: [groupBy, lower]}, {$lt: [groupBy, upper]}]} and the result is `lower`.
BSONArrayBuilder branchesBuilder;
for (size_t i = 1; i < boundaryValues.size(); ++i) {
Value lower = boundaryValues[i - 1];
Value upper = boundaryValues[i];
BSONObj caseExpr =
BSON("$and" << BSON_ARRAY(BSON("$gte" << BSON_ARRAY(groupByField << lower))
<< BSON("$lt" << BSON_ARRAY(groupByField << upper))));
branchesBuilder.append(BSON("case" << caseExpr << "then" << lower));
// Add the $switch expression to the group BSON object.
switchObjBuilder.append("branches", branchesBuilder.arr());
groupObjBuilder.append("_id", BSON("$switch" << switchObjBuilder.obj()));
// If no output is specified, add a count field by default.
if (!outputFieldSpecified) {
groupObjBuilder.append("count", BSON("$sum" << 1));
// Wrap the accumulated specs as {$group: ...} and {$sort: {_id: 1}}, then let the existing
// group and sort parsers build the stages from the first element of each wrapper.
BSONObj groupObj = BSON("$group" << groupObjBuilder.obj());
BSONObj sortObj = BSON("$sort" << BSON("_id" << 1));
auto groupSource = DocumentSourceGroup::createFromBson(groupObj.firstElement(), pExpCtx);
auto sortSource = DocumentSourceSort::createFromBson(sortObj.firstElement(), pExpCtx);
// The group stage comes first and the sort stage second.
return {groupSource, sortSource};
} // namespace mongo