diff options
Diffstat (limited to 'src/backend/optimizer/prep/prepunion.c')
-rw-r--r-- | src/backend/optimizer/prep/prepunion.c | 41 |
1 files changed, 36 insertions, 5 deletions
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 750d59a951..3a155c0d0a 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -22,7 +22,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.150 2008/08/07 01:11:50 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.151 2008/08/07 03:04:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -60,6 +60,7 @@ static Plan *generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
 					double tuple_fraction,
 					List *refnames_tlist, List **sortClauses);
 static Plan *generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
+					   double tuple_fraction,
 					   List *refnames_tlist, List **sortClauses);
 static List *recurse_union_children(Node *setOp, PlannerInfo *root,
 					   double tuple_fraction,
@@ -229,7 +230,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 									   refnames_tlist,
 									   sortClauses);
 		else
-			plan = generate_nonunion_plan(op, root,
+			plan = generate_nonunion_plan(op, root, tuple_fraction,
 										  refnames_tlist,
 										  sortClauses);
 
@@ -341,6 +342,7 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
  */
 static Plan *
 generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
+					   double tuple_fraction,
 					   List *refnames_tlist,
 					   List **sortClauses)
 {
@@ -351,6 +353,10 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
 			   *groupList,
 			   *planlist,
 			   *child_sortclauses;
+	double		dNumDistinctRows;
+	double		dNumOutputRows;
+	long		numDistinctRows;
+	bool		use_hash;
 	SetOpCmd	cmd;
 
 	/* Recurse on children, ensuring their outputs are marked */
@@ -394,9 +400,31 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
 	}
 
 	/*
+	 * XXX for the moment, take the number of distinct groups as being the
+	 * total input size, ie, the worst case.  This is too conservative, but
+	 * we don't want to risk having the hashtable overrun memory; also,
+	 * it's not clear how to get a decent estimate of the true size.
+	 */
+	dNumDistinctRows = plan->plan_rows;
+
+	/* Also convert to long int --- but 'ware overflow! */
+	numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);
+
+	/*
+	 * The output size is taken as 10% of that, which is a completely bogus
+	 * guess, but it's what we've used historically.
+	 */
+	dNumOutputRows = ceil(dNumDistinctRows * 0.1);
+
+	/*
 	 * Decide whether to hash or sort, and add a sort node if needed.
 	 */
-	plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan);
+	use_hash = choose_hashed_setop(root, groupList, plan,
+								   tuple_fraction, dNumDistinctRows,
+								   (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
+
+	if (!use_hash)
+		plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan);
 
 	/*
 	 * Finally, add a SetOp plan node to generate the correct output.
@@ -414,9 +442,12 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
 			cmd = SETOPCMD_INTERSECT;	/* keep compiler quiet */
 			break;
 	}
-	plan = (Plan *) make_setop(cmd, plan, groupList, list_length(op->colTypes) + 1);
+	plan = (Plan *) make_setop(cmd, use_hash ? SETOP_HASHED : SETOP_SORTED,
+							   plan, groupList, list_length(op->colTypes) + 1,
+							   numDistinctRows, dNumOutputRows);
 
-	*sortClauses = groupList;
+	/* Result is sorted only if we're not hashing */
+	*sortClauses = use_hash ? NIL : groupList;
 
 	return plan;
 }