summaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path
diff options
context:
space:
mode:
authorDavid Rowley <drowley@postgresql.org>2021-11-24 10:06:59 +1300
committerDavid Rowley <drowley@postgresql.org>2021-11-24 10:06:59 +1300
commite502150f7d0be41e3c8784be007fa871a32d8a7f (patch)
treea6c96abbe3eae534d938d05539627b4f03d23f62 /src/backend/optimizer/path
parent1922d7c6e1a74178bd2f1d5aa5a6ab921b3fcd34 (diff)
downloadpostgresql-e502150f7d0be41e3c8784be007fa871a32d8a7f.tar.gz
Allow Memoize to operate in binary comparison mode
Memoize would always use the hash equality operator for the cache key types to determine if the current set of parameters was the same as some previously cached set. Certain types, such as floating points where -0.0 and +0.0 differ in their binary representation but are classed as equal by the hash equality operator, may cause problems: unless the join uses the same operator, it's possible that whichever join operator is being used would be able to distinguish the two values. In which case we may accidentally return the incorrect rows out of the cache. To fix this, here we add a binary mode to Memoize to allow it to compare the current set of parameters to previously cached values by comparing bit-by-bit rather than logically using the hash equality operator. This binary mode is always used for LATERAL joins and it's used for normal joins when any of the join operators are not hashable. Reported-by: Tom Lane Author: David Rowley Discussion: https://postgr.es/m/3004308.1632952496@sss.pgh.pa.us Backpatch-through: 14, where Memoize was added
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r--src/backend/optimizer/path/joinpath.c38
1 files changed, 34 insertions, 4 deletions
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 0f3ad8aa65..322460e968 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -371,19 +371,21 @@ allow_star_schema_join(PlannerInfo *root,
* Returns true the hashing is possible, otherwise return false.
*
* Additionally we also collect the outer exprs and the hash operators for
- * each parameter to innerrel. These set in 'param_exprs' and 'operators'
- * when we return true.
+ * each parameter to innerrel. These are set in 'param_exprs', 'operators'
+ * and 'binary_mode' when we return true.
*/
static bool
paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
RelOptInfo *outerrel, RelOptInfo *innerrel,
- List **param_exprs, List **operators)
+ List **param_exprs, List **operators,
+ bool *binary_mode)
{
ListCell *lc;
*param_exprs = NIL;
*operators = NIL;
+ *binary_mode = false;
if (param_info != NULL)
{
@@ -431,6 +433,20 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
*operators = lappend_oid(*operators, hasheqoperator);
*param_exprs = lappend(*param_exprs, expr);
+
+ /*
+ * When the join operator is not hashable then it's possible that
+ * the operator will be able to distinguish something that the
+ * hash equality operator could not. For example with floating
+ * point types -0.0 and +0.0 are classed as equal by the hash
+ * function and equality function, but some other operator may be
+ * able to tell those values apart. This means that we must put
+ * memoize into binary comparison mode so that it does bit-by-bit
+ * comparisons rather than a "logical" comparison as it would
+ * using the hash equality operator.
+ */
+ if (!OidIsValid(rinfo->hashjoinoperator))
+ *binary_mode = true;
}
}
@@ -461,6 +477,17 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
*operators = lappend_oid(*operators, typentry->eq_opr);
*param_exprs = lappend(*param_exprs, expr);
+
+ /*
+ * We must go into binary mode as we don't have too much of an idea of
+ * how these lateral Vars are being used. See comment above when we
+ * set *binary_mode for the non-lateral Var case. This could be
+ * relaxed a bit if we had the RestrictInfos and knew the operators
+ * being used, however for cases like Vars that are arguments to
+ * functions we must operate in binary mode as we don't have
+ * visibility into what the function is doing with the Vars.
+ */
+ *binary_mode = true;
}
/* We're okay to use memoize */
@@ -481,6 +508,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
List *param_exprs;
List *hash_operators;
ListCell *lc;
+ bool binary_mode;
/* Obviously not if it's disabled */
if (!enable_memoize)
@@ -572,7 +600,8 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
outerrel,
innerrel,
&param_exprs,
- &hash_operators))
+ &hash_operators,
+ &binary_mode))
{
return (Path *) create_memoize_path(root,
innerrel,
@@ -580,6 +609,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
param_exprs,
hash_operators,
extra->inner_unique,
+ binary_mode,
outer_path->parent->rows);
}