Skip to content

Commit

Permalink
Bug#36921175 Complex Join Predicate with Subquery Not Possible To Off…
Browse files Browse the repository at this point in the history
…load to HeatWave (1/2 noclose)

Eliminate actual sorting in window specifications if they
do not contain references to inner tables; in such cases there is no
effective partitioning or ordering and the presence of the outer
reference hinders offloading to RAPID.

Note that MySQL allows outer references in PARTITION BY and ORDER BY
expressions, whereas the standard requires all column references to be
inner. This patch does not change this liberal interpretation, but
skips the actual sorting in the access path (by skipping the orderings
contribution to Window::m_sorting_order unless the ordering is
semantically meaningful, i.e. it contains at least one inner
reference).

Change-Id: I64a00c90d5f37c104e5a375b1d59e415da81d6f2
  • Loading branch information
Dag Wanvik authored and dahlerlend committed Sep 14, 2024
1 parent 4496e7b commit 8f92347
Show file tree
Hide file tree
Showing 8 changed files with 245 additions and 30 deletions.
2 changes: 1 addition & 1 deletion mysql-test/r/subquery_scalar_to_derived_correlated.result
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ EXPLAIN SELECT a,
FROM t3;
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t3 NULL ALL NULL NULL NULL NULL 2 100.00 NULL
2 DEPENDENT SUBQUERY t2 NULL ALL NULL NULL NULL NULL 2 100.00 Using filesort
2 DEPENDENT SUBQUERY t2 NULL ALL NULL NULL NULL NULL 2 100.00 NULL
Warnings:
Note 1276 Field or reference 'test.t3.a' of SELECT #2 was resolved in SELECT #1
Note 3598 To get information about window functions use EXPLAIN FORMAT=JSON
Expand Down
2 changes: 1 addition & 1 deletion mysql-test/r/window_functions.result
Original file line number Diff line number Diff line change
Expand Up @@ -9201,7 +9201,7 @@ SELECT AVG(i) FROM t1 ORDER BY RANK() OVER (PARTITION BY AVG(i) ORDER BY i);
ERROR 42000: In aggregated query without GROUP BY, expression #1 of PARTITION BY or ORDER BY clause of window '<unnamed window>' contains nonaggregated column 'test.t1.i'; this is incompatible with sql_mode=only_full_group_by
SELECT AVG(i), RANK() OVER w FROM t1 WINDOW w AS (ORDER BY i);
ERROR 42000: In aggregated query without GROUP BY, expression #1 of PARTITION BY or ORDER BY clause of window 'w' contains nonaggregated column 'test.t1.i'; this is incompatible with sql_mode=only_full_group_by
SELECT (select AVG(i)+RANK() OVER (ORDER BY i)) FROM t1;
SELECT (SELECT AVG(i)+RANK() OVER (ORDER BY i)) FROM t1;
ERROR 42000: In aggregated query without GROUP BY, expression #1 of SELECT list contains nonaggregated column 'test.t1.i'; this is incompatible with sql_mode=only_full_group_by
DROP TABLE t1;
# End of checking for errors
Expand Down
117 changes: 117 additions & 0 deletions mysql-test/r/window_functions_bugs.result
Original file line number Diff line number Diff line change
Expand Up @@ -2276,3 +2276,120 @@ ON (ta.vkey=outer_t.c5) LIMIT 1)
1
NULL
DROP TABLE t1;
#
# Bug#36921175 Complex Join Predicate with Subquery Not
# Possible To Offload to HeatWave
# See also HeatWave test added to check offload
CREATE TABLE t1(col INT);
CREATE TABLE t2(col INT);
INSERT INTO t1 VALUES (0), (1), (2), (NULL);
INSERT INTO t1 VALUES (0), (1), (2), (NULL);
ANALYZE TABLE t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
# verify that PARTTITION BY containing no inner reference
# is eliminated
SELECT
COUNT(s1.col)
FROM ( t1 AS s1
JOIN
t1 AS s2
ON ( s2.col IN (WITH RECURSIVE cte1 AS (
SELECT DENSE_RANK() OVER (PARTITION BY s1.col)
FROM t2
WHERE col >= 7)
SELECT * FROM cte1))
);
COUNT(s1.col)
0
EXPLAIN FORMAT=tree SELECT
COUNT(s1.col)
FROM ( t1 AS s1
JOIN
t1 AS s2
ON ( s2.col IN (WITH RECURSIVE cte1 AS (
SELECT DENSE_RANK() OVER (PARTITION BY s1.col)
FROM t2
WHERE col >= 7)
SELECT * FROM cte1))
);
EXPLAIN
-> Aggregate: count(s1.col) (rows=1)
-> Inner hash join (no condition) (rows=8)
-> Table scan on s1 (rows=8)
-> Hash
-> Remove duplicate s2 rows using temporary table (weedout) (rows=1)
-> Inner hash join (s2.col = cte1.`DENSE_RANK() OVER (PARTITION BY s1.col)`) (rows=1)
-> Table scan on s2 (rows=8)
-> Hash
-> Table scan on cte1 (rows=1)
-> Materialize CTE cte1 (rows=1)
-> Window aggregate: dense_rank() OVER (PARTITION BY s1.col ) (rows=1)
-> Filter: (t2.col >= 7) (rows=1)
-> Table scan on t2 (rows=1)

Warnings:
Note 1276 Field or reference 'test.s1.col' of SELECT #3 was resolved in SELECT #1
SET optimizer_trace="enabled=on";
SELECT
COUNT(s1.col)
FROM ( t1 AS s1
JOIN
t1 AS s2
ON ( s2.col IN (WITH RECURSIVE cte1 AS (
SELECT DENSE_RANK() OVER (PARTITION BY s1.col)
FROM t2
WHERE col >= 7)
SELECT * FROM cte1))
);
COUNT(s1.col)
0
Check that trace shows the elimination
SELECT JSON_PRETTY(JSON_EXTRACT(
trace,
'$.steps[*].join_preparation.steps[*].'
'join_preparation.steps[*].join_preparation.steps[0]'))
FROM information_schema.OPTIMIZER_TRACE;
JSON_PRETTY(JSON_EXTRACT(
trace,
'$.steps[*].join_preparation.steps[*].'
'join_preparation.steps[*].join_preparation.steps[0]'))
[
{
"eliminated sorting for PARTITION BY in window": "(PARTITION BY `s1`.`col` ) "
}
]
SET optimizer_trace="enabled=default";
# verify that PARTTITION BY is *not* eliminated; has an outer
# reference and an inner reference
EXPLAIN FORMAT=tree
SELECT
COUNT(s1.col)
FROM ( t1 AS s1
JOIN
t1 AS s2
ON ( s2.col IN (WITH RECURSIVE cte1 AS (
SELECT DENSE_RANK() OVER (PARTITION BY s1.col + t2.col)
FROM t2
WHERE col >= 7)
SELECT * FROM cte1))
);
EXPLAIN
-> Aggregate: count(s1.col) (rows=1)
-> Inner hash join (no condition) (rows=8)
-> Table scan on s1 (rows=8)
-> Hash
-> Remove duplicate s2 rows using temporary table (weedout) (rows=1)
-> Inner hash join (s2.col = cte1.`DENSE_RANK() OVER (PARTITION BY s1.col + t2.col)`) (rows=1)
-> Table scan on s2 (rows=8)
-> Hash
-> Table scan on cte1 (rows=1)
-> Materialize CTE cte1 (rows=1)
-> Window aggregate: dense_rank() OVER (PARTITION BY (s1.col + t2.col) ) (rows=1)
-> Sort: (s1.col + t2.col) (rows=1)
-> Filter: (t2.col >= 7) (rows=1)
-> Table scan on t2 (rows=1)

Warnings:
Note 1276 Field or reference 'test.s1.col' of SELECT #3 was resolved in SELECT #1
DROP TABLE t1, t2;
12 changes: 2 additions & 10 deletions mysql-test/r/window_functions_explain.result
Original file line number Diff line number Diff line change
Expand Up @@ -13834,24 +13834,17 @@ EXPLAIN
"query_block": {
"select_id": 2,
"cost_info": {
"query_cost": "20.05"
"query_cost": "2.05"
},
"windowing": {
"windows": [
{
"name": "<unnamed window>",
"using_filesort": true,
"filesort_key": [
"(1 * 1)"
],
"functions": [
"rank"
]
}
],
"cost_info": {
"sort_cost": "18.00"
},
"table": {
"table_name": "t",
"access_type": "ALL",
Expand Down Expand Up @@ -14235,8 +14228,7 @@ EXPLAIN
"name": "<unnamed window>",
"using_filesort": true,
"filesort_key": [
"(select (`a` + `d`) from `u`)",
"(select `d` from `u`)"
"(select (`a` + `d`) from `u`)"
],
"frame_buffer": {
"using_temporary_table": true,
Expand Down
2 changes: 1 addition & 1 deletion mysql-test/t/window_functions.test
Original file line number Diff line number Diff line change
Expand Up @@ -4043,7 +4043,7 @@ SELECT AVG(i) FROM t1 ORDER BY RANK() OVER (PARTITION BY AVG(i) ORDER BY i);
--error ER_MIX_OF_GROUP_FUNC_AND_FIELDS
SELECT AVG(i), RANK() OVER w FROM t1 WINDOW w AS (ORDER BY i);
--error ER_MIX_OF_GROUP_FUNC_AND_FIELDS
SELECT (select AVG(i)+RANK() OVER (ORDER BY i)) FROM t1;
SELECT (SELECT AVG(i)+RANK() OVER (ORDER BY i)) FROM t1;

DROP TABLE t1;

Expand Down
60 changes: 60 additions & 0 deletions mysql-test/t/window_functions_bugs.test
Original file line number Diff line number Diff line change
Expand Up @@ -1545,3 +1545,63 @@ SELECT ROW(AVG(vkey) OVER (PARTITION BY vkey), 59) =
FROM t1 AS outer_t;

DROP TABLE t1;

--echo #
--echo # Bug#36921175 Complex Join Predicate with Subquery Not
--echo # Possible To Offload to HeatWave
--echo # See also HeatWave test added to check offload
CREATE TABLE t1(col INT);
CREATE TABLE t2(col INT);
INSERT INTO t1 VALUES (0), (1), (2), (NULL);
INSERT INTO t1 VALUES (0), (1), (2), (NULL);
ANALYZE TABLE t1;
--echo # verify that PARTTITION BY containing no inner reference
--echo # is eliminated
let $query =
SELECT
COUNT(s1.col)
FROM ( t1 AS s1
JOIN
t1 AS s2
ON ( s2.col IN (WITH RECURSIVE cte1 AS (
SELECT DENSE_RANK() OVER (PARTITION BY s1.col)
FROM t2
WHERE col >= 7)
SELECT * FROM cte1))
);
eval $query;
--replace_regex $elide_costs
--skip_if_hypergraph # Different the query plan.
eval EXPLAIN FORMAT=tree $query;

SET optimizer_trace="enabled=on";
eval $query;

--echo Check that trace shows the elimination
SELECT JSON_PRETTY(JSON_EXTRACT(
trace,
'$.steps[*].join_preparation.steps[*].'
'join_preparation.steps[*].join_preparation.steps[0]'))
FROM information_schema.OPTIMIZER_TRACE;

SET optimizer_trace="enabled=default";

--echo # verify that PARTTITION BY is *not* eliminated; has an outer
--echo # reference and an inner reference
--replace_regex $elide_costs
--skip_if_hypergraph # Different the query plan.
EXPLAIN FORMAT=tree
SELECT
COUNT(s1.col)
FROM ( t1 AS s1
JOIN
t1 AS s2
ON ( s2.col IN (WITH RECURSIVE cte1 AS (
SELECT DENSE_RANK() OVER (PARTITION BY s1.col + t2.col)
FROM t2
WHERE col >= 7)
SELECT * FROM cte1))
);

DROP TABLE t1, t2;

74 changes: 57 additions & 17 deletions sql/window.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#include "sql/key_spec.h"
#include "sql/mem_root_array.h"
#include "sql/mysqld_cs.h"
#include "sql/opt_trace.h"
#include "sql/parse_location.h"
#include "sql/parse_tree_nodes.h" // PT_*
#include "sql/parse_tree_window.h" // PT_window
Expand Down Expand Up @@ -399,27 +400,55 @@ ORDER *Window::sorting_order(THD *thd, bool implicitly_grouped) {
return nullptr;
}

ORDER *part = effective_partition_by() ? effective_partition_by()->value.first
: nullptr;
ORDER *ord =
effective_order_by() ? effective_order_by()->value.first : nullptr;
// Use Window::can_eliminate_order to determine if we can skip the sort as
// specified by PARTITION BY and/or ORDER BY clauses. If so also add info
// about it to the optimizer trace, if active.
std::array<std::pair<const PT_order_list *, const char *>, 2> orders{
std::make_pair(effective_partition_by(), "PARTITION BY"),
std::make_pair(effective_order_by(), "ORDER BY")};
constexpr uint part_i = 0;
constexpr uint ord_i = 1;
ORDER *sort[2] = {nullptr, nullptr};
uint idx = 0;
for (auto &order : orders) {
sort[idx] = order.first ? order.first->value.first : nullptr;
if (can_eliminate_order(sort[idx])) {
sort[idx] = nullptr;

char buff1[256];
String legend(buff1, sizeof(buff1), system_charset_info);
legend.length(0);
legend.append("eliminated sorting for ");
legend.append(order.second);
legend.append(" in window");
Opt_trace_context *const trace = &thd->opt_trace;
Opt_trace_object trace_wrapper(trace);

char buff2[256];
String w_spec(buff2, sizeof(buff2), system_charset_info);
w_spec.length(0);
print(thd, &w_spec, enum_query_type(QT_ORDINARY | QT_NO_DEFAULT_DB),
/*expand_definition*/ false);

trace_wrapper.add_utf8(legend.ptr(), w_spec.ptr(), w_spec.length());
}
idx++;
}

/*
1. Copy both lists
2. Append the ORDER BY list to the partition list.
This ensures that all columns are present in the resulting sort ordering
and that all ORDER BY expressions are at the end.
The resulting sort can the be used to detect partition change and also
satisfy the window ordering.
Set up m_sorting_order. The resulting sort can then be used to detect
partition change and also satisfy the window ordering. This ensures that
all columns are present in the resulting sort ordering and that all ORDER
BY expressions are at the end.
*/
if (ord == nullptr)
m_sorting_order = part;
else if (part == nullptr)
m_sorting_order = ord;
if (sort[ord_i] == nullptr)
m_sorting_order = sort[part_i];
else if (sort[part_i] == nullptr)
m_sorting_order = sort[ord_i];
else {
ORDER *sorting = clone(thd, part);
ORDER *ob = clone(thd, ord);
// Since we are building a new list, we must clone the contributors
ORDER *sorting = clone(thd, sort[part_i]);
ORDER *ob = clone(thd, sort[ord_i]);
append_to_back(&sorting, ob);
m_sorting_order = sorting;
}
Expand Down Expand Up @@ -1089,6 +1118,17 @@ void Window::eliminate_unused_objects(List<Window> *windows) {
}
}

bool Window::can_eliminate_order(ORDER *order) const {
if (order == nullptr) return false;
for (ORDER *o = order; o != nullptr; o = o->next) {
// contains inner column? if so, return false
const table_map used_tables =
(*o->item)->used_tables() & ~PSEUDO_TABLE_BITS;
if (used_tables != 0) return false;
}
return true;
}

bool Window::setup_windows1(THD *thd, Query_block *select,
Ref_item_array ref_item_array, Table_ref *tables,
mem_root_deque<Item *> *fields,
Expand Down
6 changes: 6 additions & 0 deletions sql/window.h
Original file line number Diff line number Diff line change
Expand Up @@ -1033,6 +1033,12 @@ class Window {
Table_ref *tables,
mem_root_deque<Item *> *fields, ORDER *o,
bool partition_order);
/**
If the ordering expression contains no inner column references, return true,
else false
*/
bool can_eliminate_order(ORDER *order) const;

/**
Return true if this window's name is not unique in windows
*/
Expand Down

0 comments on commit 8f92347

Please sign in to comment.