From aa189237e8b2bee02176010fc067130ba2fad58a Mon Sep 17 00:00:00 2001 From: KhaledR57 Date: Thu, 6 Nov 2025 08:44:50 +0200 Subject: [PATCH 1/3] MDEV-24943: Implement FILTER clause support for aggregate functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aggregate functions (SUM, AVG, COUNT, …) lacked the SQL-standard FILTER clause, forcing CASE-based workarounds that reduced readability. This update introduces the ability to specify a FILTER clause for aggregate functions, allowing for more granular control over which rows are included in the aggregation. It also improves standards compliance and makes queries clearer and more readable. The FILTER(WHERE ...) condition may contain any expression allowed in regular WHERE clauses (subqueries included), except window functions and outer references. --- .../main/aggregates-filter.combinations | 9 + mysql-test/main/aggregates-filter.result | 609 ++++++++++ mysql-test/main/aggregates-filter.test | 1075 +++++++++++++++++ sql/item_func.cc | 16 +- sql/item_func.h | 3 + sql/item_jsonfunc.cc | 5 +- sql/item_sum.cc | 168 ++- sql/item_sum.h | 32 + sql/item_windowfunc.cc | 5 + sql/lex.h | 1 + sql/opt_range.cc | 12 + sql/opt_sum.cc | 16 +- sql/sql_select.cc | 51 + sql/sql_yacc.yy | 50 +- storage/sequence/sequence.cc | 5 +- 15 files changed, 2015 insertions(+), 42 deletions(-) create mode 100644 mysql-test/main/aggregates-filter.combinations create mode 100644 mysql-test/main/aggregates-filter.result create mode 100644 mysql-test/main/aggregates-filter.test diff --git a/mysql-test/main/aggregates-filter.combinations b/mysql-test/main/aggregates-filter.combinations new file mode 100644 index 0000000000000..ce2a60320816a --- /dev/null +++ b/mysql-test/main/aggregates-filter.combinations @@ -0,0 +1,9 @@ +[innodb] +innodb +default-storage-engine=innodb + +[myisam] +default-storage-engine=myisam + +[aria] +default-storage-engine=aria diff --git a/mysql-test/main/aggregates-filter.result 
b/mysql-test/main/aggregates-filter.result new file mode 100644 index 0000000000000..81a73dfba4512 --- /dev/null +++ b/mysql-test/main/aggregates-filter.result @@ -0,0 +1,609 @@ +# +# Setup test table +# +# +# Create custom aggregate function +# +# +# Basic aggregates +# +avg_result weighted_avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result json_objectagg_result std_result stddev_samp_result variance_result var_samp_result collect_result +137.5000 120.3056768558952 550 3 75 200 Novel 0 15 6 ["Phone","Tablet",null,null,"Laptop",null,null,null] {"item2":"Tablet", "item5":"Laptop"} 44.7214 54.1987 1100.0000 312.5000 MULTIPOINT(1 1,2 2,4 4,5 5,7 7,8 8) +# +# Aggregates with DISTINCT +# +avg_result sum_result count_result min_result max_result group_concat_result json_arrayagg_result collect_result +175.0000 525 1 75 200 Laptop,Phone,Tablet [null,"Electronics"] MULTIPOINT(2 2,4 4,8 8) +# +# Aggregates with WHERE (AND) +# +avg_result weighted_avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result json_objectagg_result std_result stddev_samp_result variance_result var_samp_result collect_result +183.3333 156.04395604395606 250 5 75 200 Laptop,Phone,Tablet 0 15 9 [null,"Tablet",null,null,"Laptop",null,null,null] {"item2":"Tablet", "item5":"Laptop"} 41.4578 35.3553 742.1875 NULL MULTIPOINT(2 2,4 4,8 8) +# +# Aggregates with WHERE (OR) +# +avg_result weighted_avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result json_objectagg_result std_result stddev_samp_result variance_result var_samp_result collect_result +137.5000 NULL 825 6 50 200 Guide,Novel,Pants,Phone,Shirt,Textbook 0 15 14 ["Phone","Tablet",null,null,"Laptop","Shirt","Pants",null] {"item1":"Phone", "item2":"Tablet", "item5":"Laptop"} 49.4764 54.1987 
2447.9167 3541.6667 MULTIPOINT(1 1,2 2,3 3,4 4,5 5,7 7,8 8) +# +# Aggregates with WHERE (LIKE) +# +avg_result weighted_avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result json_objectagg_result std_result stddev_samp_result variance_result var_samp_result collect_result +137.5000 NULL 350 5 100 100 Guide,Novel,Textbook 6 15 5 [null,"Tablet",null,null,null,"Shirt",null,null] {"item2":"Tablet", "item6":"Shirt"} 12.5000 54.1987 2447.9167 312.5000 MULTIPOINT(1 1,2 2,3 3,4 4,5 5,7 7,8 8) +# +# Aggregates with (GROUP BY) +# +category avg_result weighted_avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result json_objectagg_result std_result stddev_samp_result variance_result var_samp_result collect_result +Books 200.0000 200 400 2 NULL 200 NULL 7 15 8 [null,null,null] NULL 0.0000 0.0000 0.0000 NULL MULTIPOINT(4 4,8 8) +Clothing 75.0000 75 NULL 2 75 75 NULL 0 0 0 [null,null] NULL NULL NULL NULL NULL MULTIPOINT(7 7) +Electronics 116.6667 120 150 3 100 100 Laptop,Phone,Tablet 0 15 1 [null,"Tablet","Laptop"] {"item2":"Tablet", "item5":"Laptop"} 23.5702 28.8675 555.5556 NULL MULTIPOINT(1 1,2 2,5 5) +category min_amount +Books 24.99 +Clothing 50.00 +Electronics 100.00 +category min_category +Books Books +Clothing Clothing +Electronics Electronics +category avg_value_0e0 +Books 31.666666666666668 +Clothing 45 +Electronics 27.5 +category sum_value +Books 95.00 +Clothing 90.00 +Electronics 65.00 +# +# Aggregates with multiple GROUP BY columns +# +category status avg_result +Books active 200.0000 +Books inactive NULL +Clothing NULL 50.0000 +Clothing active 75.0000 +Electronics active 116.6667 +category status weighted_avg_result +Books active 200 +Books inactive 200 +Clothing NULL 50 +Clothing active 75 +Electronics active 120 +category status sum_result +Books active 400 +Books inactive NULL +Clothing NULL 
50 +Clothing active 75 +Electronics active 350 +category status count_result +Books active 2 +Books inactive 1 +Clothing NULL 1 +Clothing active 1 +Electronics active 3 +category status min_result +Books active 200 +Books inactive NULL +Clothing NULL NULL +Clothing active 75 +Electronics active 100 +category status max_result +Books active 200 +Books inactive NULL +Clothing NULL 50 +Clothing active 75 +Electronics active 150 +category status group_concat_result +Books active Guide,Textbook +Books inactive Novel +Clothing NULL Shirt +Clothing active Pants +Electronics active Laptop,Phone,Tablet +category status bit_and_result +Books active 7 +Books inactive 18446744073709551615 +Clothing NULL 3 +Clothing active 12 +Electronics active 0 +category status bit_or_result +Books active 15 +Books inactive 0 +Clothing NULL 0 +Clothing active 12 +Electronics active 15 +category status bit_xor_result +Books active 8 +Books inactive 0 +Clothing NULL 3 +Clothing active 12 +Electronics active 9 +category status json_arrayagg_result +Books active ["Textbook","Guide"] +Books inactive ["Novel"] +Clothing NULL ["Shirt"] +Clothing active ["Pants"] +Electronics active ["Phone","Tablet","Laptop"] +category status json_objectagg_result +Books active {"item4":"Textbook", "item8":"Guide"} +Books inactive {"item3":"Novel"} +Clothing NULL {"item6":"Shirt"} +Clothing active {"item7":"Pants"} +Electronics active {"item1":"Phone", "item2":"Tablet", "item5":"Laptop"} +category status std_result +Books active 0.0000 +Books inactive NULL +Clothing NULL 0.0000 +Clothing active 0.0000 +Electronics active 23.5702 +category status stddev_samp_result +Books active 0.0000 +Books inactive NULL +Clothing NULL NULL +Clothing active NULL +Electronics active 28.8675 +category status variance_result +Books active 0.0000 +Books inactive NULL +Clothing NULL 0.0000 +Clothing active 0.0000 +Electronics active 555.5556 +category status var_samp_result +Books active NULL +Books inactive NULL +Clothing NULL NULL 
+Clothing active NULL +Electronics active NULL +category status collect_result +Books active MULTIPOINT(4 4,8 8) +Books inactive NULL +Clothing NULL NULL +Clothing active MULTIPOINT(7 7) +Electronics active MULTIPOINT(1 1,2 2,5 5) +# +# GROUP BY with HAVING on aggregates +# +category avg_result weighted_avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result json_objectagg_result std_result stddev_samp_result variance_result var_samp_result collect_result +Books 200.0000 200 400 2 NULL 200 Guide,Novel,Textbook 7 15 8 ["Novel","Textbook","Guide"] {"item3":"Novel", "item4":"Textbook", "item8":"Guide"} 0.0000 0.0000 0.0000 NULL MULTIPOINT(4 4,8 8) +# +# HAVING with alias +# +category avg_value weighted_avg_value sum_value count_value min_value max_value +Books 200.0000 200 400 2 NULL 200 +# +# WHERE + GROUP BY + HAVING +# +category avg_result weighted_avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result json_objectagg_result std_result stddev_samp_result variance_result var_samp_result collect_result +Books 200.0000 200 400 2 NULL 200 Guide,Novel,Textbook 7 15 8 ["Novel","Textbook","Guide"] {"item3":"Novel", "item4":"Textbook", "item8":"Guide"} 0.0000 0.0000 0.0000 NULL MULTIPOINT(4 4,8 8) +Electronics 116.6667 120 150 3 100 100 Laptop,Phone,Tablet 0 15 9 ["Phone","Tablet","Laptop"] {"item1":"Phone", "item2":"Tablet", "item5":"Laptop"} 23.5702 28.8675 555.5556 NULL MULTIPOINT(1 1,2 2,5 5) +# +# WHERE + multiple aggregates with FILTER +# +category total_avg total_weighted_avg active_avg active_weighted_avg total_sum active_sum total_count active_count total_min active_min total_max active_max +Books 200.0000 NULL 200.0000 200 400 NULL 2 2 NULL NULL 200 200 +Clothing 62.5000 66.07142857142857 75.0000 75 NULL NULL 2 2 NULL NULL 75 75 +Electronics 116.6667 120 116.6667 120 150 350 3 1 100 100 150 150 
+# +# WHERE + FILTER + HAVING +# +category active_avg active_weighted_avg active_sum active_count group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result json_objectagg_result std_result stddev_samp_result variance_result var_samp_result collect_result +Books 200.0000 200 400 0 Guide,Textbook 7 15 8 ["Textbook","Guide"] {"item4":"Textbook", "item8":"Guide"} 0.0000 0.0000 0.0000 NULL MULTIPOINT(4 4,8 8) +Clothing 75.0000 75 NULL 0 Pants,Shirt 0 12 15 ["Shirt","Pants"] {"item6":"Shirt", "item7":"Pants"} 12.5000 NULL 156.2500 NULL MULTIPOINT(7 7) +Electronics 116.6667 120 150 3 Laptop,Phone,Tablet 0 15 9 ["Phone","Tablet","Laptop"] {"item1":"Phone", "item2":"Tablet", "item5":"Laptop"} 23.5702 28.8675 555.5556 NULL MULTIPOINT(1 1,2 2,5 5) +# +# WHERE + window function (OVER) +# +category cat_avg cat_sum cat_count cat_min cat_max bit_and_result bit_or_result bit_xor_result std_result stddev_samp_result variance_result var_samp_result collect_result +Books 200.0000 400 2 NULL 200 7 15 8 0.0000 0.0000 0.0000 NULL MULTIPOINT(4 4,8 8) +Books 200.0000 400 2 NULL 200 7 15 8 0.0000 0.0000 0.0000 NULL MULTIPOINT(4 4,8 8) +Books 200.0000 400 2 NULL 200 7 15 8 0.0000 0.0000 0.0000 NULL MULTIPOINT(4 4,8 8) +Clothing 75.0000 NULL 2 75 75 0 12 15 12.5000 NULL 156.2500 NULL MULTIPOINT(7 7) +Clothing 75.0000 NULL 2 75 75 0 12 15 12.5000 NULL 156.2500 NULL MULTIPOINT(7 7) +Electronics 116.6667 150 3 100 100 0 15 9 23.5702 28.8675 555.5556 NULL MULTIPOINT(1 1,2 2,5 5) +Electronics 116.6667 150 3 100 100 0 15 9 23.5702 28.8675 555.5556 NULL MULTIPOINT(1 1,2 2,5 5) +Electronics 116.6667 150 3 100 100 0 15 9 23.5702 28.8675 555.5556 NULL MULTIPOINT(1 1,2 2,5 5) +# +# WHERE + window function with ORDER BY +# +id running_avg running_sum running_count running_min running_max bit_and_result bit_or_result bit_xor_result std_result stddev_samp_result variance_result var_samp_result collect_result +1 100.0000 100 1 100 100 6 6 0 0.0000 NULL 0.0000 NULL MULTIPOINT(1 1) 
+2 125.0000 250 2 100 150 6 14 0 25.0000 35.3553 625.0000 NULL MULTIPOINT(1 1,2 2) +3 125.0000 250 3 100 150 6 14 0 25.0000 35.3553 625.0000 NULL MULTIPOINT(1 1,2 2) +4 150.0000 450 4 100 200 6 15 7 40.8248 50.0000 1666.6667 NULL MULTIPOINT(1 1,2 2,4 4) +5 137.5000 550 5 100 200 0 15 14 41.4578 47.8714 1718.7500 NULL MULTIPOINT(1 1,2 2,4 4,5 5) +6 120.0000 550 6 100 200 0 15 13 50.9902 47.8714 2600.0000 NULL MULTIPOINT(1 1,2 2,4 4,5 5) +7 112.5000 625 7 75 200 0 15 1 49.4764 50.0000 2447.9167 NULL MULTIPOINT(1 1,2 2,4 4,5 5,7 7) +8 125.0000 825 8 75 200 0 15 14 55.0973 54.1987 3035.7143 NULL MULTIPOINT(1 1,2 2,4 4,5 5,7 7,8 8) +# +# WHERE + window function with frame specification +# +id windowed_avg windowed_sum windowed_count windowed_min windowed_max bit_and_result bit_or_result bit_xor_result std_result stddev_samp_result variance_result var_samp_result collect_result +1 125.0000 150 2 100 NULL 6 14 0 25.0000 35.3553 625.0000 NULL MULTIPOINT(1 1,2 2) +2 125.0000 150 2 100 NULL 6 14 0 25.0000 35.3553 625.0000 NULL MULTIPOINT(1 1,2 2) +3 175.0000 350 2 150 200 6 15 7 25.0000 35.3553 625.0000 NULL MULTIPOINT(2 2,4 4) +4 150.0000 200 2 100 200 1 15 14 50.0000 70.7107 2500.0000 NULL MULTIPOINT(4 4,5 5) +5 150.0000 200 3 100 200 1 15 13 62.3610 70.7107 3888.8889 NULL MULTIPOINT(4 4,5 5) +6 87.5000 NULL 3 75 100 0 13 6 20.4124 17.6777 416.6667 NULL MULTIPOINT(5 5,7 7) +7 137.5000 200 3 75 200 0 15 0 65.6167 88.3883 4305.5556 NULL MULTIPOINT(7 7,8 8) +8 137.5000 200 2 75 200 12 15 3 62.5000 88.3883 3906.2500 NULL MULTIPOINT(7 7,8 8) +# +# Subquery with aggregates in WHERE +# +category status value price amount name key_name value_col bit_value geom +Books inactive NULL 19.99 19.99 Novel item3 Novel NULL POINT(3 3) +Books active 200 29.99 29.99 Textbook item4 Textbook 7 POINT(4 4) +Books active 200 24.99 24.99 Guide item8 Guide 15 POINT(8 8) +# +# Correlated subquery with FILTER in SELECT list +# +category filtered_sum filtered_count +Books NULL 2 +Clothing 75 1 
+Electronics 350 3 +# +# Derived table (FROM subquery) with FILTER +# +category filtered_avg filtered_sum +Books 200.0000 400 +Clothing 75.0000 NULL +Electronics 116.6667 150 +# +# Scalar subquery with FILTER +# +category value global_active_avg diff_from_avg +Electronics 100 137.5000 -37.5000 +Electronics 150 137.5000 12.5000 +Books NULL 137.5000 NULL +Books 200 137.5000 62.5000 +Electronics 100 137.5000 -37.5000 +Clothing 50 137.5000 -87.5000 +Clothing 75 137.5000 -62.5000 +Books 200 137.5000 62.5000 +avg_result +183.3333 +# +# IN subquery in FILTER WHERE +# +count_result +8 +# +# AVG with STD in subquery +# +AVG(x) +12.02340868 +# +# Aggregates with GROUP BY and ORDER BY +# +category avg_val weighted_avg_val sum_val count_val min_val max_val group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result json_objectagg_result std_result stddev_samp_result variance_result var_samp_result collect_result +Books 200.0000 200 400 2 NULL 200 Guide,Novel,Textbook 7 15 8 ["Novel","Textbook","Guide"] {"item3":"Novel", "item4":"Textbook", "item8":"Guide"} 0.0000 0.0000 0.0000 NULL MULTIPOINT(4 4,8 8) +Electronics 116.6667 120 150 3 100 100 Laptop,Phone,Tablet 0 15 9 ["Phone","Tablet","Laptop"] {"item1":"Phone", "item2":"Tablet", "item5":"Laptop"} 23.5702 28.8675 555.5556 NULL MULTIPOINT(1 1,2 2,5 5) +Clothing 75.0000 75 NULL 2 75 75 Pants,Shirt 0 12 15 ["Shirt","Pants"] {"item6":"Shirt", "item7":"Pants"} 12.5000 NULL 156.2500 NULL MULTIPOINT(7 7) +# +# Complete query with WHERE, GROUP BY, HAVING, ORDER BY +# +category avg_val weighted_avg_val sum_val count_val min_val max_val group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result json_objectagg_result std_result stddev_samp_result variance_result var_samp_result collect_result +Books 200.0000 200 400 2 NULL 200 Guide,Textbook 7 15 8 ["Textbook","Guide"] {"item4":"Textbook", "item8":"Guide"} 0.0000 0.0000 0.0000 NULL MULTIPOINT(4 4,8 8) +Electronics 116.6667 120 150 3 100 
100 Laptop,Phone,Tablet 0 15 9 ["Phone","Tablet","Laptop"] {"item1":"Phone", "item2":"Tablet", "item5":"Laptop"} 23.5702 28.8675 555.5556 NULL MULTIPOINT(1 1,2 2,5 5) +# +# Aggregates with GROUP BY, ORDER BY, and LIMIT +# +category avg_val weighted_avg_val sum_val count_val min_val max_val group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result json_objectagg_result std_result stddev_samp_result variance_result var_samp_result collect_result +Books 200.0000 200 400 2 NULL 200 Guide,Novel,Textbook 7 15 8 ["Novel","Textbook","Guide"] {"item3":"Novel", "item4":"Textbook", "item8":"Guide"} 0.0000 0.0000 0.0000 NULL MULTIPOINT(4 4,8 8) +Electronics 116.6667 120 150 3 100 100 Laptop,Phone,Tablet 0 15 9 ["Phone","Tablet","Laptop"] {"item1":"Phone", "item2":"Tablet", "item5":"Laptop"} 23.5702 28.8675 555.5556 NULL MULTIPOINT(1 1,2 2,5 5) +# +# Aggregates with ROLLUP +# +category status avg_val weighted_avg_val sum_val count_val min_val max_val group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result json_objectagg_result std_result stddev_samp_result variance_result var_samp_result collect_result +Books active 200.0000 200 400 2 NULL 200 Guide,Textbook 7 15 8 ["Textbook","Guide"] {"item4":"Textbook", "item8":"Guide"} 0.0000 0.0000 0.0000 NULL MULTIPOINT(4 4,8 8) +Books inactive NULL 200 NULL 0 NULL NULL Novel 18446744073709551615 0 0 ["Novel"] {"item3":"Novel"} NULL NULL NULL NULL NULL +Books NULL 200.0000 200 400 2 NULL 200 Guide,Novel,Textbook 7 15 8 ["Textbook","Guide","Novel"] {"item4":"Textbook", "item8":"Guide", "item3":"Novel"} 0.0000 0.0000 0.0000 NULL MULTIPOINT(4 4,8 8) +Clothing NULL NULL 200 NULL 1 NULL 50 Shirt 3 0 3 ["Shirt"] {"item6":"Shirt"} 0.0000 NULL 0.0000 NULL NULL +Clothing active 75.0000 75 NULL 1 75 75 Pants 12 12 12 ["Pants"] {"item7":"Pants"} 0.0000 NULL 0.0000 NULL MULTIPOINT(7 7) +Clothing NULL 75.0000 75 NULL 2 75 75 Pants,Shirt 0 12 15 ["Shirt","Pants"] {"item6":"Shirt", "item7":"Pants"} 
12.5000 NULL 156.2500 NULL MULTIPOINT(7 7) +Electronics active 116.6667 120 150 3 100 100 Laptop,Phone,Tablet 0 15 9 ["Phone","Tablet","Laptop"] {"item1":"Phone", "item2":"Tablet", "item5":"Laptop"} 23.5702 28.8675 555.5556 NULL MULTIPOINT(1 1,2 2,5 5) +Electronics NULL 116.6667 120 150 3 100 100 Laptop,Phone,Tablet 0 15 9 ["Phone","Tablet","Laptop"] {"item1":"Phone", "item2":"Tablet", "item5":"Laptop"} 23.5702 28.8675 555.5556 NULL MULTIPOINT(1 1,2 2,5 5) +NULL NULL 137.5000 120.3056768558952 550 7 75 200 Guide,Laptop,Novel,Pants,Phone,Shirt,Tablet,Textbook 0 15 14 ["Textbook","Guide","Novel","Shirt","Pants","Phone","Tablet","Laptop"] {"item4":"Textbook", "item8":"Guide", "item3":"Novel", "item6":"Shirt", "item7":"Pants", "item1":"Phone", "item2":"Tablet", "item5":"Laptop"} 55.0973 54.1987 3035.7143 NULL MULTIPOINT(4 4,8 8,7 7,1 1,2 2,5 5) +# +# COUNT with column names (not just *) +# +count_value count_category count_name +6 3 3 +# +# COUNT DISTINCT with multiple columns +# +count_distinct +3 +# +# FILTER on indexed column +# +# Create index on status column +avg_result weighted_avg_result sum_result count_result min_result max_result bit_and_result std_result stddev_samp_result variance_result var_samp_result collect_result +137.5000 120.3056768558952 350 3 75 150 0 49.4764 54.1987 3035.7143 NULL MULTIPOINT(1 1,2 2,5 5) +# +# Empty result set (all rows filtered out) +# +avg_result weighted_avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result json_objectagg_result std_result stddev_samp_result variance_result var_samp_result collect_result +NULL NULL NULL 0 NULL NULL NULL 18446744073709551615 0 0 [null,null,null,null,null,null,null,null] NULL NULL NULL NULL NULL NULL +# +# FILTER with NULL conditions +# +avg_result weighted_avg_result count_result sum_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result 
json_objectagg_result std_result stddev_samp_result variance_result var_samp_result collect_result +NULL NULL 0 NULL NULL NULL NULL 18446744073709551615 0 0 [null,null,null,null,null,null,null,null] NULL NULL NULL NULL NULL NULL +# +# FILTER with JOINs +# +# Create second table for JOIN test +category avg_result weighted_avg_result sum_result count_result +Books 200.0000 NULL 200 2 +Electronics 150.0000 150 NULL 0 +# +# FILTER with window functions - different partitions +# +category status cat_avg status_avg cat_count status_count +Books active 200.0000 183.3333 2 4 +Books active 200.0000 183.3333 2 4 +Books inactive 200.0000 NULL 2 0 +Clothing NULL 75.0000 NULL 2 0 +Clothing active 75.0000 183.3333 2 4 +Electronics active 116.6667 183.3333 3 4 +Electronics active 116.6667 183.3333 3 4 +Electronics active 116.6667 183.3333 3 4 +# +# FILTER with empty table +# +# Create empty table +avg_result weighted_avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result json_objectagg_result std_result stddev_samp_result variance_result var_samp_result collect_result +NULL NULL NULL 0 NULL NULL NULL 18446744073709551615 0 0 NULL NULL NULL NULL NULL NULL NULL +# +# FILTER with CASE in WHERE clause +# +avg_result weighted_avg_result sum_result count_result +137.5000 120.3056768558952 550 3 +# +# ORDER BY with FILTER aggregates +# +category avg_val weighted_avg_val sum_val +Books 200.0000 200 400 +Electronics 116.6667 120 150 +Clothing 75.0000 75 NULL +# +# Aggregates on id column itself with FILTER +# +avg_id weighted_avg_id sum_id count_id min_id max_id distinct_count_id +4.5000 3.7991266375545854 14 3 1 8 5 +# +# Aggregates on id with GROUP BY and FILTER +# +category avg_id weighted_avg_id sum_id count_id min_id max_id +Books 6.0000 5.818181818181818 12 2 NULL 8 +Clothing 6.5000 6.642857142857143 7 0 7 7 +Electronics 5.0000 5 8 1 1 5 +# +# STORED AGGREGATE FUNCTION TESTS +# +# Multiple stored 
aggregates with different FILTER conditions +category active_weighted inactive_weighted high_value_weighted low_value_weighted electronics_weighted +Books 200 NULL 200 NULL NULL +Clothing 75 NULL 200 66.07142857142857 NULL +Electronics 120 NULL 150 100 120 +# Stored aggregate with subquery correlation and FILTER +category correlated_weighted_avg +Books 150 +Books 150 +Clothing 75 +Clothing 75 +Electronics 150 +Electronics 150 +# Stored aggregate with all rows filtered out +category no_match_weighted +Books NULL +Clothing NULL +Electronics NULL +# Stored aggregate with complex nested conditions in FILTER +category complex_filter_weighted +Books 200 +Clothing 75 +Electronics 120 +# +# SEQUENCE ENGINE TESTS +# +# COUNT with FILTER on sequence +COUNT(*) FILTER (WHERE seq < 5) +4 +# Verify FILTER and CASE produce same results +filtered_count case_count +4 4 +# SUM with FILTER on sequence +SUM(seq) FILTER (WHERE seq < 5) +10 +# Verify FILTER and CASE produce same results +filtered_sum case_sum +10 10 +# Multiple FILTER conditions on sequence +count_le_3 count_gt_7 count_between sum_even sum_odd +3 3 3 30 25 +# Mixed filtered and unfiltered aggregates on sequence +unfiltered_count filtered_count unfiltered_sum filtered_sum +10 4 55 45 +# FILTER with different sequence ranges +COUNT(*) FILTER (WHERE seq < 50) +49 +SUM(seq) FILTER (WHERE seq BETWEEN 10 AND 20) +165 +# AVG, MIN, MAX with FILTER on sequence +total_avg filtered_avg filtered_min filtered_max +5.5000 3.0000 4 7 +# FILTER that matches no rows +COUNT(*) FILTER (WHERE seq > 100) +0 +SUM(seq) FILTER (WHERE seq < 0) +NULL +# FILTER that matches all rows +COUNT(*) FILTER (WHERE seq > 0) +10 +SUM(seq) FILTER (WHERE seq <= 10) +55 +# Sequence with window functions and FILTER +seq filtered_total even_count +1 1 0 +2 3 1 +3 6 1 +4 6 2 +5 6 2 +# +# FILTER with loose index scan (QUICK_GROUP_MIN_MAX_SELECT) +# +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status 
OK +# t2 is the same as t1, but with some NULLs in the MIN/MAX column +# add rows with NULL's in the MIN/MAX column +Table Op Msg_type Msg_text +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +# Queries without WHERE clause - basic MIN/MAX with FILTER +a1 MIN(a2) FILTER (WHERE a2 > 'a') +a b +b b +c b +d b +a1 MAX(a2) FILTER (WHERE a2 < 'b') +a a +b a +c a +d a +a1 a2 b MIN(c) FILTER (WHERE c > 'b') MAX(c) FILTER (WHERE c < 'n') +a a a b111 d111 +a a b e112 h112 +a b a i121 l121 +a b b m122 m122 +b a a b211 d211 +b a b e212 h212 +b b a i221 l221 +b b b m222 m222 +c a a b311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 m322 +d a a b411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 m422 +# Queries with predicates over GROUP BY attributes +a1 MAX(c) FILTER (WHERE c LIKE '%11%') +a d111 +a h112 +a NULL +a NULL +b d211 +b NULL +b NULL +b NULL +d d411 +d NULL +d NULL +d NULL +a1 a2 b MIN(c) FILTER (WHERE c > 'c') MAX(c) FILTER (WHERE c < 'm') +a a a c111 d111 +a a b e112 h112 +a b a i121 l121 +a b b m122 NULL +b a a c211 d211 +b a b e212 h212 +b b a i221 l221 +b b b m222 NULL +c a a c311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 NULL +a1 a2 b MIN(c) FILTER (WHERE d != 'xy2') MAX(c) FILTER (WHERE d != 'xy3') +a a a c111 d111 +a a b e112 f112 +b a a c211 d211 +b a b e212 f212 +c a a c311 d311 +c a b e312 f312 +d a a c411 d411 +d a b e412 f412 +# IS NULL predicates +a1 a2 b MIN(c) FILTER (WHERE c > 'a5') +a a NULL a777 +c a NULL c777 +# Mixed predicates +a1 a2 b MIN(c) FILTER (WHERE d LIKE 'xy%') MAX(c) FILTER (WHERE d LIKE 'xy%') +a a b e112 h112 +b a b e212 h212 +c a b e312 h312 +c b b m322 p322 +d a b e412 h412 +d b b m422 p422 +# Test with NULLs in MIN/MAX column +a1 a2 b MIN(c) FILTER (WHERE c > 'a5') MAX(c) FILTER (WHERE c < 'z') +a a NULL a777 a999 +a a a b111 d111 +a a b e112 h112 +a b a i121 l121 +a b b m122 p122 +b a a b211 d211 +b a b e212 h212 +b b a i221 l221 +b b b m222 p222 +c a NULL c777 c999 +c 
a a b311 d311 +c a b e312 h312 +c b a i321 l321 +c b b m322 p322 +d a a b411 d411 +d a b e412 h412 +d b a i421 l421 +d b b m422 p422 +# Test FILTER with HAVING clause +a1 min_c +a b111 +b b211 +c b311 +d b411 +# +# Test large integer with FILTER (stolen from func_group.test :) +# +cast_min +9223372036854775807 +9223372036854775807 +NULL +# +# ERROR CASES +# +# FILTER with RANK (should error - FILTER only works with aggregates) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'FILTER (WHERE status = 'active') OVER (ORDER BY value) FROM test_aggregates' at line 1 +# FILTER with LAG (should error - FILTER only works with aggregates) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'FILTER (WHERE status = 'active') OVER (ORDER BY id) FROM test_aggregates' at line 1 +# OVER clause before FILTER clause (should error - correct order is FILTER then OVER) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '(WHERE status = 'active') FROM test_aggregates' at line 1 +# Regular scalar function (should error - FILTER only for aggregates) +ERROR HY000: Incorrect usage of FILTER and NON-AGGREGATE FUNCTION +# FILTER on plain column (should error - not a function) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '(WHERE status = 'active') FROM test_aggregates' at line 1 +# Empty FILTER clause (should error - WHERE condition required) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near ') FROM test_aggregates' at line 1 +# Nested FILTER clauses (should error - cannot nest FILTER) +ERROR HY000: 
Invalid use of group function +# Multiple WHERE keywords (should error) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'WHERE value > 100) FROM test_aggregates' at line 1 +# Using column alias in FILTER WHERE (should error - alias not available) +ERROR 42S22: Unknown column 'val' in 'SELECT' +# Window function in FILTER WHERE condition (should error - not allowed) +ERROR HY000: Incorrect usage of window function and FILTER +# This should error - aggregate in WHERE clause +ERROR HY000: Invalid use of group function +# Missing WHERE keyword (should error) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'status = 'active') FROM test_aggregates' at line 1 +# Missing parentheses (should error) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'WHERE status = 'active' FROM test_aggregates' at line 1 +# +# Illegal use of FILTER with non-aggregate stored function +# +# FILTER clause with non-aggregate stored function should fail +ERROR HY000: Incorrect usage of FILTER and NON-AGGREGATE FUNCTION +# +# CLEANUP +# +# +# End of aggregates FILTER test +# diff --git a/mysql-test/main/aggregates-filter.test b/mysql-test/main/aggregates-filter.test new file mode 100644 index 0000000000000..72fb133f6b321 --- /dev/null +++ b/mysql-test/main/aggregates-filter.test @@ -0,0 +1,1075 @@ +# +# MDEV-24943: Add FILTER clause +# +# Test for aggregate functions with FILTER clause. +# The FILTER clause extends aggregate functions with a WHERE condition. 
+# +# Test coverage includes: +# - Basic aggregate functions (AVG, SUM, COUNT, MIN, MAX) +# - Extended aggregates (GROUP_CONCAT, BIT_*, JSON_*, STD, VARIANCE) +# - DISTINCT and column/star variations +# - GROUP BY, HAVING, ORDER BY, LIMIT +# - Window functions with FILTER +# - Subqueries, JOINs, and edge cases +# - Stored aggregate functions +# - Sequence engine tables +# - MIN/MAX with FILTER and loose index scan (QUICK_GROUP_MIN_MAX_SELECT) +# - Error cases for invalid FILTER usage +# + +--source include/have_sequence.inc + +--disable_query_log +--echo # +--echo # Setup test table +--echo # +CREATE TABLE test_aggregates ( + id INT PRIMARY KEY, + category VARCHAR(50), + status VARCHAR(20), + value INT, + price DECIMAL(10,2), + amount DECIMAL(10,2) unique NOT NULL, + name VARCHAR(50), + key_name VARCHAR(50), + value_col VARCHAR(50), + bit_value INT, + extra_value float(10,2), + geom GEOMETRY +); + +INSERT INTO test_aggregates VALUES +(1, 'Electronics', 'active', 100, 299.99, 100.00, 'Phone', 'item1', 'Phone', 6, 10.00, ST_GeomFromText('POINT(1 1)')), +(2, 'Electronics', 'active', 150, 399.99, 399.99, 'Tablet', 'item2', 'Tablet', 14, 25.00, ST_GeomFromText('POINT(2 2)')), +(3, 'Books', 'inactive', NULL, 19.99, 19.99, 'Novel', 'item3', 'Novel', NULL, 15.00, ST_GeomFromText('POINT(3 3)')), +(4, 'Books', 'active', 200, 29.99, 29.99, 'Textbook', 'item4', 'Textbook', 7, 20.00, ST_GeomFromText('POINT(4 4)')), +(5, 'Electronics', 'active', 100, 499.99, 499.99, 'Laptop', 'item5', 'Laptop', 9, 30.00, ST_GeomFromText('POINT(5 5)')), +(6, 'Clothing', NULL, 50, NULL, 50.00, 'Shirt', 'item6', 'Shirt', 3, 40.00, ST_GeomFromText('POINT(6 6)')), +(7, 'Clothing', 'active', 75, 89.99, 89.99, 'Pants', 'item7', 'Pants', 12, 50.00, ST_GeomFromText('POINT(7 7)')), +(8, 'Books', 'active', 200, 24.99, 24.99, 'Guide', 'item8', 'Guide', 15, 60.00, ST_GeomFromText('POINT(8 8)')); + +--echo # +--echo # Create custom aggregate function +--echo # +delimiter |; +CREATE AGGREGATE FUNCTION 
weighted_avg(val INT, weight INT) RETURNS DOUBLE +BEGIN + DECLARE sum_val_weight DOUBLE DEFAULT 0; + DECLARE sum_weight DOUBLE DEFAULT 0; + DECLARE CONTINUE HANDLER FOR NOT FOUND + RETURN IF(sum_weight > 0, sum_val_weight / sum_weight, NULL); + LOOP + FETCH GROUP NEXT ROW; + SET sum_val_weight = sum_val_weight + val * weight; + SET sum_weight = sum_weight + weight; + END LOOP; +END| +delimiter ;| + + +--echo # +--echo # Basic aggregates +--echo # + +SELECT + AVG(value) FILTER (WHERE status = 'active') as avg_result, + weighted_avg(value, amount) FILTER (WHERE status = 'active') as weighted_avg_result, + SUM(value) FILTER (WHERE value > 100) as sum_result, + COUNT(*) FILTER (WHERE category = 'Electronics') as count_result, + MIN(value) FILTER (WHERE amount > 50) as min_result, + MAX(value) FILTER (WHERE value IS NOT NULL) as max_result, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE status = 'inactive') as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE value BETWEEN 100 AND 200) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE category IN ('Books', 'Electronics')) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE bit_value IS NOT NULL) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE price > 100) as json_arrayagg_result, + JSON_OBJECTAGG(key_name, value_col) FILTER (WHERE amount > 200) as json_objectagg_result, + STD(value) FILTER (WHERE value >= 100) as std_result, + STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE value < 200) as variance_result, + VAR_SAMP(value) FILTER (WHERE category = 'Clothing') as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE status = 'active')) as collect_result +FROM test_aggregates; + +--echo # +--echo # Aggregates with DISTINCT +--echo # + +SELECT + AVG(DISTINCT value) FILTER (WHERE value > 100) as avg_result, + SUM(DISTINCT value) FILTER (WHERE status = 'active') as sum_result, + COUNT(DISTINCT category) FILTER (WHERE category 
= 'Electronics') as count_result, + MIN(DISTINCT value) FILTER (WHERE amount > 50) as min_result, + MAX(DISTINCT value) FILTER (WHERE id > 3) as max_result, + GROUP_CONCAT(DISTINCT name ORDER BY name SEPARATOR ',') FILTER (WHERE price > 100) as group_concat_result, + JSON_ARRAYAGG(DISTINCT category) FILTER (WHERE price > 100) as json_arrayagg_result, + ST_AsText(ST_COLLECT(DISTINCT geom) FILTER (WHERE value > 100)) as collect_result +FROM test_aggregates; + +--echo # +--echo # Aggregates with WHERE (AND) +--echo # + +SELECT + AVG(value) FILTER (WHERE status = 'active' AND value > 100) as avg_result, + weighted_avg(value, amount) FILTER (WHERE status = 'active' AND value > 100) as weighted_avg_result, + SUM(value) FILTER (WHERE category = 'Electronics' AND amount > 200) as sum_result, + COUNT(*) FILTER (WHERE value IS NOT NULL AND id > 3) as count_result, + MIN(value) FILTER (WHERE status = 'active' AND amount > 50) as min_result, + MAX(value) FILTER (WHERE category IN ('Books', 'Electronics') AND value > 0) as max_result, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE price > 100 AND status = 'active') as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value IS NOT NULL AND value > 50) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE category = 'Books' AND amount > 20) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE value BETWEEN 100 AND 200 AND status IS NOT NULL) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE amount > 100 AND category = 'Electronics') as json_arrayagg_result, + JSON_OBJECTAGG(key_name, value_col) FILTER (WHERE amount > 200 AND price > 100) as json_objectagg_result, + STD(value) FILTER (WHERE value >= 100 AND id <= 5) as std_result, + STDDEV_SAMP(value) FILTER (WHERE value > 50 AND amount > 100) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE status = 'active' AND value < 200) as variance_result, + VAR_SAMP(value) FILTER (WHERE category = 'Clothing' AND status IS NOT NULL) as var_samp_result, + 
ST_AsText(ST_COLLECT(geom) FILTER (WHERE status = 'active' AND value > 100)) as collect_result +FROM test_aggregates; + +--echo # +--echo # Aggregates with WHERE (OR) +--echo # + +SELECT + AVG(value) FILTER (WHERE category IN ('Books', 'Electronics') OR amount > 50) as avg_result, + weighted_avg(value, amount) FILTER (WHERE category IN ('Books', 'Electronics') OR amount > 50) as weighted_avg_result, + SUM(value) FILTER (WHERE status = 'active' OR value > 100) as sum_result, + COUNT(*) FILTER (WHERE category = 'Electronics' OR id > 5) as count_result, + MIN(value) FILTER (WHERE value IS NOT NULL OR amount > 200) as min_result, + MAX(value) FILTER (WHERE price > 100 OR status IS NOT NULL) as max_result, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE category = 'Books' OR amount BETWEEN 20 AND 100) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value IS NOT NULL OR value >= 100) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE category IN ('Clothing', 'Electronics') OR amount < 100) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE status = 'inactive' OR value < 200) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE amount > 50 OR category = 'Clothing') as json_arrayagg_result, + JSON_OBJECTAGG(key_name, value_col) FILTER (WHERE amount > 200 OR category = 'Electronics') as json_objectagg_result, + STD(value) FILTER (WHERE value > 50 OR id <= 4) as std_result, + STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL OR amount > 100) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE category = 'Electronics' OR price IS NOT NULL) as variance_result, + VAR_SAMP(value) FILTER (WHERE category = 'Clothing' OR status IS NOT NULL) as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE category IN ('Books', 'Electronics') OR amount > 50)) as collect_result +FROM test_aggregates; + +--echo # +--echo # Aggregates with WHERE (LIKE) +--echo # + +SELECT + AVG(value) FILTER (WHERE status LIKE '%active%') as avg_result, + 
weighted_avg(value, amount) FILTER (WHERE status LIKE '%active%') as weighted_avg_result, + SUM(value) FILTER (WHERE category LIKE 'Elect%') as sum_result, + COUNT(category) FILTER (WHERE name LIKE '%t%') as count_result, + MIN(value) FILTER (WHERE category LIKE '%s') as min_result, + MAX(value) FILTER (WHERE name LIKE 'P%') as max_result, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE category LIKE 'B%') as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE name LIKE '%one') as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE category LIKE '%ics') as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE status LIKE 'a%') as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE name LIKE '%t') as json_arrayagg_result, + JSON_OBJECTAGG(key_name, value_col) FILTER (WHERE name LIKE '%t') as json_objectagg_result, + STD(value) FILTER (WHERE category LIKE 'C%') as std_result, + STDDEV_SAMP(value) FILTER (WHERE status LIKE '%e') as stddev_samp_result, + VARIANCE(value) FILTER (WHERE status LIKE '%e') as variance_result, + VAR_SAMP(value) FILTER (WHERE category LIKE 'C%') as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE status LIKE '%active%')) as collect_result +FROM test_aggregates; + +--echo # +--echo # Aggregates with (GROUP BY) +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as avg_result, + weighted_avg(value, amount) FILTER (WHERE status = 'active') as weighted_avg_result, + SUM(value) FILTER (WHERE value > 100) as sum_result, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_result, + MIN(value) FILTER (WHERE amount > 50) as min_result, + MAX(value) FILTER (WHERE id > 3) as max_result, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE price > 100) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value IS NOT NULL) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE value BETWEEN 100 AND 200) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE category IN ('Books', 
'Electronics')) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE amount > 200) as json_arrayagg_result, + JSON_OBJECTAGG(key_name, value_col) FILTER (WHERE amount > 200) as json_objectagg_result, + STD(value) FILTER (WHERE value >= 100) as std_result, + STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL OR amount > 100) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE id <= 5) as variance_result, + VAR_SAMP(value) FILTER (WHERE category = 'Clothing' AND status IS NOT NULL) as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE status = 'active')) as collect_result +FROM test_aggregates GROUP BY category; + +SELECT category, MIN(amount) FILTER (WHERE id > 0 AND value IS NOT NULL) as min_amount FROM test_aggregates GROUP BY category; +SELECT category, MIN(category) FILTER (WHERE id > 0) as min_category FROM test_aggregates GROUP BY category; +SELECT category, AVG(extra_value+0.0e0) FILTER (WHERE extra_value > 10) as avg_value_0e0 FROM test_aggregates GROUP BY category; +SELECT category, SUM(extra_value) FILTER (WHERE extra_value > 10) as sum_value FROM test_aggregates GROUP BY category; + +--echo # +--echo # Aggregates with multiple GROUP BY columns +--echo # + +SELECT category, status, AVG(value) FILTER (WHERE value > 0) as avg_result FROM test_aggregates GROUP BY category, status; +SELECT category, status, weighted_avg(value, amount) FILTER (WHERE value > 0) as weighted_avg_result FROM test_aggregates GROUP BY category, status; +SELECT category, status, SUM(value) FILTER (WHERE amount > 0) as sum_result FROM test_aggregates GROUP BY category, status; +SELECT category, status, COUNT(*) FILTER (WHERE id > 0) as count_result FROM test_aggregates GROUP BY category, status; +SELECT category, status, MIN(value) FILTER (WHERE price > 0) as min_result FROM test_aggregates GROUP BY category, status; +SELECT category, status, MAX(value) FILTER (WHERE value IS NOT NULL) as max_result FROM test_aggregates GROUP BY category, status; +SELECT category, 
status, GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE category IS NOT NULL) as group_concat_result FROM test_aggregates GROUP BY category, status; +SELECT category, status, BIT_AND(bit_value) FILTER (WHERE bit_value > 0) as bit_and_result FROM test_aggregates GROUP BY category, status; +SELECT category, status, BIT_OR(bit_value) FILTER (WHERE value > 50) as bit_or_result FROM test_aggregates GROUP BY category, status; +SELECT category, status, BIT_XOR(bit_value) FILTER (WHERE id > 2) as bit_xor_result FROM test_aggregates GROUP BY category, status; +SELECT category, status, JSON_ARRAYAGG(name) FILTER (WHERE name IS NOT NULL) as json_arrayagg_result FROM test_aggregates GROUP BY category, status; +SELECT category, status, JSON_OBJECTAGG(key_name, value_col) FILTER (WHERE name IS NOT NULL) as json_objectagg_result FROM test_aggregates GROUP BY category, status; +SELECT category, status, STD(value) FILTER (WHERE value >= 0) as std_result FROM test_aggregates GROUP BY category, status; +SELECT category, status, STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL) as stddev_samp_result FROM test_aggregates GROUP BY category, status; +SELECT category, status, VARIANCE(value) FILTER (WHERE amount >= 0) as variance_result FROM test_aggregates GROUP BY category, status; +SELECT category, status, VAR_SAMP(value) FILTER (WHERE category = 'Clothing' AND status IS NOT NULL) as var_samp_result FROM test_aggregates GROUP BY category, status; +SELECT category, status, ST_AsText(ST_COLLECT(geom) FILTER (WHERE status = 'active')) as collect_result FROM test_aggregates GROUP BY category, status; + +--echo # +--echo # GROUP BY with HAVING on aggregates +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as avg_result, + weighted_avg(value, amount) FILTER (WHERE status = 'active') as weighted_avg_result, + SUM(value) FILTER (WHERE value > 100) as sum_result, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_result, + MIN(value) FILTER (WHERE 
amount > 50) as min_result, + MAX(value) FILTER (WHERE id > 3) as max_result, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE name IS NOT NULL) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value > 0) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE value > 50) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE id > 2) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE name IS NOT NULL) as json_arrayagg_result, + JSON_OBJECTAGG(key_name, value_col) FILTER (WHERE name IS NOT NULL) as json_objectagg_result, + STD(value) FILTER (WHERE value >= 0) as std_result, + STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE amount >= 0) as variance_result, + VAR_SAMP(value) FILTER (WHERE category = 'Clothing' AND status IS NOT NULL) as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE status = 'active')) as collect_result +FROM test_aggregates GROUP BY category +HAVING AVG(value) FILTER (WHERE status = 'active') > 120 AND SUM(value) FILTER (WHERE value > 100) > 200; + +--echo # +--echo # HAVING with alias +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as avg_value, + weighted_avg(value, amount) FILTER (WHERE status = 'active') as weighted_avg_value, + SUM(value) FILTER (WHERE value > 100) as sum_value, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_value, + MIN(value) FILTER (WHERE amount > 50) as min_value, + MAX(value) FILTER (WHERE id > 3) as max_value +FROM test_aggregates GROUP BY category HAVING avg_value > 100 AND sum_value > 200; + +--echo # +--echo # WHERE + GROUP BY + HAVING +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as avg_result, + weighted_avg(value, amount) FILTER (WHERE status = 'active') as weighted_avg_result, + SUM(value) FILTER (WHERE value > 100) as sum_result, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_result, + MIN(value) FILTER (WHERE amount > 50) as min_result, + 
MAX(value) FILTER (WHERE id > 3) as max_result, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE name IS NOT NULL) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value > 0) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE value > 50) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE id > 2) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE name IS NOT NULL) as json_arrayagg_result, + JSON_OBJECTAGG(key_name, value_col) FILTER (WHERE name IS NOT NULL) as json_objectagg_result, + STD(value) FILTER (WHERE value >= 0) as std_result, + STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE amount >= 0) as variance_result, + VAR_SAMP(value) FILTER (WHERE category = 'Clothing' AND status IS NOT NULL) as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE status = 'active')) as collect_result +FROM test_aggregates +GROUP BY category +HAVING AVG(value) FILTER (WHERE status = 'active') > 0 AND SUM(value) FILTER (WHERE value > 100) > 0; + +--echo # +--echo # WHERE + multiple aggregates with FILTER +--echo # + +SELECT category, + AVG(value) FILTER (WHERE amount > 0) as total_avg, + weighted_avg(value, amount) FILTER (WHERE amount > 0) as total_weighted_avg, + AVG(value) FILTER (WHERE status = 'active') as active_avg, + weighted_avg(value, amount) FILTER (WHERE status = 'active') as active_weighted_avg, + SUM(value) FILTER (WHERE value > 100) as total_sum, + SUM(value) FILTER (WHERE category = 'Electronics') as active_sum, + COUNT(*) FILTER (WHERE value IS NOT NULL) as total_count, + COUNT(*) FILTER (WHERE id > 3) as active_count, + MIN(value) FILTER (WHERE price > 100) as total_min, + MIN(value) FILTER (WHERE amount > 200) as active_min, + MAX(value) FILTER (WHERE value BETWEEN 50 AND 200) as total_max, + MAX(value) FILTER (WHERE bit_value IS NOT NULL) as active_max +FROM test_aggregates +GROUP BY category; + +--echo # +--echo # WHERE + FILTER + HAVING +--echo # + +SELECT 
category, + AVG(value) FILTER (WHERE status = 'active') as active_avg, + weighted_avg(value, amount) FILTER (WHERE status = 'active') as active_weighted_avg, + SUM(value) FILTER (WHERE value > 100) as active_sum, + COUNT(*) FILTER (WHERE category = 'Electronics') as active_count, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE name IS NOT NULL) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value > 0) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE value > 50) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE id > 2) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE name IS NOT NULL) as json_arrayagg_result, + JSON_OBJECTAGG(key_name, value_col) FILTER (WHERE name IS NOT NULL) as json_objectagg_result, + STD(value) FILTER (WHERE value >= 0) as std_result, + STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE amount >= 0) as variance_result, + VAR_SAMP(value) FILTER (WHERE category = 'Clothing' AND status IS NOT NULL) as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE status = 'active')) as collect_result +FROM test_aggregates +WHERE value IS NOT NULL +GROUP BY category +HAVING AVG(value) FILTER (WHERE value IS NOT NULL) > 1 AND SUM(value) FILTER (WHERE amount > 0) > 1; + +--echo # +--echo # WHERE + window function (OVER) +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') OVER (PARTITION BY category) as cat_avg, + SUM(value) FILTER (WHERE value > 100) OVER (PARTITION BY category) as cat_sum, + COUNT(*) FILTER (WHERE value IS NOT NULL) OVER (PARTITION BY category) as cat_count, + MIN(value) FILTER (WHERE amount > 50) OVER (PARTITION BY category) as cat_min, + MAX(value) FILTER (WHERE id > 3) OVER (PARTITION BY category) as cat_max, + BIT_AND(bit_value) FILTER (WHERE bit_value > 0) OVER (PARTITION BY category) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE value > 50) OVER (PARTITION BY category) as bit_or_result, + 
BIT_XOR(bit_value) FILTER (WHERE id > 2) OVER (PARTITION BY category) as bit_xor_result, + STD(value) FILTER (WHERE value >= 0) OVER (PARTITION BY category) as std_result, + STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL) OVER (PARTITION BY category) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE amount >= 0) OVER (PARTITION BY category) as variance_result, + VAR_SAMP(value) FILTER (WHERE category = 'Clothing' AND status IS NOT NULL) OVER (PARTITION BY category) as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE status = 'active') OVER (PARTITION BY category)) as collect_result +FROM test_aggregates; + +--echo # +--echo # WHERE + window function with ORDER BY +--echo # + +SELECT id, + AVG(value) FILTER (WHERE value IS NOT NULL) OVER (ORDER BY id) as running_avg, + SUM(value) FILTER (WHERE status = 'active') OVER (ORDER BY id) as running_sum, + COUNT(*) FILTER (WHERE amount > 0) OVER (ORDER BY id) as running_count, + MIN(value) FILTER (WHERE value > 50) OVER (ORDER BY id) as running_min, + MAX(value) FILTER (WHERE id > 0) OVER (ORDER BY id) as running_max, + BIT_AND(bit_value) FILTER (WHERE bit_value > 0) OVER (ORDER BY id) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE value > 50) OVER (ORDER BY id) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE id > 2) OVER (ORDER BY id) as bit_xor_result, + STD(value) FILTER (WHERE value >= 0) OVER (ORDER BY id) as std_result, + STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL) OVER (ORDER BY id) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE amount >= 0) OVER (ORDER BY id) as variance_result, + VAR_SAMP(value) FILTER (WHERE category = 'Clothing' AND status IS NOT NULL) OVER (ORDER BY id) as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE status = 'active') OVER (ORDER BY id)) as collect_result +FROM test_aggregates; + +--echo # +--echo # WHERE + window function with frame specification +--echo # + +SELECT id, + AVG(value) FILTER (WHERE status = 'active') OVER (ORDER BY 
id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as windowed_avg, + SUM(value) FILTER (WHERE value > 100) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as windowed_sum, + COUNT(*) FILTER (WHERE value IS NOT NULL) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as windowed_count, + MIN(value) FILTER (WHERE amount > 50) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as windowed_min, + MAX(value) FILTER (WHERE id > 3) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as windowed_max, + BIT_AND(bit_value) FILTER (WHERE bit_value > 0) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE value > 50) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE id > 2) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as bit_xor_result, + STD(value) FILTER (WHERE value >= 0) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as std_result, + STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE amount >= 0) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as variance_result, + VAR_SAMP(value) FILTER (WHERE category = 'Clothing' AND status IS NOT NULL) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE status = 'active') OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)) as collect_result +FROM test_aggregates; + +--echo # +--echo # Subquery with aggregates in WHERE +--echo # + +SELECT category, status, value, price, amount, name, key_name, value_col, bit_value, ST_AsText(geom) as geom FROM test_aggregates +WHERE category IN ( + SELECT category FROM test_aggregates GROUP BY category + HAVING AVG(value) FILTER (WHERE status = 'active') > 120 AND SUM(value) FILTER (WHERE value > 100) > 200 +); + 
+--echo # +--echo # Correlated subquery with FILTER in SELECT list +--echo # + +SELECT + t1.category, + ( + SELECT SUM(t2.value) FILTER (WHERE t2.amount > 50) + FROM test_aggregates t2 + WHERE t2.category = t1.category + ) AS filtered_sum, + ( + SELECT COUNT(*) FILTER (WHERE t2.status = 'active') + FROM test_aggregates t2 + WHERE t2.category = t1.category + ) AS filtered_count +FROM test_aggregates t1 +GROUP BY t1.category; + +--echo # +--echo # Derived table (FROM subquery) with FILTER +--echo # + +SELECT category, filtered_avg, filtered_sum +FROM ( + SELECT + category, + AVG(value) FILTER (WHERE status = 'active') AS filtered_avg, + SUM(value) FILTER (WHERE value > 100) AS filtered_sum + FROM test_aggregates + GROUP BY category +) AS derived_table +WHERE filtered_avg IS NOT NULL; + +--echo # +--echo # Scalar subquery with FILTER +--echo # + +SELECT + category, + value, + (SELECT AVG(value) FILTER (WHERE status = 'active') FROM test_aggregates) AS global_active_avg, + value - (SELECT AVG(value) FILTER (WHERE status = 'active') FROM test_aggregates) AS diff_from_avg +FROM test_aggregates; + +SELECT + AVG(value) FILTER (WHERE value > (SELECT AVG(value) FROM test_aggregates)) as avg_result +FROM test_aggregates; + +--echo # +--echo # IN subquery in FILTER WHERE +--echo # + +SELECT + COUNT(*) FILTER (WHERE category IN (SELECT DISTINCT category FROM test_aggregates WHERE status = 'active')) as count_result +FROM test_aggregates; + +--echo # +--echo # AVG with STD in subquery +--echo # + +SELECT AVG(x) FROM (SELECT STD(value) FILTER (WHERE 1=1) as x FROM test_aggregates GROUP BY category) t; + +--echo # +--echo # Aggregates with GROUP BY and ORDER BY +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as avg_val, + weighted_avg(value, amount) FILTER (WHERE status = 'active') as weighted_avg_val, + SUM(value) FILTER (WHERE value > 100) as sum_val, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_val, + MIN(value) FILTER (WHERE amount > 50) as 
min_val, + MAX(value) FILTER (WHERE id > 3) as max_val, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE name IS NOT NULL) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value > 0) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE value > 50) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE id > 2) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE name IS NOT NULL) as json_arrayagg_result, + JSON_OBJECTAGG(key_name, value_col) FILTER (WHERE name IS NOT NULL) as json_objectagg_result, + STD(value) FILTER (WHERE value >= 0) as std_result, + STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE amount >= 0) as variance_result, + VAR_SAMP(value) FILTER (WHERE category = 'Clothing' AND status IS NOT NULL) as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE status = 'active')) as collect_result +FROM test_aggregates +GROUP BY category +ORDER BY avg_val DESC, weighted_avg_val DESC, sum_val DESC; + +--echo # +--echo # Complete query with WHERE, GROUP BY, HAVING, ORDER BY +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as avg_val, + weighted_avg(value, amount) FILTER (WHERE status = 'active') as weighted_avg_val, + SUM(value) FILTER (WHERE value > 100) as sum_val, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_val, + MIN(value) FILTER (WHERE amount > 50) as min_val, + MAX(value) FILTER (WHERE id > 3) as max_val, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE name IS NOT NULL) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value > 0) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE value > 50) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE id > 2) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE name IS NOT NULL) as json_arrayagg_result, + JSON_OBJECTAGG(key_name, value_col) FILTER (WHERE name IS NOT NULL) as json_objectagg_result, + STD(value) FILTER (WHERE value >= 0) as std_result, + 
STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE amount >= 0) as variance_result, + VAR_SAMP(value) FILTER (WHERE category = 'Clothing' AND status IS NOT NULL) as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE status = 'active')) as collect_result +FROM test_aggregates +WHERE status = 'active' +GROUP BY category +HAVING AVG(value) FILTER (WHERE status = 'active') > 0 AND SUM(value) FILTER (WHERE value > 100) > 0 +ORDER BY avg_val DESC, sum_val DESC; + +--echo # +--echo # Aggregates with GROUP BY, ORDER BY, and LIMIT +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as avg_val, + weighted_avg(value, amount) FILTER (WHERE status = 'active') as weighted_avg_val, + SUM(value) FILTER (WHERE value > 100) as sum_val, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_val, + MIN(value) FILTER (WHERE amount > 50) as min_val, + MAX(value) FILTER (WHERE id > 3) as max_val, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE name IS NOT NULL) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value > 0) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE value > 50) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE id > 2) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE name IS NOT NULL) as json_arrayagg_result, + JSON_OBJECTAGG(key_name, value_col) FILTER (WHERE name IS NOT NULL) as json_objectagg_result, + STD(value) FILTER (WHERE value >= 0) as std_result, + STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE amount >= 0) as variance_result, + VAR_SAMP(value) FILTER (WHERE category = 'Clothing' AND status IS NOT NULL) as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE status = 'active')) as collect_result +FROM test_aggregates +GROUP BY category +ORDER BY AVG(value) FILTER (WHERE status = 'active') DESC, weighted_avg(value, amount) FILTER (WHERE status = 'active') DESC +LIMIT 
2; + +--echo # +--echo # Aggregates with ROLLUP +--echo # + +SELECT category, status, + AVG(value) FILTER (WHERE status = 'active') as avg_val, + weighted_avg(value, amount) FILTER (WHERE status = 'active') as weighted_avg_val, + SUM(value) FILTER (WHERE value > 100) as sum_val, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_val, + MIN(value) FILTER (WHERE amount > 50) as min_val, + MAX(value) FILTER (WHERE id > 3) as max_val, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE name IS NOT NULL) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value > 0) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE value > 50) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE id > 2) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE name IS NOT NULL) as json_arrayagg_result, + JSON_OBJECTAGG(key_name, value_col) FILTER (WHERE name IS NOT NULL) as json_objectagg_result, + STD(value) FILTER (WHERE value >= 0) as std_result, + STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE amount >= 0) as variance_result, + VAR_SAMP(value) FILTER (WHERE category = 'Clothing' AND status IS NOT NULL) as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE status = 'active')) as collect_result +FROM test_aggregates +GROUP BY category, status WITH ROLLUP; + +--echo # +--echo # COUNT with column names (not just *) +--echo # + +SELECT + COUNT(value) FILTER (WHERE status = 'active') as count_value, + COUNT(category) FILTER (WHERE value > 100) as count_category, + COUNT(name) FILTER (WHERE category = 'Electronics') as count_name +FROM test_aggregates; + +--echo # +--echo # COUNT DISTINCT with multiple columns +--echo # + +SELECT + COUNT(DISTINCT category, status) FILTER (WHERE status = 'active' OR value > 100) as count_distinct +FROM test_aggregates; + +--echo # +--echo # FILTER on indexed column +--echo # + +--echo # Create index on status column +CREATE INDEX idx_status ON 
test_aggregates(status); +CREATE INDEX idx_category ON test_aggregates(category); + +SELECT + AVG(value) FILTER (WHERE status = 'active') as avg_result, + weighted_avg(value, amount) FILTER (WHERE status = 'active') as weighted_avg_result, + SUM(value) FILTER (WHERE category = 'Electronics') as sum_result, + COUNT(*) FILTER (WHERE category = 'Electronics') as count_result, + MIN(value) FILTER (WHERE status = 'active') as min_result, + MAX(value) FILTER (WHERE category = 'Electronics') as max_result, + BIT_AND(bit_value) FILTER (WHERE bit_value > 0) as bit_and_result, + STD(value) FILTER (WHERE status = 'active') as std_result, + STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE amount >= 0) as variance_result, + VAR_SAMP(value) FILTER (WHERE category = 'Clothing' AND status IS NOT NULL) as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE category = 'Electronics')) as collect_result +FROM test_aggregates; + +--echo # +--echo # Empty result set (all rows filtered out) +--echo # + +SELECT + AVG(value) FILTER (WHERE 1=0) as avg_result, + weighted_avg(value, amount) FILTER (WHERE 1=0) as weighted_avg_result, + SUM(value) FILTER (WHERE 2=1) as sum_result, + COUNT(*) FILTER (WHERE 3=2) as count_result, + MIN(value) FILTER (WHERE 4=3) as min_result, + MAX(value) FILTER (WHERE 5=4) as max_result, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE 0) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value < 0) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE bit_value < 0) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE bit_value < 0) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE 0) as json_arrayagg_result, + JSON_OBJECTAGG(key_name, value_col) FILTER (WHERE 0) as json_objectagg_result, + STD(value) FILTER (WHERE 5=4) as std_result, + STDDEV_SAMP(value) FILTER (WHERE 5=4) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE 5=4) as variance_result, + 
VAR_SAMP(value) FILTER (WHERE 5=4) as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE 5=4)) as collect_result +FROM test_aggregates; + +--echo # +--echo # FILTER with NULL conditions +--echo # + +SELECT + AVG(value) FILTER (WHERE NULL) as avg_result, + weighted_avg(value, amount) FILTER (WHERE NULL) as weighted_avg_result, + COUNT(*) FILTER (WHERE NULL) as count_result, + SUM(value) FILTER (WHERE NULL) as sum_result, + MIN(value) FILTER (WHERE NULL) as min_result, + MAX(value) FILTER (WHERE NULL) as max_result, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE NULL) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE NULL) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE NULL) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE NULL) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE NULL) as json_arrayagg_result, + JSON_OBJECTAGG(key_name, value_col) FILTER (WHERE NULL) as json_objectagg_result, + STD(value) FILTER (WHERE NULL) as std_result, + STDDEV_SAMP(value) FILTER (WHERE NULL) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE NULL) as variance_result, + VAR_SAMP(value) FILTER (WHERE NULL) as var_samp_result, + ST_AsText(ST_COLLECT(geom) FILTER (WHERE NULL)) as collect_result +FROM test_aggregates; + +--echo # +--echo # FILTER with JOINs +--echo # + +--echo # Create second table for JOIN test +CREATE TABLE test_aggregates2 ( + id INT PRIMARY KEY, + ref_id INT, + extra_value INT +); + +INSERT INTO test_aggregates2 VALUES +(1, 1, 10), +(2, 2, 20), +(3, 3, 30), +(4, 4, 40); + +SELECT + t1.category, + AVG(t1.value) FILTER (WHERE t2.extra_value > 15) as avg_result, + weighted_avg(t1.value, t1.amount) FILTER (WHERE t2.extra_value > 15) as weighted_avg_result, + SUM(t1.value) FILTER (WHERE t2.extra_value > 20) as sum_result, + COUNT(*) FILTER (WHERE t2.extra_value > 25) as count_result +FROM test_aggregates t1 +JOIN test_aggregates2 t2 ON t1.id = t2.ref_id +GROUP BY t1.category; + +DROP TABLE test_aggregates2; + +--echo 
# +--echo # FILTER with window functions - different partitions +--echo # + +SELECT category, status, + AVG(value) FILTER (WHERE status = 'active') OVER (PARTITION BY category) as cat_avg, + AVG(value) FILTER (WHERE value > 100) OVER (PARTITION BY status) as status_avg, + COUNT(*) FILTER (WHERE value IS NOT NULL) OVER (PARTITION BY category) as cat_count, + COUNT(*) FILTER (WHERE amount > 50) OVER (PARTITION BY status) as status_count +FROM test_aggregates +ORDER BY category, status; + +--echo # +--echo # FILTER with empty table +--echo # + +--echo # Create empty table +CREATE TABLE empty_test ( + id INT, + value INT, + status VARCHAR(20) +); + +SELECT + AVG(value) FILTER (WHERE status = 'active') as avg_result, + weighted_avg(value, id) FILTER (WHERE status = 'active') as weighted_avg_result, + SUM(value) FILTER (WHERE value > 100) as sum_result, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_result, + MIN(value) FILTER (WHERE id > 2) as min_result, + MAX(value) FILTER (WHERE id > 3) as max_result, + GROUP_CONCAT(status ORDER BY status SEPARATOR ',') FILTER (WHERE status IS NOT NULL) as group_concat_result, + BIT_AND(value) FILTER (WHERE value > 0) as bit_and_result, + BIT_OR(value) FILTER (WHERE value > 50) as bit_or_result, + BIT_XOR(value) FILTER (WHERE id > 2) as bit_xor_result, + JSON_ARRAYAGG(status) FILTER (WHERE status IS NOT NULL) as json_arrayagg_result, + JSON_OBJECTAGG(status, value) FILTER (WHERE status IS NOT NULL) as json_objectagg_result, + STD(value) FILTER (WHERE value >= 0) as std_result, + STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE value >= 0) as variance_result, + VAR_SAMP(value) FILTER (WHERE value >= 0) as var_samp_result, + ST_AsText(ST_COLLECT(value) FILTER (WHERE value >= 0)) as collect_result +FROM empty_test; + +DROP TABLE empty_test; + +--echo # +--echo # FILTER with CASE in WHERE clause +--echo # + +SELECT + AVG(value) FILTER (WHERE CASE WHEN status = 'active' THEN 
1 ELSE 0 END = 1) as avg_result, + weighted_avg(value, amount) FILTER (WHERE CASE WHEN status = 'active' THEN 1 ELSE 0 END = 1) as weighted_avg_result, + SUM(value) FILTER (WHERE CASE WHEN value > 100 THEN 1 ELSE 0 END = 1) as sum_result, + COUNT(*) FILTER (WHERE CASE WHEN category = 'Electronics' THEN 1 ELSE 0 END = 1) as count_result +FROM test_aggregates; + +--echo # +--echo # ORDER BY with FILTER aggregates +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as avg_val, + weighted_avg(value, amount) FILTER (WHERE status = 'active') as weighted_avg_val, + SUM(value) FILTER (WHERE value > 100) as sum_val +FROM test_aggregates +GROUP BY category +ORDER BY AVG(value) FILTER (WHERE status = 'active') DESC, weighted_avg(value, amount) FILTER (WHERE status = 'active') DESC, SUM(value) FILTER (WHERE value > 100) DESC; + +--echo # +--echo # Aggregates on id column itself with FILTER +--echo # + +SELECT + AVG(id) FILTER (WHERE status = 'active') as avg_id, + weighted_avg(id, amount) FILTER (WHERE status = 'active') as weighted_avg_id, + SUM(id) FILTER (WHERE value > 100) as sum_id, + COUNT(id) FILTER (WHERE category = 'Electronics') as count_id, + MIN(id) FILTER (WHERE amount > 50) as min_id, + MAX(id) FILTER (WHERE value IS NOT NULL) as max_id, + COUNT(DISTINCT id) FILTER (WHERE id > 3) as distinct_count_id +FROM test_aggregates; + +--echo # +--echo # Aggregates on id with GROUP BY and FILTER +--echo # + +SELECT category, + AVG(id) FILTER (WHERE id > 3) as avg_id, + weighted_avg(id, amount) FILTER (WHERE id > 3) as weighted_avg_id, + SUM(id) FILTER (WHERE status = 'active') as sum_id, + COUNT(id) FILTER (WHERE value > 100) as count_id, + MIN(id) FILTER (WHERE amount > 50) as min_id, + MAX(id) FILTER (WHERE value IS NOT NULL) as max_id +FROM test_aggregates +GROUP BY category; + +--echo # +--echo # STORED AGGREGATE FUNCTION TESTS +--echo # + +--echo # Multiple stored aggregates with different FILTER conditions +SELECT + category, + 
weighted_avg(value, amount) FILTER (WHERE status = 'active') as active_weighted, + weighted_avg(value, amount) FILTER (WHERE status = 'inactive') as inactive_weighted, + weighted_avg(value, amount) FILTER (WHERE value > 100) as high_value_weighted, + weighted_avg(value, amount) FILTER (WHERE value <= 100) as low_value_weighted, + weighted_avg(value, amount) FILTER (WHERE category = 'Electronics') as electronics_weighted +FROM test_aggregates +GROUP BY category; + +--echo # Stored aggregate with subquery correlation and FILTER +SELECT + t1.category, + ( + SELECT weighted_avg(t2.value, t2.amount) FILTER (WHERE t2.value > t1.value) + FROM test_aggregates t2 + WHERE t2.category = t1.category + ) as correlated_weighted_avg +FROM test_aggregates t1 +GROUP BY t1.category, t1.value +ORDER BY t1.category, t1.value; + +--echo # Stored aggregate with all rows filtered out +SELECT + category, + weighted_avg(value, amount) FILTER (WHERE 1 = 0) as no_match_weighted +FROM test_aggregates +GROUP BY category; + +--echo # Stored aggregate with complex nested conditions in FILTER +SELECT + category, + weighted_avg(value, amount) FILTER ( + WHERE (status = 'active' AND value > 50) + OR (category = 'Electronics' AND amount > 200) + OR (value BETWEEN 100 AND 200 AND bit_value IS NOT NULL) + ) as complex_filter_weighted +FROM test_aggregates +GROUP BY category; + +--echo # +--echo # SEQUENCE ENGINE TESTS +--echo # + +--echo # COUNT with FILTER on sequence +SELECT COUNT(*) FILTER (WHERE seq < 5) FROM seq_1_to_10; + +--echo # Verify FILTER and CASE produce same results +SELECT + COUNT(*) FILTER (WHERE seq < 5) AS filtered_count, + SUM(CASE WHEN seq < 5 THEN 1 ELSE 0 END) AS case_count +FROM seq_1_to_10; + +--echo # SUM with FILTER on sequence +SELECT SUM(seq) FILTER (WHERE seq < 5) FROM seq_1_to_10; + +--echo # Verify FILTER and CASE produce same results +SELECT + SUM(seq) FILTER (WHERE seq < 5) AS filtered_sum, + SUM(CASE WHEN seq < 5 THEN seq ELSE 0 END) AS case_sum +FROM seq_1_to_10; + 
+--echo # Multiple FILTER conditions on sequence +SELECT + COUNT(*) FILTER (WHERE seq <= 3) AS count_le_3, + COUNT(*) FILTER (WHERE seq > 7) AS count_gt_7, + COUNT(*) FILTER (WHERE seq BETWEEN 4 AND 6) AS count_between, + SUM(seq) FILTER (WHERE seq % 2 = 0) AS sum_even, + SUM(seq) FILTER (WHERE seq % 2 = 1) AS sum_odd +FROM seq_1_to_10; + +--echo # Mixed filtered and unfiltered aggregates on sequence +SELECT + COUNT(*) AS unfiltered_count, + COUNT(*) FILTER (WHERE seq < 5) AS filtered_count, + SUM(seq) AS unfiltered_sum, + SUM(seq) FILTER (WHERE seq >= 5) AS filtered_sum +FROM seq_1_to_10; + +--echo # FILTER with different sequence ranges +SELECT COUNT(*) FILTER (WHERE seq < 50) FROM seq_1_to_100; +SELECT SUM(seq) FILTER (WHERE seq BETWEEN 10 AND 20) FROM seq_1_to_100; + +--echo # AVG, MIN, MAX with FILTER on sequence +SELECT + AVG(seq) AS total_avg, + AVG(seq) FILTER (WHERE seq <= 5) AS filtered_avg, + MIN(seq) FILTER (WHERE seq > 3) AS filtered_min, + MAX(seq) FILTER (WHERE seq < 8) AS filtered_max +FROM seq_1_to_10; + +--echo # FILTER that matches no rows +SELECT COUNT(*) FILTER (WHERE seq > 100) FROM seq_1_to_10; +SELECT SUM(seq) FILTER (WHERE seq < 0) FROM seq_1_to_10; + +--echo # FILTER that matches all rows +SELECT COUNT(*) FILTER (WHERE seq > 0) FROM seq_1_to_10; +SELECT SUM(seq) FILTER (WHERE seq <= 10) FROM seq_1_to_10; + +--echo # Sequence with window functions and FILTER +SELECT + seq, + SUM(seq) FILTER (WHERE seq <= 3) OVER (ORDER BY seq) AS filtered_total, + COUNT(*) FILTER (WHERE seq % 2 = 0) OVER (ORDER BY seq) AS even_count +FROM seq_1_to_5; + + +--echo # +--echo # FILTER with loose index scan (QUICK_GROUP_MIN_MAX_SELECT) +--echo # + +CREATE TABLE t1 ( + a1 CHAR(64), a2 CHAR(64), b CHAR(16), c CHAR(16) NOT NULL, d CHAR(16), dummy CHAR(64) DEFAULT ' ' +) charset=latin1; + +INSERT INTO t1 (a1, a2, b, c, d) VALUES +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), 
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), +('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'), +('d','a','a','a411','xy1'),('d','a','a','b411','xy2'),('d','a','a','c411','xy3'),('d','a','a','d411','xy4'), +('d','a','b','e412','xy1'),('d','a','b','f412','xy2'),('d','a','b','g412','xy3'),('d','a','b','h412','xy4'), +('d','b','a','i421','xy1'),('d','b','a','j421','xy2'),('d','b','a','k421','xy3'),('d','b','a','l421','xy4'), +('d','b','b','m422','xy1'),('d','b','b','n422','xy2'),('d','b','b','o422','xy3'),('d','b','b','p422','xy4'); + +CREATE INDEX idx_t1_0 ON t1 (a1); +CREATE INDEX idx_t1_1 ON t1 (a1,a2,b,c); +CREATE INDEX idx_t1_2 ON t1 (a1,a2,b); +ANALYZE TABLE t1; + +--echo # t2 is the same as t1, but with some NULLs in the MIN/MAX column +CREATE TABLE t2 ( + a1 CHAR(64), a2 CHAR(64) NOT NULL, b CHAR(16), c CHAR(16), d CHAR(16), dummy CHAR(64) DEFAULT ' ' +) 
charset=latin1; + +INSERT INTO t2 SELECT * FROM t1; + +--echo # add rows with NULL's in the MIN/MAX column +INSERT INTO t2 (a1, a2, b, c, d) VALUES +('a','a',NULL,'a777','xyz'),('a','a',NULL,'a888','xyz'),('a','a',NULL,'a999','xyz'), +('a','a','a',NULL,'xyz'), +('a','a','b',NULL,'xyz'), +('a','b','a',NULL,'xyz'), +('c','a',NULL,'c777','xyz'),('c','a',NULL,'c888','xyz'),('c','a',NULL,'c999','xyz'), +('d','b','b',NULL,'xyz'); + +CREATE INDEX idx_t2_0 ON t2 (a1); +CREATE INDEX idx_t2_1 ON t2 (a1,a2,b,c); +CREATE INDEX idx_t2_2 ON t2 (a1,a2,b); +ANALYZE TABLE t2; + +--echo # Queries without WHERE clause - basic MIN/MAX with FILTER +SELECT a1, MIN(a2) FILTER (WHERE a2 > 'a') FROM t1 GROUP BY a1; +SELECT a1, MAX(a2) FILTER (WHERE a2 < 'b') FROM t1 GROUP BY a1; +SELECT a1, a2, b, MIN(c) FILTER (WHERE c > 'b'), MAX(c) FILTER (WHERE c < 'n') FROM t1 GROUP BY a1,a2,b; + +--echo # Queries with predicates over GROUP BY attributes +SELECT a1, MAX(c) FILTER (WHERE c LIKE '%11%') FROM t1 WHERE a1 IN ('a','b','d') GROUP BY a1,a2,b; +SELECT a1,a2,b,MIN(c) FILTER (WHERE c > 'c'),MAX(c) FILTER (WHERE c < 'm') FROM t1 WHERE a1 < 'd' GROUP BY a1,a2,b; +SELECT a1,a2,b,MIN(c) FILTER (WHERE d != 'xy2'),MAX(c) FILTER (WHERE d != 'xy3') FROM t1 WHERE (c > 'b111') AND (c <= 'g112') GROUP BY a1,a2,b; + +--echo # IS NULL predicates +SELECT a1,a2,b,MIN(c) FILTER (WHERE c > 'a5') FROM t2 WHERE (a2 = 'a') AND b IS NULL GROUP BY a1; + +--echo # Mixed predicates +SELECT a1,a2,b,MIN(c) FILTER (WHERE d LIKE 'xy%'),MAX(c) FILTER (WHERE d LIKE 'xy%') FROM t1 WHERE (a1 >= 'c' OR a2 < 'b') AND (b > 'a') GROUP BY a1,a2,b; + +--echo # Test with NULLs in MIN/MAX column +SELECT a1,a2,b,MIN(c) FILTER (WHERE c > 'a5'),MAX(c) FILTER (WHERE c < 'z') FROM t2 GROUP BY a1,a2,b; + +--echo # Test FILTER with HAVING clause +SELECT a1, MIN(c) FILTER (WHERE d > 'xy1') AS min_c FROM t1 GROUP BY a1 HAVING MIN(c) FILTER (WHERE d > 'xy1') > 'b1'; + +--echo # +--echo # Test large integer with FILTER (stolen from 
func_group.test :) +--echo # + +CREATE TABLE t1_min (id INT, a DOUBLE); +INSERT INTO t1_min VALUES (1, 0x7FFFFFFFFFFFFFFF),(2, 0x7FFFFFFFFFFFFFFF), (3, NULL); +SELECT + CAST(MIN(a) FILTER (WHERE a IS NOT NULL) AS SIGNED) AS cast_min +FROM t1_min GROUP BY id; +DROP TABLE t1_min; + +--echo # +--echo # ERROR CASES +--echo # + +--echo # FILTER with RANK (should error - FILTER only works with aggregates) +--error ER_PARSE_ERROR +SELECT RANK() FILTER (WHERE status = 'active') OVER (ORDER BY value) FROM test_aggregates; + +--echo # FILTER with LAG (should error - FILTER only works with aggregates) +--error ER_PARSE_ERROR +SELECT LAG(value) FILTER (WHERE status = 'active') OVER (ORDER BY id) FROM test_aggregates; + +--echo # OVER clause before FILTER clause (should error - correct order is FILTER then OVER) +--error ER_PARSE_ERROR +SELECT AVG(value) OVER (PARTITION BY category) FILTER (WHERE status = 'active') FROM test_aggregates; + +--echo # Regular scalar function (should error - FILTER only for aggregates) +--error ER_WRONG_USAGE +SELECT UPPER(name) FILTER (WHERE status = 'active') FROM test_aggregates; + +--echo # FILTER on plain column (should error - not a function) +--error ER_PARSE_ERROR +SELECT value FILTER (WHERE status = 'active') FROM test_aggregates; + +--echo # Empty FILTER clause (should error - WHERE condition required) +--error ER_PARSE_ERROR +SELECT AVG(value) FILTER () FROM test_aggregates; + +--echo # Nested FILTER clauses (should error - cannot nest FILTER) +--error ER_INVALID_GROUP_FUNC_USE +SELECT AVG(value) FILTER (WHERE COUNT(*) FILTER (WHERE status = 'active') > 5) FROM test_aggregates; + +--echo # Multiple WHERE keywords (should error) +--error ER_PARSE_ERROR +SELECT AVG(value) FILTER (WHERE status = 'active' WHERE value > 100) FROM test_aggregates; + +--echo # Using column alias in FILTER WHERE (should error - alias not available) +--error ER_BAD_FIELD_ERROR +SELECT value as val, AVG(value) FILTER (WHERE val > 100) FROM test_aggregates GROUP BY 
value; + +--echo # Window function in FILTER WHERE condition (should error - not allowed) +--error ER_WRONG_USAGE +SELECT AVG(value) FILTER (WHERE ROW_NUMBER() OVER (ORDER BY id) > 2) FROM test_aggregates; + +--echo # This should error - aggregate in WHERE clause +--error ER_INVALID_GROUP_FUNC_USE +SELECT category, AVG(value) FILTER (WHERE value > AVG(value)) FROM test_aggregates GROUP BY category; + +--echo # Missing WHERE keyword (should error) +--error ER_PARSE_ERROR +SELECT AVG(value) FILTER (status = 'active') FROM test_aggregates; + +--echo # Missing parentheses (should error) +--error ER_PARSE_ERROR +SELECT AVG(value) FILTER WHERE status = 'active' FROM test_aggregates; + +--echo # +--echo # Illegal use of FILTER with non-aggregate stored function +--echo # + +delimiter |; +CREATE FUNCTION simple_double(x INT) RETURNS INT +DETERMINISTIC +BEGIN + RETURN x * 2; +END| +delimiter ;| + +--echo # FILTER clause with non-aggregate stored function should fail +--error ER_WRONG_USAGE +SELECT simple_double(value) FILTER (WHERE status = 'active') FROM test_aggregates; + +--echo # +--echo # CLEANUP +--echo # + +DROP FUNCTION weighted_avg; +DROP FUNCTION simple_double; +DROP TABLE test_aggregates, t1, t2; +--enable_query_log + +--echo # +--echo # End of aggregates FILTER test +--echo # + diff --git a/sql/item_func.cc b/sql/item_func.cc index 81d514d457f28..b4b68652d9063 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -6683,7 +6683,8 @@ longlong Item_func_row_count::val_int() Item_func_sp::Item_func_sp(THD *thd, Name_resolution_context *context_arg, sp_name *name, const Sp_handler *sph): - Item_func(thd), Item_sp(thd, context_arg, name), m_handler(sph) + Item_func(thd), Item_sp(thd, context_arg, name), m_handler(sph), + m_filter(NULL) { set_maybe_null(); } @@ -6692,7 +6693,8 @@ Item_func_sp::Item_func_sp(THD *thd, Name_resolution_context *context_arg, Item_func_sp::Item_func_sp(THD *thd, Name_resolution_context *context_arg, sp_name *name_arg, const Sp_handler 
*sph, List &list): - Item_func(thd, list), Item_sp(thd, context_arg, name_arg), m_handler(sph) + Item_func(thd, list), Item_sp(thd, context_arg, name_arg), m_handler(sph), + m_filter(NULL) { set_maybe_null(); } @@ -6904,6 +6906,12 @@ Item_func_sp::fix_fields(THD *thd, Item **ref) if (res) DBUG_RETURN(TRUE); + if (m_filter && m_sp->agg_type() != GROUP_AGGREGATE) + { + my_error(ER_WRONG_USAGE, MYF(0), "FILTER", "NON-AGGREGATE FUNCTION"); + DBUG_RETURN(TRUE); + } + if (m_sp->agg_type() == GROUP_AGGREGATE) { Item_sum_sp *item_sp; @@ -6926,6 +6934,10 @@ Item_func_sp::fix_fields(THD *thd, Item **ref) DBUG_RETURN(TRUE); *ref= item_sp; item_sp->name= name; + if (m_filter) + { + item_sp->set_filter(m_filter); + } bool err= item_sp->fix_fields(thd, ref); if (err) DBUG_RETURN(TRUE); diff --git a/sql/item_func.h b/sql/item_func.h index 0be8cf61591b1..db77957670108 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -4054,6 +4054,7 @@ class Item_func_sp :public Item_func, { private: const Sp_handler *m_handler; + Item *m_filter; bool execute(); @@ -4076,6 +4077,8 @@ class Item_func_sp :public Item_func, virtual ~Item_func_sp() = default; + void set_filter(Item *filter) { m_filter= filter; } + void update_used_tables() override; void cleanup() override; diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc index bca2c5b8a7561..65480e37eacb3 100644 --- a/sql/item_jsonfunc.cc +++ b/sql/item_jsonfunc.cc @@ -4805,6 +4805,9 @@ Item_func_json_objectagg::fix_fields(THD *thd, Item **ref) / collation.collation->mbminlen * collation.collation->mbmaxlen); + if (fix_filter(thd)) + return TRUE; + if (check_sum_func(thd, ref)) return TRUE; @@ -4842,7 +4845,7 @@ bool Item_func_json_objectagg::add() String *key; key= args[0]->val_str(&buf); - if (args[0]->is_null()) + if (args[0]->is_null() || !filter_passed()) return 0; null_value= 0; diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 7746d47421911..b4a460446bb1f 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -499,6 +499,8 @@ 
Item_sum::Item_sum(THD *thd, Item_sum *item): with_distinct= item->with_distinct; if (item->aggr) set_aggregator(thd, item->aggr->Aggrtype()); + if (item->has_filter()) + set_filter(item->filter_expr); } @@ -532,6 +534,12 @@ void Item_sum::print(String *str, enum_query_type query_type) pargs[i]->print(str, query_type); } str->append(')'); + if (has_filter()) + { + str->append(STRING_WITH_LEN(" FILTER(WHERE ")); + filter_expr->print(str, query_type); + str->append(')'); + } } void Item_sum::fix_num_length_and_dec() @@ -568,6 +576,51 @@ void Item_sum::update_used_tables () item->used_tables() == 0 && !item->const_item() */ } + if (has_filter()) + { + filter_expr->update_used_tables(); + used_tables_cache|= filter_expr->used_tables(); + } +} + + +bool Item_sum::filter_passed() +{ + if (!has_filter()) + return true; + /* Skip filter check if we're in endup phase (processing distinct values + that already passed the filter during collection phase) */ + if (aggr && aggr->is_in_endup_phase()) + return true; + return filter_expr->val_int(); +} + + +bool Item_sum::fix_filter(THD *thd) +{ + if (!has_filter()) + return false; + + if (filter_expr->fix_fields_if_needed_for_scalar(thd, &filter_expr)) + return true; + + /* We should ignore FIELD's in filter expressions */ + with_flags|= (filter_expr->with_flags & ~item_with_t::FIELD); + + if (filter_expr->type() == Item::SUM_FUNC_ITEM || + filter_expr->type() == Item::WINDOW_FUNC_ITEM) + { + my_error(ER_WRONG_USAGE, MYF(0), "aggregate function", "FILTER"); + return true; + } + + if (filter_expr->with_window_func()) + { + my_error(ER_WRONG_USAGE, MYF(0), "window function", "FILTER"); + return true; + } + + return false; } @@ -577,6 +630,11 @@ Item *Item_sum::set_arg(uint i, THD *thd, Item *new_val) return new_val; } +void Item_sum::set_filter(THD *thd, Item *new_filter_expr) +{ + thd->change_item_tree(&filter_expr, new_filter_expr); +} + int Item_sum::set_aggregator(THD *thd, Aggregator::Aggregator_type aggregator) { @@ -984,6 
+1042,9 @@ bool Aggregator_distinct::add() if (copy_funcs(tmp_table_param->items_to_copy, table->in_use)) return TRUE; + if (!item_sum->filter_passed()) + return 0; + for (Field **field=table->field ; *field ; field++) if ((*field)->is_real_null(0)) return 0; // Don't count NULL @@ -1009,7 +1070,7 @@ bool Aggregator_distinct::add() else { item_sum->get_arg(0)->save_in_field(table->field[0], FALSE); - if (table->field[0]->is_null()) + if (table->field[0]->is_null() || !item_sum->filter_passed()) return 0; DBUG_ASSERT(tree); item_sum->null_value= 0; @@ -1124,6 +1185,10 @@ Item_sum_num::fix_fields(THD *thd, Item **ref) /* We should ignore FIELD's in arguments to sum functions */ with_flags|= (args[i]->with_flags & ~item_with_t::FIELD); } + + if (fix_filter(thd)) + return TRUE; + result_field=0; max_length=float_length(decimals); null_value=1; @@ -1156,6 +1221,9 @@ Item_sum_min_max::fix_fields(THD *thd, Item **ref) if (fix_length_and_dec(thd)) DBUG_RETURN(TRUE); + if (fix_filter(thd)) + DBUG_RETURN(TRUE); + if (!is_window_func_sum_expr()) setup_hybrid(thd, args[0], NULL); result_field=0; @@ -1383,6 +1451,10 @@ Item_sum_sp::fix_fields(THD *thd, Item **ref) /* We should ignore FIELD's in arguments to sum functions */ with_flags|= (args[i]->with_flags & ~item_with_t::FIELD); } + + if (fix_filter(thd)) + return TRUE; + result_field= NULL; max_length= float_length(decimals); null_value= 1; @@ -1437,6 +1509,8 @@ Item_sum_sp::execute() bool Item_sum_sp::add() { + if (!filter_passed()) + return false; return execute_impl(current_thd, args, arg_count); } @@ -1653,7 +1727,7 @@ void Item_sum_sum::add_helper(bool perform_removal) direct_reseted_field= FALSE; my_decimal value; const my_decimal *val= aggr->arg_val_decimal(&value); - if (!aggr->arg_is_null(true)) + if (!aggr->arg_is_null(true) && filter_passed()) { if (perform_removal) { @@ -1698,7 +1772,7 @@ void Item_sum_sum::add_helper(bool perform_removal) - sum-= aggr->arg_val_real(); + sum-= filter_passed() ? aggr->arg_val_real() : 0.0; /* skip filtered rows */ else - sum+= aggr->arg_val_real(); + sum+= filter_passed() ? aggr->arg_val_real() : 0.0; /* skip filtered rows */ - if 
(!aggr->arg_is_null(true)) + if (!aggr->arg_is_null(true) && filter_passed()) { if (perform_removal) { @@ -1928,7 +2002,7 @@ bool Item_sum_count::add() else { direct_reseted_field= FALSE; - if (aggr->arg_is_null(false)) + if (aggr->arg_is_null(false) || !filter_passed()) DBUG_RETURN(0); count++; } @@ -1943,7 +2017,7 @@ bool Item_sum_count::add() void Item_sum_count::remove() { DBUG_ASSERT(aggr->Aggrtype() == Aggregator::SIMPLE_AGGREGATOR); - if (aggr->arg_is_null(false)) + if (aggr->arg_is_null(false) || !filter_passed()) return; if (count > 0) count--; @@ -2051,7 +2125,7 @@ bool Item_sum_avg::add() { if (Item_sum_sum::add()) return TRUE; - if (!aggr->arg_is_null(true)) + if (!aggr->arg_is_null(true) && filter_passed()) count++; return FALSE; } @@ -2059,7 +2133,7 @@ bool Item_sum_avg::add() void Item_sum_avg::remove() { Item_sum_sum::remove(); - if (!aggr->arg_is_null(true)) + if (!aggr->arg_is_null(true) && filter_passed()) { if (count > 0) count--; @@ -2299,7 +2373,7 @@ bool Item_sum_variance::add() */ double nr= args[0]->val_real(); - if (!args[0]->null_value) + if (!args[0]->null_value && filter_passed()) m_stddev.recurrence_next(nr); return 0; } @@ -2336,7 +2410,7 @@ void Item_sum_variance::reset_field() nr= args[0]->val_real(); /* sets null_value as side-effect */ - if (args[0]->null_value) + if (args[0]->null_value || !filter_passed()) bzero(res,Stddev::binary_size()); else Stddev(nr).to_binary(res); @@ -2367,7 +2441,7 @@ void Item_sum_variance::update_field() double nr= args[0]->val_real(); /* sets null_value as side-effect */ - if (args[0]->null_value) + if (args[0]->null_value || !filter_passed()) return; /* Serialize format is (double)m, (double)s, (longlong)count */ @@ -2552,7 +2626,7 @@ bool Item_sum_min::add() DBUG_PRINT("info", ("null_value: %s", null_value ? 
"TRUE" : "FALSE")); /* args[0] < value */ arg_cache->cache_value(); - if (!arg_cache->null_value && + if (!arg_cache->null_value && filter_passed() && (null_value || cmp->compare() < 0)) { value->store(arg_cache); @@ -2592,7 +2666,7 @@ bool Item_sum_max::add() /* args[0] > value */ arg_cache->cache_value(); DBUG_PRINT("info", ("null_value: %s", null_value ? "TRUE" : "FALSE")); - if (!arg_cache->null_value && + if (!arg_cache->null_value && filter_passed() && (null_value || cmp->compare() > 0)) { value->store(arg_cache); @@ -2641,7 +2715,7 @@ bool Item_sum_bit::clear_as_window() bool Item_sum_bit::remove_as_window(ulonglong value) { DBUG_ASSERT(as_window_function); - if (num_values_added == 0) + if (num_values_added == 0 || args[0]->null_value || !filter_passed()) return 0; // Nothing to remove. for (int i= 0; i < NUM_BIT_COUNTERS; i++) @@ -2687,7 +2761,7 @@ void Item_sum_or::set_bits_from_counters() bool Item_sum_or::add() { ulonglong value= (ulonglong) args[0]->val_int(); - if (!args[0]->null_value) + if (!args[0]->null_value && filter_passed()) { if (as_window_function) return add_as_window(value); @@ -2715,7 +2789,7 @@ Item *Item_sum_xor::copy_or_same(THD* thd) bool Item_sum_xor::add() { ulonglong value= (ulonglong) args[0]->val_int(); - if (!args[0]->null_value) + if (!args[0]->null_value && filter_passed()) { if (as_window_function) return add_as_window(value); @@ -2750,7 +2824,7 @@ Item *Item_sum_and::copy_or_same(THD* thd) bool Item_sum_and::add() { ulonglong value= (ulonglong) args[0]->val_int(); - if (!args[0]->null_value) + if (!args[0]->null_value && filter_passed()) { if (as_window_function) return add_as_window(value); @@ -2784,7 +2858,7 @@ void Item_sum_min_max::reset_field() String tmp(buff,sizeof(buff),result_field->charset()),*res; res= arg0->val_str(&tmp); - if (arg0->null_value) + if (arg0->null_value || !filter_passed()) { result_field->set_null(); result_field->reset(); @@ -2802,7 +2876,7 @@ void Item_sum_min_max::reset_field() if 
(maybe_null()) { - if (arg0->null_value) + if (arg0->null_value || !filter_passed()) { nr=0; result_field->set_null(); @@ -2820,7 +2894,7 @@ void Item_sum_min_max::reset_field() if (maybe_null()) { - if (arg0->null_value) + if (arg0->null_value || !filter_passed()) { nr=0.0; result_field->set_null(); @@ -2837,7 +2911,7 @@ void Item_sum_min_max::reset_field() if (maybe_null()) { - if (arg_dec.is_null()) + if (arg_dec.is_null() || !filter_passed()) result_field->set_null(); else result_field->set_notnull(); @@ -2888,7 +2962,7 @@ void Item_sum_sum::reset_field() null_flag= direct_sum_is_null; } else - null_flag= args[0]->null_value; + null_flag= args[0]->null_value || !filter_passed(); if (null_flag) result_field->set_null(); @@ -2910,7 +2984,7 @@ void Item_sum_count::reset_field() direct_counted= FALSE; direct_reseted_field= TRUE; } - else if (!args[0]->maybe_null() || !args[0]->is_null()) + else if ((!args[0]->maybe_null() || !args[0]->is_null()) && filter_passed()) nr= 1; DBUG_PRINT("info", ("nr: %lld", nr)); int8store(res,nr); @@ -2926,8 +3000,17 @@ void Item_sum_avg::reset_field() { longlong tmp; VDec value(args[0]); - tmp= value.is_null() ? 
0 : 1; - value.to_binary(res, f_precision, f_scale); + if (value.is_null() || !filter_passed()) + { + tmp= 0; + my_decimal_set_zero(dec_buffs); + dec_buffs[0].to_binary(res, f_precision, f_scale); + } + else + { + tmp= 1; + value.to_binary(res, f_precision, f_scale); + } res+= dec_bin_size; int8store(res, tmp); } @@ -2935,7 +3018,7 @@ void Item_sum_avg::reset_field() { double nr= args[0]->val_real(); - if (args[0]->null_value) + if (args[0]->null_value || !filter_passed()) bzero(res,sizeof(double)+sizeof(longlong)); else { @@ -2987,12 +3070,12 @@ void Item_sum_sum::update_field() else { arg_val= args[0]->val_decimal(&value); - null_flag= args[0]->null_value; + null_flag= args[0]->null_value || !filter_passed(); } if (!null_flag) { - if (!result_field->is_null()) + if (!result_field->is_null() && filter_passed()) { my_decimal field_value(result_field); my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs, arg_val, &field_value); @@ -3021,7 +3104,7 @@ void Item_sum_sum::update_field() else { nr= args[0]->val_real(); - null_flag= args[0]->null_value; + null_flag= args[0]->null_value || !filter_passed(); } if (!null_flag) { @@ -3045,7 +3128,7 @@ void Item_sum_count::update_field() direct_counted= direct_reseted_field= FALSE; nr+= direct_count; } - else if (!args[0]->maybe_null() || !args[0]->is_null()) + else if ((!args[0]->maybe_null() || !args[0]->is_null()) && filter_passed()) nr++; DBUG_PRINT("info", ("nr: %lld", nr)); int8store(res,nr); @@ -3063,7 +3146,7 @@ void Item_sum_avg::update_field() if (result_type() == DECIMAL_RESULT) { VDec tmp(args[0]); - if (!tmp.is_null()) + if (!tmp.is_null() && filter_passed()) { binary2my_decimal(E_DEC_FATAL_ERROR, res, dec_buffs + 1, f_precision, f_scale); @@ -3080,7 +3163,7 @@ void Item_sum_avg::update_field() double nr; nr= args[0]->val_real(); - if (!args[0]->null_value) + if (!args[0]->null_value && filter_passed()) { double old_nr; float8get(old_nr, res); @@ -3107,6 +3190,10 @@ Item *Item_sum_avg::result_item(THD *thd, Field *field) 
void Item_sum_min_max::update_field() { DBUG_ENTER("Item_sum_min_max::update_field"); + if (!filter_passed()) + { + DBUG_VOID_RETURN; + } Item *UNINIT_VAR(tmp_item); if (unlikely(direct_added)) { @@ -3555,6 +3642,9 @@ Item_sum_str::fix_fields(THD *thd, Item **ref) return true; } + if (fix_filter(thd)) + return TRUE; + if (fix_fields_impl(thd, ref)) return TRUE; @@ -4202,7 +4292,7 @@ bool Item_func_group_concat::repack_tree(THD *thd) bool Item_func_group_concat::add(bool exclude_nulls) { - if (always_null && exclude_nulls) + if ((always_null || !filter_passed()) && exclude_nulls) return 0; copy_fields(tmp_table_param); if (copy_funcs(tmp_table_param->items_to_copy, table->in_use)) @@ -4224,6 +4314,9 @@ bool Item_func_group_concat::add(bool exclude_nulls) exclude_nulls) return 0; // Skip row if it contains null + if (!filter_passed() && !exclude_nulls) + field->set_null(); + buf.set_buffer_if_not_allocated(&my_charset_bin); if (tree && (res= field->val_str(&buf))) row_str_len+= res->length(); @@ -4610,6 +4703,13 @@ void Item_func_group_concat::print(String *str, enum_query_type query_type) row_limit->print(str, query_type); } str->append(STRING_WITH_LEN(")")); + + if (has_filter()) + { + str->append(STRING_WITH_LEN(" FILTER(WHERE ")); + (*get_filter())->print(str, query_type); + str->append(')'); + } } @@ -4639,7 +4739,7 @@ bool Item_func_collect::add() { uint current_geometry_srid; has_cached_result= false; - if (tmp_arg[0]->null_value) + if (tmp_arg[0]->null_value || !filter_passed()) return 0; if(is_distinct && list_contains_element(wkb)) @@ -4663,7 +4763,7 @@ void Item_func_collect::remove() { String *wkb= args[0]->val_str(&value); has_cached_result= false; - if (args[0]->null_value) return; + if (args[0]->null_value || !filter_passed()) return; List_iterator geometries_iterator(geometries); String* temp_geometry; diff --git a/sql/item_sum.h b/sql/item_sum.h index 12783892b2e5f..2ed7d96f0b227 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -103,6 +103,13 @@ 
class Aggregator : public Sql_alloc (updated by arg_val*()). */ virtual bool arg_is_null(bool use_null_value) = 0; + + /** + Check if we're currently in the endup() phase processing distinct values. + This is used to skip filter checks for DISTINCT aggregates since the filter + was already applied during the collection phase. + */ + virtual bool is_in_endup_phase() const { return false; } }; @@ -341,11 +348,34 @@ class Item_sum :public Item_func_or_sum /* TRUE if this is aggregate function of a window function */ bool window_func_sum_expr_flag; + /* Optional filter clause for the aggregate function */ + Item *filter_expr; + public: bool has_force_copy_fields() const { return force_copy_fields; } bool has_with_distinct() const { return with_distinct; } + /* Filter expression helpers */ + void set_filter(Item *filter_expr) { this->filter_expr= filter_expr; } + void set_filter(THD *thd, Item *new_filter_expr); + bool has_filter() const { return filter_expr != NULL; } + Item **get_filter() { return &filter_expr; } + bool filter_passed(); + bool fix_filter(THD *thd); + + bool walk(Item_processor processor, void *arg, item_walk_flags flags) override + { + if (walk_args(processor, arg, flags)) + return true; + if (has_filter()) + { + if (filter_expr->walk(processor, arg, flags)) + return true; + } + return (this->*processor)(arg); + } + enum Sumfunctype { COUNT_FUNC, COUNT_DISTINCT_FUNC, SUM_FUNC, SUM_DISTINCT_FUNC, AVG_FUNC, AVG_DISTINCT_FUNC, MIN_FUNC, MAX_FUNC, STD_FUNC, @@ -548,6 +578,7 @@ class Item_sum :public Item_func_or_sum aggr= NULL; with_distinct= FALSE; force_copy_fields= FALSE; + filter_expr= NULL; } /** @@ -711,6 +742,7 @@ class Aggregator_distinct : public Aggregator my_decimal *arg_val_decimal(my_decimal * value) override; double arg_val_real() override; bool arg_is_null(bool use_null_value) override; + bool is_in_endup_phase() const override { return use_distinct_values; } bool unique_walk_function(void *element); bool unique_walk_function_for_count(void 
*element); diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index f0a5737bdf73a..74f845dcabfcd 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -170,6 +170,11 @@ void Item_window_func::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, Item **p_item= &window_func()->arguments()[i]; (*p_item)->split_sum_func2(thd, ref_pointer_array, fields, p_item, flags); } + if (window_func()->has_filter()) + { + Item **p_filter= window_func()->get_filter(); + (*p_filter)->split_sum_func2(thd, ref_pointer_array, fields, p_filter, flags); + } window_func()->setup_caches(thd); } diff --git a/sql/lex.h b/sql/lex.h index 41a34ef738989..93d60bb428a90 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -245,6 +245,7 @@ SYMBOL symbols[] = { { "FETCH", SYM(FETCH_SYM)}, { "FIELDS", SYM(COLUMNS)}, { "FILE", SYM(FILE_SYM)}, + { "FILTER", SYM(FILTER_SYM)}, { "FIRST", SYM(FIRST_SYM)}, { "FIXED", SYM(FIXED_SYM)}, { "FLOAT", SYM(FLOAT_SYM)}, diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 9f0b37f835cd7..9096f38798d0f 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -14655,6 +14655,18 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time) Item_sum **func_ptr= join->sum_funcs; while ((min_max_item= *(func_ptr++))) { + /* + FILTER clause requires per-row evaluation, cannot use loose index scan. + */ + if (min_max_item->has_filter()) + { + if (unlikely(trace_group.trace_started())) + trace_group. + add("chosen", false). 
+ add("cause", "aggregate has FILTER clause"); + DBUG_RETURN(NULL); + } + if (min_max_item->sum_func() == Item_sum::MIN_FUNC) have_min= TRUE; else if (min_max_item->sum_func() == Item_sum::MAX_FUNC) diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc index 6d5bf8c6f487f..fc99d1116ef82 100644 --- a/sql/opt_sum.cc +++ b/sql/opt_sum.cc @@ -357,11 +357,12 @@ int opt_sum_query(THD *thd, /* If the expr in COUNT(expr) can never be null we can change this to the number of rows in the tables if this number is exact and - there are no outer joins. + there are no outer joins and there is no filter clause. */ if (!conds && !((Item_sum_count*) item)->get_arg(0)->maybe_null() && !outer_tables && maybe_exact_count && - ((item->used_tables() & OUTER_REF_TABLE_BIT) == 0)) + ((item->used_tables() & OUTER_REF_TABLE_BIT) == 0) && + !((Item_sum_count*) item)->has_filter()) { if (!is_exact_count) { @@ -382,6 +383,17 @@ int opt_sum_query(THD *thd, case Item_sum::MIN_FUNC: case Item_sum::MAX_FUNC: { + /* + Do not attempt MIN/MAX constant replacement if a FILTER clause + is present on the aggregate. FILTER must be evaluated per-row and + cannot be folded by the index-based shortcut here. + */ + if (item_sum->has_filter()) + { + const_result= 0; + break; + } + int is_max= MY_TEST(item_sum->sum_func() == Item_sum::MAX_FUNC); /* If MIN/MAX(expr) is the first part of a key or if all previous diff --git a/sql/sql_select.cc b/sql/sql_select.cc index d5152d5b51566..497e41ebd29e5 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -22253,6 +22253,44 @@ bool Create_tmp_table::add_fields(THD *thd, new_field->flags|= FIELD_PART_OF_TMP_UNIQUE; } } + + /* + If the aggregate has FILTER, materialize the predicate into the tmp table + and rewrite it to read from the tmp row (we compute aggregates later). 
+ */ + if (sum_item->has_filter()) + { + Item *fexpr= *sum_item->get_filter(); + if (!fexpr->const_item()) + { + Item *tmp_item; + Field *new_field= + create_tmp_field(table, fexpr, &copy_func, + tmp_from_field, &m_default_field[fieldnr], + m_group != 0, not_all_columns, + distinct_record_structure, false); + if (!new_field) + goto err; + tmp_from_field++; + + thd->mem_root= mem_root_save; + if (!(tmp_item= new (thd->mem_root) Item_field(thd, new_field))) + goto err; + static_cast<Item_field*>(tmp_item)->set_refers_to_temp_table(); + sum_item->set_filter(thd, tmp_item); + thd->mem_root= &table->mem_root; + + uneven_delta= m_uneven_bit_length; + add_field(table, new_field, fieldnr++, param->force_not_null_cols); + m_field_count[current_counter]++; + m_uneven_bit[current_counter]+= (m_uneven_bit_length - uneven_delta); + + if (!(new_field->flags & NOT_NULL_FLAG)) + tmp_item->set_maybe_null(); + if (current_counter == distinct) + new_field->flags|= FIELD_PART_OF_TMP_UNIQUE; + } + } } else { @@ -29113,6 +29151,19 @@ count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param, else param->func_count++; } + + // Count FILTER so it can be stored in the GROUP BY temp table and read later + if (sum_item->has_filter()) + { + Item *fexpr= *sum_item->get_filter(); + if (!fexpr->const_item()) + { + if (fexpr->real_item()->type() == Item::FIELD_ITEM) + param->field_count++; + else + param->func_count++; + } + } } param->func_count++; } diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index dd742bc47f523..3b4a04d8b8f1e 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -879,6 +879,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize); %token FAULTS_SYM %token FEDERATED_SYM /* MariaDB privilege */ %token FILE_SYM +%token FILTER_SYM %token FIRST_SYM /* SQL-2003-N */ %token FIXED_SYM %token FLUSH_SYM @@ -1226,6 +1227,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize); %nonassoc NEG '~' NOT2_SYM BINARY %nonassoc COLLATE_SYM %nonassoc SUBQUERY_AS_EXPR +%nonassoc
FILTER_SYM /* Tokens that can change their meaning from identifier to something else @@ -1582,6 +1584,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize); opt_versioning_interval_start json_default_literal set_expr_misc + unfiltered_sum_expr + opt_filter_expr %type opt_vers_auto_part @@ -11040,7 +11044,7 @@ function_call_generic: #endif } - opt_udf_expr_list_or_join_operator ')' opt_object_member_access + opt_udf_expr_list_or_join_operator ')' opt_object_member_access opt_filter_expr { const Type_handler *h; Create_func *builder; @@ -11130,6 +11134,26 @@ function_call_generic: } } + if (item && $7) + { + if (item->type() == Item::SUM_FUNC_ITEM) + { + ((Item_sum *)item)->set_filter($7); + } + else if (item->type() == Item::FUNC_ITEM && + static_cast<Item_func*>(item)->functype() == + Item_func::FUNC_SP) + { + static_cast<Item_func_sp*>(item)->set_filter($7); + } + else + { + my_error(ER_WRONG_USAGE, MYF(0), "FILTER", + "NON-AGGREGATE FUNCTION"); + MYSQL_YYABORT; + } + } + if ($6.str && !allow_field_accessor) { Lex_ident_sys field_sys(thd, &$6); @@ -11309,7 +11333,30 @@ udf_expr: } ; +opt_filter_expr: + /* empty */ { $$= NULL; } %prec SUBQUERY_AS_EXPR + | FILTER_SYM '(' WHERE + { Select->in_sum_expr++; } + expr + { Select->in_sum_expr--; } + ')' + { + $$= $5; + } + ; + sum_expr: + unfiltered_sum_expr opt_filter_expr + { + if ($2) + { + ((Item_sum *)$1)->set_filter($2); + } + $$= $1; + } + ; + +unfiltered_sum_expr: AVG_SYM '(' in_sum_expr ')' { $$= new (thd->mem_root) Item_sum_avg(thd, $3, FALSE); @@ -16687,6 +16734,7 @@ keyword_func_sp_var_and_label: | FAST_SYM | FEDERATED_SYM | FILE_SYM + | FILTER_SYM | FIRST_SYM | FOUND_SYM | FULL diff --git a/storage/sequence/sequence.cc b/storage/sequence/sequence.cc index 2f34d7c1a2fb6..a7a2942a639e5 100644 --- a/storage/sequence/sequence.cc +++ b/storage/sequence/sequence.cc @@ -419,9 +419,10 @@ create_group_by_handler(THD *thd, Query *query) Field *field; if (item->type() != Item::SUM_FUNC_ITEM || (((Item_sum*) item)->sum_func() !=
Item_sum::SUM_FUNC && - ((Item_sum*) item)->sum_func() != Item_sum::COUNT_FUNC)) + ((Item_sum*) item)->sum_func() != Item_sum::COUNT_FUNC) || + ((Item_sum*) item)->has_filter()) - return 0; // Not a SUM() function + return 0; // Not a SUM() function or has FILTER arg0= ((Item_sum*) item)->get_arg(0); if (arg0->type() != Item::FIELD_ITEM) { From de6a8490899f0d7c8f3613e3e1bf3aee93e63405 Mon Sep 17 00:00:00 2001 From: KhaledR57 Date: Wed, 7 Jan 2026 16:49:51 +0200 Subject: [PATCH 2/3] Fix crash when first GROUP BY row is filtered by HAVING Avoid freeing SP query arena items when the aggregate function context was never initialized, which could happen if the first group is filtered out by HAVING. --- sql/item_sum.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/item_sum.cc b/sql/item_sum.cc index b4a460446bb1f..b138c7ecfe863 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -1518,6 +1518,8 @@ Item_sum_sp::add() void Item_sum_sp::clear() { + if (!func_ctx) + return; delete func_ctx; func_ctx= NULL; sp_query_arena->free_items(); From c6e3c6964b06b0a4f0fbb68bbb8e1b14be7c9c5a Mon Sep 17 00:00:00 2001 From: KhaledR57 Date: Sun, 25 Jan 2026 01:51:21 +0200 Subject: [PATCH 3/3] Fix uninitialized value in Item_func_collect MemorySanitizer reported a use-of-uninitialized-value error in Item_func_collect::val_str when using ST_COLLECT on an empty column. The variable 'has_cached_result' was not being initialized in the constructor, leading to undefined behavior when accessed. 
--- sql/item_sum.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/item_sum.cc b/sql/item_sum.cc index b138c7ecfe863..fa56751fec757 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -4808,6 +4808,7 @@ Item_func_collect::Item_func_collect(THD *thd, bool is_distinct, Item *item_par) group_collect_max_len(thd->variables.group_concat_max_len) { quick_group= false; + has_cached_result= false; } @@ -4818,6 +4819,7 @@ Item_func_collect::Item_func_collect(THD *thd, bool is_distinct, Item_func_colle group_collect_max_len(thd->variables.group_concat_max_len) { quick_group= false; + has_cached_result= false; }