diff --git a/apis/utils/supported_functions_in_all_dialects.json b/apis/utils/supported_functions_in_all_dialects.json
index 412e8f2ba3..790e5e9597 100644
--- a/apis/utils/supported_functions_in_all_dialects.json
+++ b/apis/utils/supported_functions_in_all_dialects.json
@@ -767,6 +767,7 @@
   "OBJECT_CONSTRUCT",
   "DATE_SUB",
   "GROUPING SETS",
+  "GROUP BY ALL",
   "ACOS",
   "ADD_MONTHS",
   "CEILING",
@@ -791,7 +792,10 @@
   "RAND",
   "CORR",
   "COVAR_POP",
-  "URL_DECODE",
+  "COVAR_SAMP",
+  "VARIANCE_SAMP",
+  "VAR_SAMP",
+  "URL_DECODE",
   "TYPEOF",
   "TIMEDIFF",
   "INTERVAL"
diff --git a/tests/dialects/test_e6.py b/tests/dialects/test_e6.py
index 929913b6a6..2128ed16a1 100644
--- a/tests/dialects/test_e6.py
+++ b/tests/dialects/test_e6.py
@@ -1977,6 +1977,58 @@ def test_statistical_funcs(self):
                 "databricks": "SELECT percentile_cont(0.50) WITHIN GROUP (ORDER BY col) FROM VALUES (0), (6), (6), (7), (9), (10) AS tab(col)"
             },
         )
+
+        # Additional STDDEV tests from multiple dialects
+        self.validate_all(
+            "SELECT STDDEV(col) FROM (VALUES (1), (2), (3), (4)) AS tab(col)",
+            read={
+                "databricks": "SELECT stddev(col) FROM VALUES (1), (2), (3), (4) AS tab(col)",
+                "snowflake": "SELECT STDDEV(col) FROM VALUES (1), (2), (3), (4) AS tab(col)",
+                "postgres": "SELECT STDDEV(col) FROM VALUES (1), (2), (3), (4) AS tab(col)"
+            },
+        )
+        self.validate_all(
+            "SELECT STDDEV_SAMP(col) FROM (VALUES (1), (2), (3), (4)) AS tab(col)",
+            read={
+                "databricks": "SELECT stddev_samp(col) FROM VALUES (1), (2), (3), (4) AS tab(col)",
+                "snowflake": "SELECT STDDEV_SAMP(col) FROM VALUES (1), (2), (3), (4) AS tab(col)",
+                "postgres": "SELECT STDDEV_SAMP(col) FROM VALUES (1), (2), (3), (4) AS tab(col)"
+            },
+        )
+
+        # COVAR_SAMP tests from multiple dialects
+        self.validate_all(
+            "SELECT COVAR_SAMP(x, y) FROM (VALUES (1, 10), (2, 20), (3, 30)) AS tab(x, y)",
+            read={
+                "databricks": "SELECT covar_samp(x, y) FROM VALUES (1, 10), (2, 20), (3, 30) AS tab(x, y)",
+                "snowflake": "SELECT COVAR_SAMP(x, y) FROM VALUES (1, 10), (2, 20), (3, 30) AS tab(x, y)",
+                "postgres": "SELECT COVAR_SAMP(x, y) FROM VALUES (1, 10), (2, 20), (3, 30) AS tab(x, y)"
+            },
+        )
+
+        # VARIANCE_SAMP tests from multiple dialects
+        self.validate_all(
+            "SELECT VARIANCE_SAMP(col) FROM (VALUES (1), (2), (3), (4), (5)) AS tab(col)",
+            read={
+                "databricks": "SELECT variance_samp(col) FROM VALUES (1), (2), (3), (4), (5) AS tab(col)",
+                "snowflake": "SELECT VARIANCE_SAMP(col) FROM VALUES (1), (2), (3), (4), (5) AS tab(col)"
+            },
+        )
+        self.validate_all(
+            "SELECT VARIANCE_SAMP(DISTINCT col) FROM (VALUES (1), (2), (2), (3), (3), (3)) AS tab(col)",
+            read={
+                "databricks": "SELECT variance_samp(DISTINCT col) FROM VALUES (1), (2), (2), (3), (3), (3) AS tab(col)"
+            },
+        )
+
+        # VAR_SAMP tests from multiple dialects
+        self.validate_all(
+            "SELECT VAR_SAMP(col) FROM (VALUES (1), (2), (3), (4)) AS tab(col)",
+            read={
+                "databricks": "SELECT var_samp(col) FROM VALUES (1), (2), (3), (4) AS tab(col)",
+                "snowflake": "SELECT VAR_SAMP(col) FROM VALUES (1), (2), (3), (4) AS tab(col)"
+            },
+        )
 
     def test_unixtime_functions(self):
         self.validate_all(
@@ -2292,5 +2344,46 @@ def test_random(self):
             },
         )
 
+    def test_group_by_all(self):
+        # Basic GROUP BY ALL test
+        self.validate_all(
+            "SELECT category, brand, AVG(price) AS average_price FROM products GROUP BY ALL",
+            read={
+                "databricks": "SELECT category, brand, AVG(price) AS average_price FROM products GROUP BY ALL"
+            }
+        )
+
+        # GROUP BY ALL with CTE
+        self.validate_all(
+            """WITH products AS (SELECT 'Electronics' AS category, 'BrandA' AS brand, 100 AS price UNION ALL SELECT 'Electronics' AS category, 'BrandA' AS brand, 150 AS price) SELECT category, brand, AVG(price) AS average_price FROM products GROUP BY ALL""",
+            read={
+                "databricks": """WITH products AS (SELECT 'Electronics' AS category, 'BrandA' AS brand, 100 AS price UNION ALL SELECT 'Electronics' AS category, 'BrandA' AS brand, 150 AS price) SELECT category, brand, AVG(price) AS average_price FROM products GROUP BY ALL"""
+            }
+        )
+
+        # GROUP BY ALL with ORDER BY
+        self.validate_all(
+            "SELECT department, COUNT(*) AS employee_count FROM employees GROUP BY ALL ORDER BY employee_count DESC",
+            read={
+                "databricks": "SELECT department, COUNT(*) AS employee_count FROM employees GROUP BY ALL ORDER BY employee_count DESC"
+            }
+        )
+
+        # GROUP BY ALL with HAVING clause
+        self.validate_all(
+            "SELECT region, SUM(sales) AS total_sales FROM sales_data GROUP BY ALL HAVING SUM(sales) > 1000",
+            read={
+                "databricks": "SELECT region, SUM(sales) AS total_sales FROM sales_data GROUP BY ALL HAVING SUM(sales) > 1000"
+            }
+        )
+
+        # GROUP BY ALL with multiple aggregations
+        self.validate_all(
+            "SELECT product_category, COUNT(*) AS item_count, AVG(price) AS avg_price, MAX(price) AS max_price FROM inventory GROUP BY ALL",
+            read={
+                "databricks": "SELECT product_category, COUNT(*) AS item_count, AVG(price) AS avg_price, MAX(price) AS max_price FROM inventory GROUP BY ALL"
+            }
+        )
+
     def test_keywords(self):
         self.validate_identity("""SELECT a."variant" FROM table AS a""")