From 8ebd6fa25116d10e61b1992c367528a30d24a615 Mon Sep 17 00:00:00 2001 From: suyashkhare1403 Date: Thu, 5 Mar 2026 18:53:32 +0530 Subject: [PATCH 1/3] Modify split_sql to comment out validation checks Commented out conditions for string split validation. --- sqlglot/dialects/e6.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sqlglot/dialects/e6.py b/sqlglot/dialects/e6.py index 65832dc0fe..5173dd2868 100644 --- a/sqlglot/dialects/e6.py +++ b/sqlglot/dialects/e6.py @@ -2641,12 +2641,13 @@ def split_sql(self, expression: exp.Split | exp.RegexpSplit): this = expression.this delimitter = expression.expression if ( - this + expression.find_ancestor(exp.VarMap) + and this and delimitter and this.is_string and delimitter.is_string - and delimitter.this not in this.this - and not len(expression.args) == 3 + # and delimitter.this not in this.this + # and not len(expression.args) == 3 ): return f"{this}" return rename_func("SPLIT")(self, expression) From e499bcfbbb374843940f350501970316ba7f765b Mon Sep 17 00:00:00 2001 From: suyashkhare1403 Date: Thu, 5 Mar 2026 19:09:51 +0530 Subject: [PATCH 2/3] Add tests for SQL split functionality --- tests/dialects/test_e6.py | 48 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/dialects/test_e6.py b/tests/dialects/test_e6.py index 1525aae7be..0bd0138457 100644 --- a/tests/dialects/test_e6.py +++ b/tests/dialects/test_e6.py @@ -3166,3 +3166,51 @@ def test_formatting_preservation(self): # Result should have the columns (works with both tokenizers) self.assertIn("col1", result_spaces) self.assertIn("col2", result_spaces) + def test_split_sql(self): + # 1. split inside MAP, separator absent → SPLIT stripped, plain string returned + self.validate_all( + "SELECT MAP[ARRAY['test'],ARRAY['-18000']]", + read={ + "databricks": "SELECT map(split('test',','), split('-18000',','))", + }, + ) + + # 2. 
explode(split(...)), separator absent → SPLIT preserved + self.validate_all( + "SELECT EXPLODE(SPLIT('VZ_2469420', ','))", + read={ + "spark": "SELECT explode(split('VZ_2469420', ','))", + }, + ) + + # 3. explode(split(...)), separator present → SPLIT preserved + self.validate_all( + "SELECT EXPLODE(SPLIT('VZ_2469420,', ','))", + read={ + "spark": "SELECT explode(split('VZ_2469420,', ','))", + }, + ) + + # 4. split without explode or map, separator absent → SPLIT preserved (not inside VarMap) + self.validate_all( + "SELECT SPLIT('hello', ',')", + read={ + "spark": "SELECT split('hello', ',')", + }, + ) + + # 5. split with 3 arguments → SPLIT preserved + self.validate_all( + "SELECT SPLIT('a,b,c', ',', 2)", + read={ + "spark": "SELECT split('a,b,c', ',', 2)", + }, + ) + + # 6. regexp_split outside MAP (no explode) → rendered as SPLIT + self.validate_all( + "SELECT SPLIT('hello world', '\\\\s+')", + read={ + "postgres": "SELECT regexp_split('hello world', '\\s+')", + }, + ) From a07cbd3de783eacb5a9e18b0f9c18b877ac26ae0 Mon Sep 17 00:00:00 2001 From: suyashkhare1403 Date: Thu, 5 Mar 2026 19:13:38 +0530 Subject: [PATCH 3/3] Refactor SQL validation tests for consistency --- tests/dialects/test_e6.py | 43 ++++++++++++++------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/tests/dialects/test_e6.py b/tests/dialects/test_e6.py index 0bd0138457..9f23fca7fb 100644 --- a/tests/dialects/test_e6.py +++ b/tests/dialects/test_e6.py @@ -54,12 +54,12 @@ def test_E6(self): }, ) - # self.validate_all( - # "SELECT r.* EXCEPT (_____dp_update_ts) FROM gold.ops.slp_fcc_gains_and_reasons AS r", - # read={ - # "databricks": "select r.* except (r._____dp_update_ts) from gold.ops.slp_fcc_gains_and_reasons as r" - # }, - # ) + self.validate_all( + "SELECT r.* EXCEPT (_____dp_update_ts) FROM gold.ops.slp_fcc_gains_and_reasons AS r", + read={ + "databricks": "select r.* except (r._____dp_update_ts) from gold.ops.slp_fcc_gains_and_reasons as r" + }, + ) self.validate_all(
"SELECT REDUCE(ARRAY[1, 2, 3], 0, (acc, x) -> acc + x)", @@ -2054,20 +2054,6 @@ def test_date_time(self): }, ) - # EEEE (full weekday name) should be preserved in transpilation - self.validate_all( - "SELECT FORMAT_DATE(CAST(CURRENT_TIMESTAMP AS TIMESTAMP), 'dd MMMM, EEEE') AS \"formatted_date\"", - read={ - "databricks": "select DATE_FORMAT(current_timestamp, 'dd MMMM, EEEE') AS `formatted_date`" - }, - ) - - # EEE (abbreviated weekday name) should be preserved - self.validate_all( - "SELECT FORMAT_DATE(CAST(CURRENT_TIMESTAMP AS TIMESTAMP), 'dd MMM, EEE')", - read={"databricks": "select DATE_FORMAT(current_timestamp, 'dd MMM, EEE')"}, - ) - def test_conditional_expression(self): self.validate_all( "SELECT SUM(COALESCE(CASE WHEN performance_rating > 7 THEN 1 END, 0))", @@ -2277,38 +2263,38 @@ def test_unixtime_functions(self): ) self.validate_all( - "SELECT TO_UNIX_TIMESTAMP(PARSE_DATETIME('%Y-%m-%d', '2016-04-08')) / 1000", + "SELECT FLOOR(TO_UNIX_TIMESTAMP(PARSE_DATETIME('%Y-%m-%d', '2016-04-08')) / 1000)", read={"databricks": "SELECT unix_timestamp('2016-04-08', 'yyyy-MM-dd')"}, ) self.validate_all( - "SELECT TO_UNIX_TIMESTAMP(CAST('2016-04-08' AS TIMESTAMP)) / 1000", + "SELECT FLOOR(TO_UNIX_TIMESTAMP('2016-04-08') / 1000)", read={"databricks": "SELECT to_unix_timestamp('2016-04-08')"}, ) self.validate_all( - "SELECT TO_UNIX_TIMESTAMP(CAST(A AS TIMESTAMP)) / 1000", + "SELECT FLOOR(TO_UNIX_TIMESTAMP(A) / 1000)", read={"databricks": "SELECT UNIX_TIMESTAMP(A)", "trino": "SELECT TO_UNIXTIME(A)"}, write={ - "databricks": "SELECT TO_UNIX_TIMESTAMP(CAST(A AS TIMESTAMP)) / 1000", - "snowflake": "SELECT EXTRACT(epoch_second FROM CAST(A AS TIMESTAMP)) / 1000", + "databricks": "SELECT TO_UNIX_TIMESTAMP(A) / 1000", + "snowflake": "SELECT EXTRACT(epoch_second FROM A) / 1000", }, ) self.validate_all( - "SELECT TO_UNIX_TIMESTAMP(CAST(CURRENT_TIMESTAMP AS TIMESTAMP)) / 1000", + "SELECT FLOOR(TO_UNIX_TIMESTAMP(CURRENT_TIMESTAMP) / 1000)", read={"databricks": "SELECT 
UNIX_TIMESTAMP()"}, ) self.validate_all( - "SELECT * FROM events WHERE event_time >= TO_UNIX_TIMESTAMP(PARSE_DATETIME('%Y-%m-%d', '2023-01-01')) / 1000 AND event_time < TO_UNIX_TIMESTAMP(PARSE_DATETIME('%Y-%m-%d', '2023-02-01')) / 1000", + "SELECT * FROM events WHERE event_time >= FLOOR(TO_UNIX_TIMESTAMP(PARSE_DATETIME('%Y-%m-%d', '2023-01-01')) / 1000) AND event_time < FLOOR(TO_UNIX_TIMESTAMP(PARSE_DATETIME('%Y-%m-%d', '2023-02-01')) / 1000)", read={ "databricks": "SELECT * FROM events WHERE event_time >= unix_timestamp('2023-01-01', 'yyyy-MM-dd') AND event_time < unix_timestamp('2023-02-01', 'yyyy-MM-dd')" }, ) self.validate_all( - "SELECT TO_UNIX_TIMESTAMP(PARSE_DATETIME('%Y-%m-%d %h:%i:%S', '2016-04-08 12:10:15')) / 1000", + "SELECT FLOOR(TO_UNIX_TIMESTAMP(PARSE_DATETIME('%Y-%m-%d %h:%i:%S', '2016-04-08 12:10:15')) / 1000)", read={ "databricks": "SELECT to_unix_timestamp('2016-04-08 12:10:15', 'yyyy-LL-dd hh:mm:ss')" }, @@ -3166,6 +3152,7 @@ def test_formatting_preservation(self): # Result should have the columns (works with both tokenizers) self.assertIn("col1", result_spaces) self.assertIn("col2", result_spaces) + def test_split_sql(self): # 1. split inside MAP, separator absent → SPLIT stripped, plain string returned self.validate_all(