From 45ce401463ea95e70bfd4d08552e9893a939a07c Mon Sep 17 00:00:00 2001 From: suyashkhare1403 Date: Thu, 5 Mar 2026 19:17:01 +0530 Subject: [PATCH 1/2] Add test_split_sql covering split_sql logic in E6 dialect --- tests/dialects/test_e6.py | 49 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tests/dialects/test_e6.py b/tests/dialects/test_e6.py index ce7b41b873..9f23fca7fb 100644 --- a/tests/dialects/test_e6.py +++ b/tests/dialects/test_e6.py @@ -3152,3 +3152,52 @@ def test_formatting_preservation(self): # Result should have the columns (works with both tokenizers) self.assertIn("col1", result_spaces) self.assertIn("col2", result_spaces) + + def test_split_sql(self): + # 1. split inside MAP, separator absent → SPLIT stripped, plain string returned + self.validate_all( + "SELECT MAP[ARRAY['test'],ARRAY['-18000']]", + read={ + "databricks": "SELECT map(split('test',','), split('-18000',','))", + }, + ) + + # 2. explode(split(...)), separator absent → SPLIT preserved + self.validate_all( + "SELECT EXPLODE(SPLIT('VZ_2469420', ','))", + read={ + "spark": "SELECT explode(split('VZ_2469420', ','))", + }, + ) + + # 3. explode(split(...)), separator present → SPLIT preserved + self.validate_all( + "SELECT EXPLODE(SPLIT('VZ_2469420,', ','))", + read={ + "spark": "SELECT explode(split('VZ_2469420,', ','))", + }, + ) + + # 4. split without explode or map, separator absent → SPLIT preserved (not inside VarMap) + self.validate_all( + "SELECT SPLIT('hello', ',')", + read={ + "spark": "SELECT split('hello', ',')", + }, + ) + + # 5. split with 3 arguments → SPLIT preserved + self.validate_all( + "SELECT SPLIT('a,b,c', ',', 2)", + read={ + "spark": "SELECT split('a,b,c', ',', 2)", + }, + ) + + # 6. 
regexp_split without explode or map → rendered as SPLIT + self.validate_all( + "SELECT SPLIT('hello world', '\\\\s+')", + read={ + "postgres": "SELECT regexp_split('hello world', '\\s+')", + }, + ) From c0e79d12c814414fe86694f1c0ea27066a2273d7 Mon Sep 17 00:00:00 2001 From: suyashkhare1403 Date: Thu, 5 Mar 2026 19:20:23 +0530 Subject: [PATCH 2/2] Modify split_sql method conditions Commented out conditions for delimiter checks in split_sql method. --- sqlglot/dialects/e6.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sqlglot/dialects/e6.py b/sqlglot/dialects/e6.py index 039c21f30c..91355410bb 100644 --- a/sqlglot/dialects/e6.py +++ b/sqlglot/dialects/e6.py @@ -2642,12 +2642,13 @@ def split_sql(self, expression: exp.Split | exp.RegexpSplit): this = expression.this delimitter = expression.expression if ( - this + expression.find_ancestor(exp.VarMap) + and this and delimitter and this.is_string and delimitter.is_string - and delimitter.this not in this.this - and not len(expression.args) == 3 + # and delimitter.this not in this.this + # and not len(expression.args) == 3 ): return f"{this}" return rename_func("SPLIT")(self, expression)