From 5a30359654cd5a009148e327e95cd1e48c496827 Mon Sep 17 00:00:00 2001
From: suyashkhare1403
Date: Thu, 5 Mar 2026 18:22:53 +0530
Subject: [PATCH 1/2] Modify split_sql method to disable delimiter checks

Commented out conditions for delimiter checks in split_sql method.
SPLIT is now only collapsed to its string argument when the call is
nested inside a VarMap.
---
 sqlglot/dialects/e6.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sqlglot/dialects/e6.py b/sqlglot/dialects/e6.py
index 65832dc0fe..f78eda4468 100644
--- a/sqlglot/dialects/e6.py
+++ b/sqlglot/dialects/e6.py
@@ -2641,12 +2641,13 @@ def split_sql(self, expression: exp.Split | exp.RegexpSplit):
             this = expression.this
             delimitter = expression.expression
             if (
                 this
                 and delimitter
                 and this.is_string
                 and delimitter.is_string
-                and delimitter.this not in this.this
-                and not len(expression.args) == 3
+                and expression.find_ancestor(exp.VarMap)
+                # and delimitter.this not in this.this
+                # and not len(expression.args) == 3
             ):
                 return f"{this}"
             return rename_func("SPLIT")(self, expression)

From 72e485f7f243be0f1d5fa7ef25bd63ec00c08b2d Mon Sep 17 00:00:00 2001
From: suyashkhare1403
Date: Thu, 5 Mar 2026 19:06:14 +0530
Subject: [PATCH 2/2] Add tests for SQL split functionality

---
 tests/dialects/test_e6.py | 49 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/tests/dialects/test_e6.py b/tests/dialects/test_e6.py
index 1525aae7be..0bd0138457 100644
--- a/tests/dialects/test_e6.py
+++ b/tests/dialects/test_e6.py
@@ -3166,3 +3166,52 @@ def test_formatting_preservation(self):
         # Result should have the columns (works with both tokenizers)
         self.assertIn("col1", result_spaces)
         self.assertIn("col2", result_spaces)
+
+    def test_split_sql(self):
+        # 1. split inside MAP, separator absent → SPLIT stripped, plain string returned
+        self.validate_all(
+            "SELECT MAP[ARRAY['test'],ARRAY['-18000']]",
+            read={
+                "databricks": "SELECT map(split('test',','), split('-18000',','))",
+            },
+        )
+
+        # 2. explode(split(...)), separator absent → SPLIT preserved
+        self.validate_all(
+            "SELECT EXPLODE(SPLIT('VZ_2469420', ','))",
+            read={
+                "spark": "SELECT explode(split('VZ_2469420', ','))",
+            },
+        )
+
+        # 3. explode(split(...)), separator present → SPLIT preserved
+        self.validate_all(
+            "SELECT EXPLODE(SPLIT('VZ_2469420,', ','))",
+            read={
+                "spark": "SELECT explode(split('VZ_2469420,', ','))",
+            },
+        )
+
+        # 4. split without explode or map, separator absent → SPLIT preserved (not inside VarMap)
+        self.validate_all(
+            "SELECT SPLIT('hello', ',')",
+            read={
+                "spark": "SELECT split('hello', ',')",
+            },
+        )
+
+        # 5. split with 3 arguments → SPLIT preserved
+        self.validate_all(
+            "SELECT SPLIT('a,b,c', ',', 2)",
+            read={
+                "spark": "SELECT split('a,b,c', ',', 2)",
+            },
+        )
+
+        # 6. regexp_split (not inside a map) → SPLIT preserved
+        self.validate_all(
+            "SELECT SPLIT('hello world', '\\\\s+')",
+            read={
+                "postgres": "SELECT regexp_split('hello world', '\\s+')",
+            },
+        )