diff --git a/sqlglot/dialects/e6.py b/sqlglot/dialects/e6.py
index 039c21f30c..91355410bb 100644
--- a/sqlglot/dialects/e6.py
+++ b/sqlglot/dialects/e6.py
@@ -2642,12 +2642,13 @@ def split_sql(self, expression: exp.Split | exp.RegexpSplit):
             this = expression.this
             delimitter = expression.expression
+            # Only inside a MAP constructor (VarMap ancestor), and only when both
+            # arguments are string literals, emit the string itself instead of SPLIT.
             if (
-                this
+                expression.find_ancestor(exp.VarMap)
+                and this
                 and delimitter
                 and this.is_string
                 and delimitter.is_string
-                and delimitter.this not in this.this
-                and not len(expression.args) == 3
             ):
                 return f"{this}"
             return rename_func("SPLIT")(self, expression)
diff --git a/tests/dialects/test_e6.py b/tests/dialects/test_e6.py
index ce7b41b873..9f23fca7fb 100644
--- a/tests/dialects/test_e6.py
+++ b/tests/dialects/test_e6.py
@@ -3152,3 +3152,52 @@ def test_formatting_preservation(self):
         # Result should have the columns (works with both tokenizers)
         self.assertIn("col1", result_spaces)
         self.assertIn("col2", result_spaces)
+
+    def test_split_sql(self):
+        # 1. split inside MAP, separator absent → SPLIT stripped, plain string returned
+        self.validate_all(
+            "SELECT MAP[ARRAY['test'],ARRAY['-18000']]",
+            read={
+                "databricks": "SELECT map(split('test',','), split('-18000',','))",
+            },
+        )
+
+        # 2. explode(split(...)), separator absent → SPLIT preserved
+        self.validate_all(
+            "SELECT EXPLODE(SPLIT('VZ_2469420', ','))",
+            read={
+                "spark": "SELECT explode(split('VZ_2469420', ','))",
+            },
+        )
+
+        # 3. explode(split(...)), separator present → SPLIT preserved
+        self.validate_all(
+            "SELECT EXPLODE(SPLIT('VZ_2469420,', ','))",
+            read={
+                "spark": "SELECT explode(split('VZ_2469420,', ','))",
+            },
+        )
+
+        # 4. split without explode or map, separator absent → SPLIT preserved (not inside VarMap)
+        self.validate_all(
+            "SELECT SPLIT('hello', ',')",
+            read={
+                "spark": "SELECT split('hello', ',')",
+            },
+        )
+
+        # 5. split with 3 arguments → SPLIT preserved
+        self.validate_all(
+            "SELECT SPLIT('a,b,c', ',', 2)",
+            read={
+                "spark": "SELECT split('a,b,c', ',', 2)",
+            },
+        )
+
+        # 6. regexp_split outside MAP → rendered as SPLIT
+        self.validate_all(
+            "SELECT SPLIT('hello world', '\\\\s+')",
+            read={
+                "postgres": "SELECT regexp_split('hello world', '\\s+')",
+            },
+        )