From c2b7c7726d8fc250929c3931326f2b195f8ce232 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Wed, 11 Jun 2025 13:13:33 +0300 Subject: [PATCH 1/2] Feat(spark): support ALTER ADD PARTITION --- sqlglot/expressions.py | 4 ++++ sqlglot/generator.py | 3 +++ sqlglot/parser.py | 13 ++++++++++--- tests/dialects/test_spark.py | 1 + 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/sqlglot/expressions.py b/sqlglot/expressions.py index 1003ae5faa..f1a927a7c1 100644 --- a/sqlglot/expressions.py +++ b/sqlglot/expressions.py @@ -4926,6 +4926,10 @@ class AddConstraint(Expression): arg_types = {"expressions": True} +class AddPartition(Expression): + pass + + class AttachOption(Expression): arg_types = {"this": True, "expression": False} diff --git a/sqlglot/generator.py b/sqlglot/generator.py index a9e0545282..7c5f046395 100644 --- a/sqlglot/generator.py +++ b/sqlglot/generator.py @@ -3498,6 +3498,9 @@ def droppartition_sql(self, expression: exp.DropPartition) -> str: def addconstraint_sql(self, expression: exp.AddConstraint) -> str: return f"ADD {self.expressions(expression)}" + def addpartition_sql(self, expression: exp.AddPartition) -> str: + return f"ADD {self.sql(expression.this)}" + def distinct_sql(self, expression: exp.Distinct) -> str: this = self.expressions(expression, flat=True) diff --git a/sqlglot/parser.py b/sqlglot/parser.py index 04c1703aff..e67e954e58 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -7368,13 +7368,20 @@ def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPart ) def _parse_alter_table_add(self) -> t.List[exp.Expression]: - def _parse_add_column_or_constraint(): + def _parse_add_alteration() -> t.Optional[exp.Expression]: self._match_text_seq("ADD") if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): return self.expression( exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) ) - return self._parse_add_column() + + is_partition = self._match(TokenType.PARTITION, advance=False) + field = self._parse_add_column() + + if is_partition: + return self.expression(exp.AddPartition, this=field) + + return field if not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN or self._match_text_seq( "COLUMNS" @@ -7383,7 +7390,7 @@ def _parse_add_column_or_constraint(): return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def) - return self._parse_csv(_parse_add_column_or_constraint) + return self._parse_csv(_parse_add_alteration) def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: if self._match_texts(self.ALTER_ALTER_PARSERS): diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index 2ff616cfe9..fec7d41311 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -247,6 +247,7 @@ def test_spark(self): "REFRESH TABLE t", ) + self.validate_identity("ALTER TABLE foo ADD PARTITION(event = 'click')") self.validate_identity("IF(cond, foo AS bar, bla AS baz)") self.validate_identity("any_value(col, true)", "ANY_VALUE(col) IGNORE NULLS") self.validate_identity("first(col, true)", "FIRST(col) IGNORE NULLS") From 434b86cc6e932f27c499e5ef2665c831a29000a4 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Wed, 11 Jun 2025 13:41:46 +0300 Subject: [PATCH 2/2] PR feedback --- sqlglot/expressions.py | 2 +- sqlglot/generator.py | 3 ++- sqlglot/parser.py | 39 ++++++++++++++++++++++-------------- tests/dialects/test_spark.py | 1 + 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/sqlglot/expressions.py b/sqlglot/expressions.py index f1a927a7c1..4f879753a6 100644 --- a/sqlglot/expressions.py +++ b/sqlglot/expressions.py @@ -4927,7 +4927,7 @@ class AddConstraint(Expression): class AddPartition(Expression): - pass + arg_types = {"this": True, "exists": False} class AttachOption(Expression): diff --git a/sqlglot/generator.py b/sqlglot/generator.py index 7c5f046395..458256eb5b 100644 --- a/sqlglot/generator.py +++ b/sqlglot/generator.py @@ -3499,7 +3499,8 @@ def addconstraint_sql(self, expression: exp.AddConstraint) -> str: return f"ADD {self.expressions(expression)}" def addpartition_sql(self, expression: exp.AddPartition) -> str: - return f"ADD {self.sql(expression.this)}" + exists = "IF NOT EXISTS " if expression.args.get("exists") else "" + return f"ADD {exists}{self.sql(expression.this)}" def distinct_sql(self, expression: exp.Distinct) -> str: this = self.expressions(expression, flat=True) diff --git a/sqlglot/parser.py b/sqlglot/parser.py index e67e954e58..acc18039ed 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -7334,24 +7334,29 @@ def _parse_refresh(self) -> exp.Refresh: self._match(TokenType.TABLE) return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) - def _parse_add_column(self) -> t.Optional[exp.Expression]: + def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: if not self._prev.text.upper() == "ADD": return None + start = self._index self._match(TokenType.COLUMN) + exists_column = self._parse_exists(not_=True) expression = self._parse_field_def() - if expression: - expression.set("exists", exists_column) + if not isinstance(expression, exp.ColumnDef): + self._retreat(start) + return None - # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns - if self._match_texts(("FIRST", "AFTER")): - position = self._prev.text - column_position = self.expression( - exp.ColumnPosition, this=self._parse_column(), position=position - ) - expression.set("position", column_position) + expression.set("exists", exists_column) + + # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns + if self._match_texts(("FIRST", "AFTER")): + position = self._prev.text + column_position = self.expression( + exp.ColumnPosition, this=self._parse_column(), position=position + ) + expression.set("position", column_position) return expression @@ -7375,13 +7380,17 @@ def _parse_add_alteration() -> t.Optional[exp.Expression]: exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) ) - is_partition = self._match(TokenType.PARTITION, advance=False) - field = self._parse_add_column() + column_def = self._parse_add_column() + if isinstance(column_def, exp.ColumnDef): + return column_def - if is_partition: - return self.expression(exp.AddPartition, this=field) + exists = self._parse_exists(not_=True) + if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): + return self.expression( + exp.AddPartition, exists=exists, this=self._parse_field(any_token=True) + ) - return field + return None if not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN or self._match_text_seq( "COLUMNS" diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index fec7d41311..75e059b145 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -248,6 +248,7 @@ def test_spark(self): ) self.validate_identity("ALTER TABLE foo ADD PARTITION(event = 'click')") + self.validate_identity("ALTER TABLE foo ADD IF NOT EXISTS PARTITION(event = 'click')") self.validate_identity("IF(cond, foo AS bar, bla AS baz)") self.validate_identity("any_value(col, true)", "ANY_VALUE(col) IGNORE NULLS") self.validate_identity("first(col, true)", "FIRST(col) IGNORE NULLS")