-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-51441][SQL] Add DSv2 APIs for constraints #50253
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
dc930c5
3711db7
3677054
2e61e44
9fec4e2
2cfa501
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,137 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.connector.catalog.constraints; | ||
|
|
||
| import java.util.StringJoiner; | ||
|
|
||
| import org.apache.spark.sql.connector.expressions.NamedReference; | ||
|
|
||
| abstract class BaseConstraint implements Constraint { | ||
|
|
||
| private final String name; | ||
| private final boolean enforced; | ||
| private final ValidationStatus validationStatus; | ||
| private final boolean rely; | ||
|
|
||
| protected BaseConstraint( | ||
| String name, | ||
| boolean enforced, | ||
| ValidationStatus validationStatus, | ||
| boolean rely) { | ||
| this.name = name; | ||
| this.enforced = enforced; | ||
| this.validationStatus = validationStatus; | ||
| this.rely = rely; | ||
| } | ||
|
|
||
| protected abstract String definition(); | ||
|
|
||
| @Override | ||
| public String name() { | ||
| return name; | ||
| } | ||
|
|
||
| @Override | ||
| public boolean enforced() { | ||
| return enforced; | ||
| } | ||
|
|
||
| @Override | ||
| public ValidationStatus validationStatus() { | ||
| return validationStatus; | ||
| } | ||
|
|
||
| @Override | ||
| public boolean rely() { | ||
| return rely; | ||
| } | ||
|
|
||
| @Override | ||
| public String toDDL() { | ||
| return String.format( | ||
| "CONSTRAINT %s %s %s %s %s", | ||
| name, | ||
| definition(), | ||
| enforced ? "ENFORCED" : "NOT ENFORCED", | ||
| validationStatus, | ||
| rely ? "RELY" : "NORELY"); | ||
| } | ||
|
|
||
| @Override | ||
| public String toString() { | ||
| return toDDL(); | ||
| } | ||
|
|
||
| protected String toDDL(NamedReference[] columns) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am sure it would be more descriptive. I feel |
||
| StringJoiner joiner = new StringJoiner(", "); | ||
|
|
||
| for (NamedReference column : columns) { | ||
| joiner.add(column.toString()); | ||
| } | ||
|
|
||
| return joiner.toString(); | ||
| } | ||
|
|
||
| abstract static class Builder<B, C> { | ||
| private final String name; | ||
| private boolean enforced = true; | ||
| private ValidationStatus validationStatus = ValidationStatus.UNVALIDATED; | ||
| private boolean rely = false; | ||
|
|
||
| Builder(String name) { | ||
| this.name = name; | ||
| } | ||
|
|
||
| protected abstract B self(); | ||
|
|
||
| public abstract C build(); | ||
|
|
||
| public String name() { | ||
| return name; | ||
| } | ||
|
|
||
| public B enforced(boolean enforced) { | ||
| this.enforced = enforced; | ||
| return self(); | ||
| } | ||
|
|
||
| public boolean enforced() { | ||
| return enforced; | ||
| } | ||
|
|
||
| /** | ||
| * Sets the validation status. A {@code null} argument is ignored and the previously | ||
| * configured status is kept. | ||
| * | ||
| * @param validationStatus the new validation status, or {@code null} to keep the current one | ||
| * @return the result of {@link #self()} | ||
| */ | ||
| public B validationStatus(ValidationStatus validationStatus) { | ||
| if (validationStatus == null) { | ||
| return self(); | ||
| } | ||
| this.validationStatus = validationStatus; | ||
| return self(); | ||
| } | ||
|
|
||
| public ValidationStatus validationStatus() { | ||
| return validationStatus; | ||
| } | ||
|
|
||
| public B rely(boolean rely) { | ||
| this.rely = rely; | ||
| return self(); | ||
| } | ||
|
|
||
| public boolean rely() { | ||
| return rely; | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,134 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.connector.catalog.constraints; | ||
|
|
||
| import java.util.Map; | ||
| import java.util.Objects; | ||
|
|
||
| import org.apache.spark.SparkIllegalArgumentException; | ||
| import org.apache.spark.annotation.Evolving; | ||
| import org.apache.spark.sql.connector.expressions.filter.Predicate; | ||
|
|
||
| /** | ||
| * A CHECK constraint. | ||
| * <p> | ||
| * A CHECK constraint defines a condition each row in a table must satisfy. Connectors can define | ||
| * such constraints either in SQL (Spark SQL dialect) or using a {@link Predicate predicate} if the | ||
| * condition can be expressed using a supported expression. A CHECK constraint can reference one or | ||
| * more columns. Such a constraint is considered violated if its condition evaluates to | ||
| * {@code FALSE}, but not {@code NULL}. The search condition must be deterministic and cannot | ||
| * contain subqueries or certain functions such as aggregates or UDFs. | ||
| * <p> | ||
| * Spark supports enforced and not enforced CHECK constraints, allowing connectors to control | ||
| * whether data modifications that violate the constraint must fail. Each constraint is either | ||
| * valid (the existing data is guaranteed to satisfy the constraint), invalid (some records violate | ||
| * the constraint), or unvalidated (the validity is unknown). If the validity is unknown, Spark | ||
| * will check {@link #rely()} to see whether the constraint is believed to be true and can be used | ||
| * for query optimization. | ||
| * | ||
| * @since 4.1.0 | ||
| */ | ||
| @Evolving | ||
| public class Check extends BaseConstraint { | ||
|
||
|
|
||
| private final String predicateSql; | ||
| private final Predicate predicate; | ||
|
|
||
| private Check( | ||
| String name, | ||
| String predicateSql, | ||
| Predicate predicate, | ||
| boolean enforced, | ||
| ValidationStatus validationStatus, | ||
| boolean rely) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shall we put the parameters of base class first? And then the subclass's parameters.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It feels more natural to me to follow the order of importance/definition in SQL. |
||
| super(name, enforced, validationStatus, rely); | ||
| this.predicateSql = predicateSql; | ||
| this.predicate = predicate; | ||
| } | ||
|
|
||
| /** | ||
| * Returns the SQL representation of the search condition (Spark SQL dialect). | ||
| */ | ||
| public String predicateSql() { | ||
| return predicateSql; | ||
| } | ||
|
|
||
| /** | ||
| * Returns the search condition. | ||
| */ | ||
| public Predicate predicate() { | ||
| return predicate; | ||
| } | ||
|
|
||
| /** | ||
| * Builds the {@code CHECK (...)} clause. The SQL text of the condition is used when it is | ||
| * set; otherwise the string form of the predicate is used. | ||
| */ | ||
| @Override | ||
| protected String definition() { | ||
| Object condition = predicate; | ||
| if (predicateSql != null) { | ||
| condition = predicateSql; | ||
| } | ||
| return String.format("CHECK (%s)", condition); | ||
| } | ||
|
|
||
| @Override | ||
| public boolean equals(Object other) { | ||
| if (this == other) return true; | ||
| if (other == null || getClass() != other.getClass()) return false; | ||
| Check that = (Check) other; | ||
| return Objects.equals(name(), that.name()) && | ||
| Objects.equals(predicateSql, that.predicateSql) && | ||
| Objects.equals(predicate, that.predicate) && | ||
| enforced() == that.enforced() && | ||
| Objects.equals(validationStatus(), that.validationStatus()) && | ||
| rely() == that.rely(); | ||
| } | ||
|
|
||
| @Override | ||
| public int hashCode() { | ||
| return Objects.hash(name(), predicateSql, predicate, enforced(), validationStatus(), rely()); | ||
| } | ||
|
|
||
| public static class Builder extends BaseConstraint.Builder<Builder, Check> { | ||
|
|
||
| private String predicateSql; | ||
| private Predicate predicate; | ||
|
|
||
| Builder(String name) { | ||
| super(name); | ||
| } | ||
|
|
||
| @Override | ||
| protected Builder self() { | ||
| return this; | ||
| } | ||
|
|
||
| public Builder predicateSql(String predicateSql) { | ||
| this.predicateSql = predicateSql; | ||
| return this; | ||
| } | ||
|
|
||
| public Builder predicate(Predicate predicate) { | ||
| this.predicate = predicate; | ||
| return this; | ||
| } | ||
|
|
||
| /** | ||
| * Builds the CHECK constraint from the values collected by this builder. | ||
| * | ||
| * @return a new CHECK constraint | ||
| * @throws SparkIllegalArgumentException if neither the predicate SQL nor the predicate | ||
| * has been set | ||
| */ | ||
| @Override | ||
| public Check build() { | ||
| // At least one representation of the condition is required to define the constraint. | ||
| if (predicateSql == null && predicate == null) { | ||
| throw new SparkIllegalArgumentException( | ||
| "INTERNAL_ERROR", | ||
| Map.of("message", "Predicate SQL and expression can't be both null in CHECK")); | ||
| } | ||
| return new Check(name(), predicateSql, predicate, enforced(), validationStatus(), rely()); | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,121 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.connector.catalog.constraints; | ||
|
|
||
| import org.apache.spark.annotation.Evolving; | ||
| import org.apache.spark.sql.connector.catalog.Identifier; | ||
| import org.apache.spark.sql.connector.expressions.NamedReference; | ||
|
|
||
| /** | ||
| * A constraint that restricts states of data in a table. | ||
| * | ||
| * @since 4.1.0 | ||
| */ | ||
| @Evolving | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. qq, why is this evolving and not others? Not sure the convention in Spark
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wasn't sure about annotating classes that implement this interface. |
||
| public interface Constraint { | ||
| /** | ||
| * Returns the name of this constraint. | ||
| */ | ||
| String name(); | ||
|
|
||
| /** | ||
| * Indicates whether this constraint is actively enforced. If enforced, data modifications | ||
| * that violate the constraint fail with a constraint violation error. | ||
| */ | ||
| boolean enforced(); | ||
|
|
||
| /** | ||
| * Indicates whether the existing data in the table satisfies this constraint. The constraint | ||
| * can be valid (the data is guaranteed to satisfy the constraint), invalid (some records violate | ||
| * the constraint), or unvalidated (the validity is unknown). The validation status is usually | ||
| * managed by the system and can't be modified by the user. | ||
| */ | ||
| ValidationStatus validationStatus(); | ||
|
|
||
| /** | ||
| * Indicates whether this constraint is assumed to hold true if the validity is unknown. Unlike | ||
| * the validation status, this flag is usually provided by the user as a hint to the system. | ||
| */ | ||
| boolean rely(); | ||
|
|
||
| /** | ||
| * Returns the definition of this constraint in the DDL format. | ||
| */ | ||
| String toDDL(); | ||
|
|
||
| /** | ||
| * Instantiates a builder for a CHECK constraint. | ||
| * | ||
| * @param name the constraint name | ||
| * @return a CHECK constraint builder | ||
| */ | ||
| static Check.Builder check(String name) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I personally prefer shorter names whenever the usage/context is obvious enough. This reads well to me and matches what we did for
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 with @aokolnychyi , |
||
| return new Check.Builder(name); | ||
| } | ||
|
|
||
| /** | ||
| * Instantiates a builder for a UNIQUE constraint. | ||
| * | ||
| * @param name the constraint name | ||
| * @param columns columns that comprise the unique key | ||
| * @return a UNIQUE constraint builder | ||
| */ | ||
| static Unique.Builder unique(String name, NamedReference[] columns) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as in CHECK. |
||
| return new Unique.Builder(name, columns); | ||
| } | ||
|
|
||
| /** | ||
| * Instantiates a builder for a PRIMARY KEY constraint. | ||
| * | ||
| * @param name the constraint name | ||
| * @param columns columns that comprise the primary key | ||
| * @return a PRIMARY KEY constraint builder | ||
| */ | ||
| static PrimaryKey.Builder primaryKey(String name, NamedReference[] columns) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as in CHECK. |
||
| return new PrimaryKey.Builder(name, columns); | ||
| } | ||
|
|
||
| /** | ||
| * Instantiates a builder for a FOREIGN KEY constraint. | ||
| * | ||
| * @param name the constraint name | ||
| * @param columns the referencing columns | ||
| * @param refTable the referenced table identifier | ||
| * @param refColumns the referenced columns in the referenced table | ||
| * @return a FOREIGN KEY constraint builder | ||
| */ | ||
| static ForeignKey.Builder foreignKey( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. createForeignKeyBuilder
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as in CHECK. |
||
| String name, | ||
| NamedReference[] columns, | ||
| Identifier refTable, | ||
| NamedReference[] refColumns) { | ||
| return new ForeignKey.Builder(name, columns, refTable, refColumns); | ||
| } | ||
|
|
||
| /** | ||
| * An indicator of the validity of the constraint. | ||
| * <p> | ||
| * A constraint may be validated independently of enforcement, meaning it can be validated | ||
| * without being actively enforced, or vice versa. A constraint can be valid (the data is | ||
| * guaranteed to satisfy the constraint), invalid (some records violate the constraint), | ||
| * or unvalidated (the validity is unknown). | ||
| */ | ||
| enum ValidationStatus { | ||
| VALID, INVALID, UNVALIDATED | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.