BaseConstraint.java (new file)
@@ -0,0 +1,137 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.connector.catalog.constraints;

import java.util.StringJoiner;

import org.apache.spark.sql.connector.expressions.NamedReference;

abstract class BaseConstraint implements Constraint {

private final String name;
private final boolean enforced;
private final ValidationStatus validationStatus;
private final boolean rely;

protected BaseConstraint(
String name,
boolean enforced,
ValidationStatus validationStatus,
boolean rely) {
this.name = name;
this.enforced = enforced;
this.validationStatus = validationStatus;
this.rely = rely;
}

protected abstract String definition();

@Override
public String name() {
return name;
}

@Override
public boolean enforced() {
return enforced;
}

@Override
public ValidationStatus validationStatus() {
return validationStatus;
}

@Override
public boolean rely() {
return rely;
}

@Override
public String toDDL() {
return String.format(
"CONSTRAINT %s %s %s %s %s",
name,
definition(),
enforced ? "ENFORCED" : "NOT ENFORCED",
validationStatus,
rely ? "RELY" : "NORELY");
}

@Override
public String toString() {
return toDDL();
}

protected String toDDL(NamedReference[] columns) {
Review comment (Contributor): How about joinColumns?

Reply (Author): I am sure it would be more descriptive, but I feel toDDL makes sense as it formats the columns to be used in DDL.

StringJoiner joiner = new StringJoiner(", ");

for (NamedReference column : columns) {
joiner.add(column.toString());
}

return joiner.toString();
}

abstract static class Builder<B, C> {
private final String name;
private boolean enforced = true;
private ValidationStatus validationStatus = ValidationStatus.UNVALIDATED;
private boolean rely = false;

Builder(String name) {
this.name = name;
}

protected abstract B self();

public abstract C build();

public String name() {
return name;
}

public B enforced(boolean enforced) {
this.enforced = enforced;
return self();
}

public boolean enforced() {
return enforced;
}

public B validationStatus(ValidationStatus validationStatus) {
if (validationStatus != null) {
this.validationStatus = validationStatus;
}
return self();
}

public ValidationStatus validationStatus() {
return validationStatus;
}

public B rely(boolean rely) {
this.rely = rely;
return self();
}

public boolean rely() {
return rely;
}
}
}
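The Builder<B, C> above uses the self-typed ("recursively generic") builder idiom: setters declared on the base builder return self(), so chained calls keep the concrete subclass type without casts. A minimal sketch of how a subclass wires this up, assuming it lives in the same package; the NotNull constraint here is invented purely for illustration (the real subclasses in this PR are Check, Unique, PrimaryKey, and ForeignKey):

// Hypothetical example only: "NotNull" is not part of this PR.
final class NotNull extends BaseConstraint {
  private final String column;

  private NotNull(String name, String column, boolean enforced,
      ValidationStatus validationStatus, boolean rely) {
    super(name, enforced, validationStatus, rely);
    this.column = column;
  }

  @Override
  protected String definition() {
    return String.format("NOT NULL %s", column);
  }

  static class Builder extends BaseConstraint.Builder<Builder, NotNull> {
    private String column;

    Builder(String name) {
      super(name);
    }

    // Because enforced(...)/rely(...) in the base builder return self(),
    // a chain like new Builder("c").enforced(false).column("id").build()
    // keeps the concrete Builder type throughout.
    @Override
    protected Builder self() {
      return this;
    }

    Builder column(String column) {
      this.column = column;
      return this;
    }

    @Override
    public NotNull build() {
      return new NotNull(name(), column, enforced(), validationStatus(), rely());
    }
  }
}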
Check.java (new file)
@@ -0,0 +1,134 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.connector.catalog.constraints;

import java.util.Map;
import java.util.Objects;

import org.apache.spark.SparkIllegalArgumentException;
import org.apache.spark.annotation.Evolving;
import org.apache.spark.sql.connector.expressions.filter.Predicate;

/**
* A CHECK constraint.
* <p>
* A CHECK constraint defines a condition each row in a table must satisfy. Connectors can define
* such constraints either in SQL (Spark SQL dialect) or using a {@link Predicate predicate} if the
* condition can be expressed using a supported expression. A CHECK constraint can reference one or
* more columns. Such a constraint is considered violated if its condition evaluates to {@code FALSE},
* but not to {@code NULL}. The search condition must be deterministic and cannot contain subqueries
* or certain functions like aggregates or UDFs.
* <p>
* Spark supports enforced and not enforced CHECK constraints, allowing connectors to control
* whether data modifications that violate the constraint must fail. Each constraint is either
* valid (the existing data is guaranteed to satisfy the constraint), invalid (some records violate
* the constraint), or unvalidated (the validity is unknown). If the validity is unknown, Spark
* will check {@link #rely()} to see whether the constraint is believed to be true and can be used
* for query optimization.
*
* @since 4.1.0
*/
@Evolving
public class Check extends BaseConstraint {
Review comment (Member): Shall we override the enforced()/rely()/validationStatus() methods or fields? It would help developers understand them; otherwise, the default values will only exist in the docs and in Spark's internal code.

Reply (Author): I am not sure it is a good idea. Those methods exist in the parent interface, so they will show up in the Javadoc and IDEs will suggest them as well. I am worried about the code duplication this would cause.

Reply (Author): As a compromise, we can add more Javadoc at the top of this class. What do you think?

Reply (Member): Adding more Javadoc seems OK. If we change the default values in these classes, it can make the internal implementation simpler too. For example, we wouldn't need a default ConstraintCharacteristic for each internally parsed constraint: https://github.com/gengliangwang/spark/pull/13/files#diff-03507453aabc732a7e3efadc81cd840e436a392b1c62d5aa50647266bc3a9199R38

Reply (Author): Added more Javadoc. As for changing the default values, let's re-evaluate that in the implementation PR. I think we can either remove the common builder for constraints or handle this in the parser.


private final String predicateSql;
private final Predicate predicate;

private Check(
String name,
String predicateSql,
Predicate predicate,
boolean enforced,
ValidationStatus validationStatus,
boolean rely) {
Review comment (Contributor): Shall we put the parameters of the base class first, and then the subclass's parameters?

Reply (Author): It feels more natural to me to follow the order of importance/definition in SQL:

CONSTRAINT name CHECK (predicate) [NOT] ENFORCED [NO]RELY

super(name, enforced, validationStatus, rely);
this.predicateSql = predicateSql;
this.predicate = predicate;
}

/**
* Returns the SQL representation of the search condition (Spark SQL dialect).
*/
public String predicateSql() {
return predicateSql;
}

/**
* Returns the search condition.
*/
public Predicate predicate() {
return predicate;
}

@Override
protected String definition() {
return String.format("CHECK (%s)", predicateSql != null ? predicateSql : predicate);
}

@Override
public boolean equals(Object other) {
if (this == other) return true;
if (other == null || getClass() != other.getClass()) return false;
Check that = (Check) other;
return Objects.equals(name(), that.name()) &&
Objects.equals(predicateSql, that.predicateSql) &&
Objects.equals(predicate, that.predicate) &&
enforced() == that.enforced() &&
Objects.equals(validationStatus(), that.validationStatus()) &&
rely() == that.rely();
}

@Override
public int hashCode() {
return Objects.hash(name(), predicateSql, predicate, enforced(), validationStatus(), rely());
}

public static class Builder extends BaseConstraint.Builder<Builder, Check> {

private String predicateSql;
private Predicate predicate;

Builder(String name) {
super(name);
}

@Override
protected Builder self() {
return this;
}

public Builder predicateSql(String predicateSql) {
this.predicateSql = predicateSql;
return this;
}

public Builder predicate(Predicate predicate) {
this.predicate = predicate;
return this;
}

public Check build() {
if (predicateSql == null && predicate == null) {
throw new SparkIllegalArgumentException(
"INTERNAL_ERROR",
Map.of("message", "Predicate SQL and expression can't both be null in CHECK"));
}
return new Check(name(), predicateSql, predicate, enforced(), validationStatus(), rely());
}
}
}
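End to end, building a CHECK constraint and rendering its DDL might look like the following sketch. The constraint name and predicate SQL are illustrative; Constraint.check is the static factory defined on the Constraint interface further down in this diff, and the builder defaults (enforced = true, UNVALIDATED, rely = false) come from BaseConstraint.Builder:

// Builds a CHECK constraint via the static factory on Constraint.
Check check = Constraint.check("positive_id")
    .predicateSql("id > 0")
    .rely(true)
    .build();

// Prints: CONSTRAINT positive_id CHECK (id > 0) ENFORCED UNVALIDATED RELY
System.out.println(check.toDDL());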
Constraint.java (new file)
@@ -0,0 +1,121 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.connector.catalog.constraints;

import org.apache.spark.annotation.Evolving;
import org.apache.spark.sql.connector.catalog.Identifier;
import org.apache.spark.sql.connector.expressions.NamedReference;

/**
* A constraint that restricts the state of data in a table.
*
* @since 4.1.0
*/
@Evolving
Review comment (Member): qq, why is this @Evolving and not the others? I'm not sure about the convention in Spark.

Reply (Author): I wasn't sure about annotating classes that implement this interface. I added it to be safe, however; it should be everywhere now.

public interface Constraint {
/**
* Returns the name of this constraint.
*/
String name();

/**
* Indicates whether this constraint is actively enforced. If enforced, data modifications
* that violate the constraint fail with a constraint violation error.
*/
boolean enforced();

/**
* Indicates whether the existing data in the table satisfies this constraint. The constraint
* can be valid (the data is guaranteed to satisfy the constraint), invalid (some records violate
* the constraint), or unvalidated (the validity is unknown). The validation status is usually
* managed by the system and can't be modified by the user.
*/
ValidationStatus validationStatus();

/**
* Indicates whether this constraint is assumed to hold true if the validity is unknown. Unlike
* the validation status, this flag is usually provided by the user as a hint to the system.
*/
boolean rely();

/**
* Returns the definition of this constraint in the DDL format.
*/
String toDDL();

/**
* Instantiates a builder for a CHECK constraint.
*
* @param name the constraint name
* @return a CHECK constraint builder
*/
static Check.Builder check(String name) {
Review comment (Contributor): How about createCheckBuilder?

Reply (Author, @aokolnychyi, Mar 31, 2025): I personally prefer shorter names whenever the usage/context is obvious enough.

Constraint.check("con1").predicateSql("id > 0").enforced(true).build();

This reads well to me and matches what we did for ProcedureParameter.

Reply (Member): +1 with @aokolnychyi, check is neat.

return new Check.Builder(name);
}

/**
* Instantiates a builder for a UNIQUE constraint.
*
* @param name the constraint name
* @param columns columns that comprise the unique key
* @return a UNIQUE constraint builder
*/
static Unique.Builder unique(String name, NamedReference[] columns) {
Review comment (Contributor): createUniqueBuilder?

Reply (Author): Same as in CHECK.

return new Unique.Builder(name, columns);
}

/**
* Instantiates a builder for a PRIMARY KEY constraint.
*
* @param name the constraint name
* @param columns columns that comprise the primary key
* @return a PRIMARY KEY constraint builder
*/
static PrimaryKey.Builder primaryKey(String name, NamedReference[] columns) {
Review comment (Contributor): createPrimaryKeyBuilder?

Reply (Author): Same as in CHECK.

return new PrimaryKey.Builder(name, columns);
}

/**
* Instantiates a builder for a FOREIGN KEY constraint.
*
* @param name the constraint name
* @param columns the referencing columns
* @param refTable the referenced table identifier
* @param refColumns the referenced columns in the referenced table
* @return a FOREIGN KEY constraint builder
*/
static ForeignKey.Builder foreignKey(
Review comment (Contributor): createForeignKeyBuilder?

Reply (Author): Same as in CHECK.

String name,
NamedReference[] columns,
Identifier refTable,
NamedReference[] refColumns) {
return new ForeignKey.Builder(name, columns, refTable, refColumns);
}

/**
* An indicator of the validity of the constraint.
* <p>
* A constraint may be validated independently of enforcement, meaning it can be validated
* without being actively enforced, or vice versa. A constraint can be valid (the data is
* guaranteed to satisfy the constraint), invalid (some records violate the constraint),
* or unvalidated (the validity is unknown).
*/
enum ValidationStatus {
VALID, INVALID, UNVALIDATED
}
}
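For completeness, a sketch of the remaining factories, assuming the sibling builders (Unique.Builder, PrimaryKey.Builder, ForeignKey.Builder) follow the same pattern as Check.Builder. The column and table names are illustrative; Expressions.column and Identifier.of come from the existing connector API:

import org.apache.spark.sql.connector.catalog.Identifier;
import org.apache.spark.sql.connector.expressions.Expressions;
import org.apache.spark.sql.connector.expressions.NamedReference;

// A single-column key over "id"; multi-column keys just add more references.
NamedReference[] key = new NamedReference[] { Expressions.column("id") };
Identifier orders = Identifier.of(new String[] { "sales" }, "orders");

Constraint pk = Constraint.primaryKey("pk_id", key).build();
Constraint unique = Constraint.unique("uniq_id", key).build();
Constraint fk = Constraint.foreignKey("fk_order_id", key, orders, key).build();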