From 657368091157286c8945ba330be4fe0eb58f333f Mon Sep 17 00:00:00 2001 From: Ben Bellick Date: Wed, 22 Oct 2025 11:13:07 -0400 Subject: [PATCH 1/7] test: introduce test to capture bug #562 This commit introduces a test to capture the bug where name-colliding extension functions with different URNs cause an exception to be thrown on `FunctionConverter` construction. --- .../isthmus/DuplicateFunctionUrnTest.java | 73 +++++++++++++++++++ .../extensions/functions_duplicate_urn1.yaml | 25 +++++++ .../extensions/functions_duplicate_urn2.yaml | 25 +++++++ 3 files changed, 123 insertions(+) create mode 100644 isthmus/src/test/java/io/substrait/isthmus/DuplicateFunctionUrnTest.java create mode 100644 isthmus/src/test/resources/extensions/functions_duplicate_urn1.yaml create mode 100644 isthmus/src/test/resources/extensions/functions_duplicate_urn2.yaml diff --git a/isthmus/src/test/java/io/substrait/isthmus/DuplicateFunctionUrnTest.java b/isthmus/src/test/java/io/substrait/isthmus/DuplicateFunctionUrnTest.java new file mode 100644 index 000000000..156eafd95 --- /dev/null +++ b/isthmus/src/test/java/io/substrait/isthmus/DuplicateFunctionUrnTest.java @@ -0,0 +1,73 @@ +package io.substrait.isthmus; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import io.substrait.extension.SimpleExtension; +import io.substrait.isthmus.expression.AggregateFunctionConverter; +import io.substrait.isthmus.expression.ScalarFunctionConverter; +import io.substrait.isthmus.expression.WindowFunctionConverter; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.List; +import org.junit.jupiter.api.Test; + +/** Tests to reproduce #562 */ +public class DuplicateFunctionUrnTest extends PlanTestBase { + + static final SimpleExtension.ExtensionCollection collection; + + static { + try { + String extensions1 = asString("extensions/functions_duplicate_urn1.yaml"); + String extensions2 = asString("extensions/functions_duplicate_urn2.yaml"); + SimpleExtension.ExtensionCollection collection1 = + SimpleExtension.load("urn1://functions", extensions1); + SimpleExtension.ExtensionCollection collection2 = + SimpleExtension.load("urn2://functions", extensions2); + collection = collection1.merge(collection2); + + // Verify that the merged collection contains duplicate functions with different URNs + // This is a precondition for the tests - if this fails, the tests don't make sense + List ltrimFunctions = + collection.scalarFunctions().stream().filter(f -> f.name().equals("ltrim")).toList(); + + if (ltrimFunctions.size() != 2) { + throw new IllegalStateException( + "Expected 2 ltrim functions in merged collection, but found: " + ltrimFunctions.size()); + } + + String urn1 = ltrimFunctions.get(0).getAnchor().urn(); + String urn2 = ltrimFunctions.get(1).getAnchor().urn(); + if (urn1.equals(urn2)) { + throw new IllegalStateException( + "Expected different URNs for the two ltrim functions, but both were: " + urn1); + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Test + void testDuplicateFunctionWithDifferentUrns() { + ScalarFunctionConverter converter = + new ScalarFunctionConverter(collection.scalarFunctions(), typeFactory); + + assertNotNull(converter); + } + + @Test + void testDuplicateAggregateFunctionWithDifferentUrns() { + AggregateFunctionConverter converter = + new AggregateFunctionConverter(collection.aggregateFunctions(), typeFactory); + + assertNotNull(converter); + } + + @Test + void testDuplicateWindowFunctionWithDifferentUrns() { + WindowFunctionConverter converter = + new WindowFunctionConverter(collection.windowFunctions(), typeFactory); + + assertNotNull(converter); + } +} diff --git a/isthmus/src/test/resources/extensions/functions_duplicate_urn1.yaml b/isthmus/src/test/resources/extensions/functions_duplicate_urn1.yaml new file mode 100644 index 000000000..c867441a2 --- /dev/null +++ b/isthmus/src/test/resources/extensions/functions_duplicate_urn1.yaml @@ -0,0 +1,25 @@ +%YAML 1.2 +--- +urn: extension:io.substrait:functions_string + +scalar_functions: + - name: "ltrim" + description: "left trim from standard functions" + impls: + - args: + - name: str + value: string + - name: chars + value: string + return: string + - name: "concat" + description: "concatenate strings" + impls: + - args: + - name: str1 + value: string + - name: str2 + value: string + variadic: + min: 0 + return: string diff --git a/isthmus/src/test/resources/extensions/functions_duplicate_urn2.yaml b/isthmus/src/test/resources/extensions/functions_duplicate_urn2.yaml new file mode 100644 index 000000000..85d9d052d --- /dev/null +++ b/isthmus/src/test/resources/extensions/functions_duplicate_urn2.yaml @@ -0,0 +1,25 @@ +%YAML 1.2 +--- +urn: extension:com.domain:string + +scalar_functions: + - name: "ltrim" + description: "left trim from custom domain" + impls: + - args: + - name: str + value: string + - name: chars + value: string + return: string + - name: "concat" + description: "concatenate strings from custom domain" + impls: + - args: + - name: str1 + value: string + - name: str2 + value: string + variadic: + min: 0 + return: string From 1dc0ec94ef36c34980ac7878bc02e64f692926c2 Mon Sep 17 00:00:00 2001 From: Ben Bellick Date: Wed, 22 Oct 2025 11:14:08 -0400 Subject: [PATCH 2/7] refactor: rename alm to nameToFnMap in FunctionConverter This is a small change to make the meaning of a variable clearer in the `FunctionConverter.java` implementation. --- .../isthmus/expression/FunctionConverter.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java index 851230a36..7ef173a2e 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java @@ -75,9 +75,9 @@ public FunctionConverter( this.typeFactory = typeFactory; this.substraitFuncKeyToSqlOperatorMap = ArrayListMultimap.create(); - ArrayListMultimap alm = ArrayListMultimap.create(); + ArrayListMultimap nameToFn = ArrayListMultimap.create(); for (F f : functions) { - alm.put(f.name().toLowerCase(Locale.ROOT), f); + nameToFn.put(f.name().toLowerCase(Locale.ROOT), f); } Multimap calciteOperators = @@ -87,21 +87,21 @@ public FunctionConverter( FunctionMappings.Sig::name, Function.identity(), ArrayListMultimap::create)); IdentityHashMap matcherMap = new IdentityHashMap(); - for (String key : alm.keySet()) { + for (String key : nameToFn.keySet()) { Collection sigs = calciteOperators.get(key); if (sigs.isEmpty()) { LOGGER.atDebug().log("No binding for function: {}", key); } for (Sig sig : sigs) { - List implList = alm.get(key); + List implList = nameToFn.get(key); if (!implList.isEmpty()) { matcherMap.put(sig.operator(), new FunctionFinder(key, sig.operator(), implList)); } } } - for (Entry entry : alm.entries()) { + for (Entry entry : nameToFn.entries()) { String key = entry.getKey(); F func = entry.getValue(); for (FunctionMappings.Sig sig : calciteOperators.get(key)) { From f97fde7e18484266a6c2401f5c02d2849d922491 Mon Sep 17 00:00:00 2001 From: Ben Bellick Date: Wed, 22 Oct 2025 14:59:14 -0400 Subject: [PATCH 3/7] fix: allow multiple same-named functions in FunctionConverter --- .../isthmus/expression/FunctionConverter.java | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java index 7ef173a2e..d2bedc53c 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java @@ -2,7 +2,8 @@ import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableListMultimap; +import com.google.common.collect.ListMultimap; import com.google.common.collect.Multimap; import com.google.common.collect.Multimaps; import com.google.common.collect.Streams; @@ -155,7 +156,7 @@ protected class FunctionFinder { private final String substraitName; private final SqlOperator operator; private final List functions; - private final Map directMap; + private final ListMultimap directMap; private final Optional> singularInputType; private final Util.IntRange argRange; @@ -168,7 +169,7 @@ public FunctionFinder(String substraitName, SqlOperator operator, List functi functions.stream().mapToInt(t -> t.getRange().getStartInclusive()).min().getAsInt(), functions.stream().mapToInt(t -> t.getRange().getEndExclusive()).max().getAsInt()); this.singularInputType = getSingularInputType(functions); - ImmutableMap.Builder directMap = ImmutableMap.builder(); + ImmutableListMultimap.Builder directMap = ImmutableListMultimap.builder(); for (F func : functions) { String key = func.key(); directMap.put(key, func); @@ -349,6 +350,9 @@ public Optional attemptMatch(C call, Function topLevelCo * Not enough context here to construct a substrait EnumArg. * Once a FunctionVariant is resolved we can map the String Literal * to a EnumArg. + * + * Note that if there are multiple registered function extensions which can match a particular Call, + * the last one added to the extension collection will be matched. */ List operandsList = call.getOperands().collect(Collectors.toList()); List operands = @@ -369,7 +373,13 @@ public Optional attemptMatch(C call, Function topLevelCo .findFirst(); if (directMatchKey.isPresent()) { - F variant = directMap.get(directMatchKey.get()); + List variants = directMap.get(directMatchKey.get()); + if (variants.isEmpty()) { + + return Optional.empty(); + } + + F variant = variants.get(variants.size() - 1); variant.validateOutputType(operands, outputType); List funcArgs = IntStream.range(0, operandsList.size()) From 03b0f241bfae024b0ae689ef7b7bd641d661979d Mon Sep 17 00:00:00 2001 From: Ben Bellick Date: Wed, 22 Oct 2025 15:10:35 -0400 Subject: [PATCH 4/7] test: that order added determines precedence in FunctionConverter --- .../isthmus/DuplicateFunctionUrnTest.java | 63 +++++++++++++++++-- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/isthmus/src/test/java/io/substrait/isthmus/DuplicateFunctionUrnTest.java b/isthmus/src/test/java/io/substrait/isthmus/DuplicateFunctionUrnTest.java index 156eafd95..24e9bb792 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/DuplicateFunctionUrnTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/DuplicateFunctionUrnTest.java @@ -1,7 +1,9 @@ package io.substrait.isthmus; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import io.substrait.expression.Expression; import io.substrait.extension.SimpleExtension; import io.substrait.isthmus.expression.AggregateFunctionConverter; import io.substrait.isthmus.expression.ScalarFunctionConverter; @@ -9,21 +11,26 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.util.List; +import java.util.Optional; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.junit.jupiter.api.Test; /** Tests to reproduce #562 */ public class DuplicateFunctionUrnTest extends PlanTestBase { + static final SimpleExtension.ExtensionCollection collection1; + static final SimpleExtension.ExtensionCollection collection2; static final SimpleExtension.ExtensionCollection collection; static { try { String extensions1 = asString("extensions/functions_duplicate_urn1.yaml"); String extensions2 = asString("extensions/functions_duplicate_urn2.yaml"); - SimpleExtension.ExtensionCollection collection1 = - SimpleExtension.load("urn1://functions", extensions1); - SimpleExtension.ExtensionCollection collection2 = - SimpleExtension.load("urn2://functions", extensions2); + collection1 = SimpleExtension.load("urn1://functions", extensions1); + collection2 = SimpleExtension.load("urn2://functions", extensions2); collection = collection1.merge(collection2); // Verify that the merged collection contains duplicate functions with different URNs @@ -70,4 +77,52 @@ void testDuplicateWindowFunctionWithDifferentUrns() { assertNotNull(converter); } + + @Test + void testMergeOrderDeterminesFunctionPrecedence() { + // This test verifies that when multiple extension collections contain functions with + // the same name and signature but different URNs, the merge order determines precedence. + // The FunctionConverter uses a "last-wins" strategy: the last function added to the + // extension collection will be matched when converting from Calcite to Substrait. + + SimpleExtension.ExtensionCollection reverseCollection = collection2.merge(collection1); + ScalarFunctionConverter converterA = + new ScalarFunctionConverter(collection.scalarFunctions(), typeFactory); + ScalarFunctionConverter converterB = + new ScalarFunctionConverter(reverseCollection.scalarFunctions(), typeFactory); + + RexBuilder rexBuilder = new RexBuilder(typeFactory); + RexNode arg1 = rexBuilder.makeLiteral("hello"); + RexNode arg2 = rexBuilder.makeLiteral("world"); + RexCall concatCall = (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.CONCAT, arg1, arg2); + + // Create a simple topLevelConverter that converts literals to Substrait expressions + java.util.function.Function topLevelConverter = + rexNode -> { + if (rexNode instanceof org.apache.calcite.rex.RexLiteral) { + org.apache.calcite.rex.RexLiteral lit = (org.apache.calcite.rex.RexLiteral) rexNode; + return Expression.StrLiteral.builder() + .value(lit.getValueAs(String.class)) + .nullable(false) + .build(); + } + throw new UnsupportedOperationException("Only literals supported in test"); + }; + + Optional exprA = converterA.convert(concatCall, topLevelConverter); + Optional exprB = converterB.convert(concatCall, topLevelConverter); + + Expression.ScalarFunctionInvocation funcA = (Expression.ScalarFunctionInvocation) exprA.get(); + Expression.ScalarFunctionInvocation funcB = (Expression.ScalarFunctionInvocation) exprB.get(); + + assertEquals( + "extension:com.domain:string", + funcA.declaration().getAnchor().urn(), + "converterA should use last concat function (from collection2)"); + + assertEquals( + "extension:io.substrait:functions_string", + funcB.declaration().getAnchor().urn(), + "converterB should use last concat function (from collection1)"); + } } From afa6fdf5749e64225c4aa4bbdb9d0e96466da614 Mon Sep 17 00:00:00 2001 From: Ben Bellick Date: Wed, 22 Oct 2025 15:59:19 -0400 Subject: [PATCH 5/7] test: simplify tests and add special test for overlapping default ext Added a test for ltrim to ensure there is no issue specifically with the default extension collection functions (which have some special handling in isthmus) and an introduced ltrim function. --- .../isthmus/DuplicateFunctionUrnTest.java | 97 +++++++++++++++---- .../extensions/functions_duplicate_urn1.yaml | 9 -- 2 files changed, 78 insertions(+), 28 deletions(-) diff --git a/isthmus/src/test/java/io/substrait/isthmus/DuplicateFunctionUrnTest.java b/isthmus/src/test/java/io/substrait/isthmus/DuplicateFunctionUrnTest.java index 24e9bb792..8c6534160 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/DuplicateFunctionUrnTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/DuplicateFunctionUrnTest.java @@ -16,6 +16,7 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.fun.SqlTrimFunction.Flag; import org.junit.jupiter.api.Test; /** Tests to reproduce #562 */ @@ -33,21 +34,22 @@ public class DuplicateFunctionUrnTest extends PlanTestBase { collection2 = SimpleExtension.load("urn2://functions", extensions2); collection = collection1.merge(collection2); - // Verify that the merged collection contains duplicate functions with different URNs + // Verify that the merged collection contains duplicate concat functions with different URNs // This is a precondition for the tests - if this fails, the tests don't make sense - List ltrimFunctions = - collection.scalarFunctions().stream().filter(f -> f.name().equals("ltrim")).toList(); + List concatFunctions = + collection.scalarFunctions().stream().filter(f -> f.name().equals("concat")).toList(); - if (ltrimFunctions.size() != 2) { + if (concatFunctions.size() != 2) { throw new IllegalStateException( - "Expected 2 ltrim functions in merged collection, but found: " + ltrimFunctions.size()); + "Expected 2 concat functions in merged collection, but found: " + + concatFunctions.size()); } - String urn1 = ltrimFunctions.get(0).getAnchor().urn(); - String urn2 = ltrimFunctions.get(1).getAnchor().urn(); + String urn1 = concatFunctions.get(0).getAnchor().urn(); + String urn2 = concatFunctions.get(1).getAnchor().urn(); if (urn1.equals(urn2)) { throw new IllegalStateException( - "Expected different URNs for the two ltrim functions, but both were: " + urn1); + "Expected different URNs for the two concat functions, but both were: " + urn1); } } catch (IOException e) { throw new UncheckedIOException(e); @@ -92,21 +94,21 @@ void testMergeOrderDeterminesFunctionPrecedence() { new ScalarFunctionConverter(reverseCollection.scalarFunctions(), typeFactory); RexBuilder rexBuilder = new RexBuilder(typeFactory); - RexNode arg1 = rexBuilder.makeLiteral("hello"); - RexNode arg2 = rexBuilder.makeLiteral("world"); - RexCall concatCall = (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.CONCAT, arg1, arg2); + RexCall concatCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CONCAT, + rexBuilder.makeLiteral("hello"), + rexBuilder.makeLiteral("world")); // Create a simple topLevelConverter that converts literals to Substrait expressions java.util.function.Function topLevelConverter = rexNode -> { - if (rexNode instanceof org.apache.calcite.rex.RexLiteral) { - org.apache.calcite.rex.RexLiteral lit = (org.apache.calcite.rex.RexLiteral) rexNode; - return Expression.StrLiteral.builder() - .value(lit.getValueAs(String.class)) - .nullable(false) - .build(); - } - throw new UnsupportedOperationException("Only literals supported in test"); + org.apache.calcite.rex.RexLiteral lit = (org.apache.calcite.rex.RexLiteral) rexNode; + return Expression.StrLiteral.builder() + .value(lit.getValueAs(String.class)) + .nullable(false) + .build(); }; Optional exprA = converterA.convert(concatCall, topLevelConverter); @@ -125,4 +127,61 @@ void testMergeOrderDeterminesFunctionPrecedence() { funcB.declaration().getAnchor().urn(), "converterB should use last concat function (from collection1)"); } + + @Test + void testLtrimMergeOrderWithDefaultExtensions() { + // This test verifies precedence between a custom ltrim (from collection2 with + // extension:com.domain:string) and the default extension catalog's ltrim + // (extension:io.substrait:functions_string). + // The FunctionConverter uses a "last-wins" strategy. + + // Merge default extensions with collection2 - collection2's ltrim should be last + SimpleExtension.ExtensionCollection defaultWithCustom = extensions.merge(collection2); + + // Merge collection2 with default extensions - default ltrim should be last + SimpleExtension.ExtensionCollection customWithDefault = collection2.merge(extensions); + + ScalarFunctionConverter converterA = + new ScalarFunctionConverter(defaultWithCustom.scalarFunctions(), typeFactory); + ScalarFunctionConverter converterB = + new ScalarFunctionConverter(customWithDefault.scalarFunctions(), typeFactory); + + // Create a TRIM(LEADING ' ' FROM 'test') call which uses TrimFunctionMapper to map to ltrim + RexBuilder rexBuilder = new RexBuilder(typeFactory); + RexCall trimCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.TRIM, + rexBuilder.makeFlag(Flag.LEADING), + rexBuilder.makeLiteral(" "), + rexBuilder.makeLiteral("test")); + + java.util.function.Function topLevelConverter = + rexNode -> { + org.apache.calcite.rex.RexLiteral lit = (org.apache.calcite.rex.RexLiteral) rexNode; + Object value = lit.getValue(); + if (value == null) { + return Expression.StrLiteral.builder().value("").nullable(true).build(); + } + // Convert any literal value to string + return Expression.StrLiteral.builder().value(value.toString()).nullable(false).build(); + }; + + Optional exprA = converterA.convert(trimCall, topLevelConverter); + Optional exprB = converterB.convert(trimCall, topLevelConverter); + + Expression.ScalarFunctionInvocation funcA = (Expression.ScalarFunctionInvocation) exprA.get(); + // converterA should use collection2's custom ltrim (last) + assertEquals( + "extension:com.domain:string", + funcA.declaration().getAnchor().urn(), + "converterA should use last ltrim (custom from collection2)"); + + Expression.ScalarFunctionInvocation funcB = (Expression.ScalarFunctionInvocation) exprB.get(); + // converterB should use default extensions' ltrim (last) + assertEquals( + "extension:io.substrait:functions_string", + funcB.declaration().getAnchor().urn(), + "converterB should use last ltrim (from default extensions)"); + } } diff --git a/isthmus/src/test/resources/extensions/functions_duplicate_urn1.yaml b/isthmus/src/test/resources/extensions/functions_duplicate_urn1.yaml index c867441a2..c7b9d98f8 100644 --- a/isthmus/src/test/resources/extensions/functions_duplicate_urn1.yaml +++ b/isthmus/src/test/resources/extensions/functions_duplicate_urn1.yaml @@ -3,15 +3,6 @@ urn: extension:io.substrait:functions_string scalar_functions: - - name: "ltrim" - description: "left trim from standard functions" - impls: - - args: - - name: str - value: string - - name: chars - value: string - return: string - name: "concat" description: "concatenate strings" impls: From db134256839157676002fd9f08cfc94d382ae51b Mon Sep 17 00:00:00 2001 From: Ben Bellick Date: Thu, 23 Oct 2025 09:53:30 -0400 Subject: [PATCH 6/7] fix: address @nielspardon comments on PR #564 This commit: - improves the documentation around the `FunctionConverter.java` class - fixes some minor stylistic comments around the tests --- .../isthmus/expression/FunctionConverter.java | 67 +++++++++++++++++++ .../isthmus/DuplicateFunctionUrnTest.java | 27 +++----- 2 files changed, 78 insertions(+), 16 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java index d2bedc53c..a9f8bf334 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java @@ -46,6 +46,27 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * Abstract base class for converting between Calcite SqlOperators and Substrait function + * invocations. + * + *

This class handles bidirectional conversion: + * + *

    + *
  • Calcite → Substrait: Subclasses implement {@code convert()} methods to convert + * Calcite calls to Substrait function invocations + *
  • Substrait → Calcite: {@link #getSqlOperatorFromSubstraitFunc} converts Substrait + * function keys to Calcite SqlOperators + *
+ * + *

When multiple functions with the same name and signature are passed into the constructor, a + * last-wins precedence strategy is used for resolution. The last function in the input list + * takes precedence during Calcite to Substrait conversion. + * + * @param the function type (ScalarFunctionVariant, AggregateFunctionVariant, etc.) + * @param the return type for Calcite→Substrait conversion + * @param the call type being converted + */ public abstract class FunctionConverter< F extends SimpleExtension.Function, T, C extends FunctionConverter.GenericCall> { @@ -58,10 +79,32 @@ public abstract class FunctionConverter< protected final Multimap substraitFuncKeyToSqlOperatorMap; + /** + * Creates a FunctionConverter with the given functions. + * + *

If there are multiple functions provided with the same name and signature (e.g., from + * different extension URNs), the last one in the list will be given precedence during Calcite to + * Substrait conversion. + * + * @param functions the list of function variants to register + * @param typeFactory the Calcite type factory + */ public FunctionConverter(List functions, RelDataTypeFactory typeFactory) { this(functions, Collections.EMPTY_LIST, typeFactory, TypeConverter.DEFAULT); } + /** + * Creates a FunctionConverter with the given functions and additional signatures. + * + *

If there are multiple functions provided with the same name and signature (e.g., from + * different extension URNs), the last one in the list will be given precedence during Calcite to + * Substrait conversion. + * + * @param functions the list of function variants to register + * @param additionalSignatures additional Calcite operator signatures to map + * @param typeFactory the Calcite type factory + * @param typeConverter the type converter to use + */ public FunctionConverter( List functions, List additionalSignatures, @@ -113,6 +156,17 @@ public FunctionConverter( this.signatures = matcherMap; } + /** + * Converts a Substrait function to a Calcite SqlOperator (Substrait → Calcite direction). + * + *

Given a Substrait function key (e.g., "concat:str_str") and output type, this method finds + * the corresponding Calcite SqlOperator. When multiple operators match, the output type is used + * to disambiguate. + * + * @param key the Substrait function key (function name with type signature) + * @param outputType the expected output type + * @return the matching SqlOperator, or empty if no match found + */ public Optional getSqlOperatorFromSubstraitFunc(String key, Type outputType) { Map resolver = getTypeBasedResolver(); Collection operators = substraitFuncKeyToSqlOperatorMap.get(key); @@ -343,6 +397,19 @@ private Stream matchKeys(List rexOperands, List opTypes } } + /** + * Converts a Calcite call to a Substrait function invocation (Calcite → Substrait direction). + * + *

This method tries to find a matching Substrait function for the given Calcite call using + * direct signature matching, type coercion, and least-restrictive type resolution. + * + *

If multiple registered function extensions have the same name and signature, the last one + * in the list passed into the constructor will be matched. + * + * @param call the Calcite call to match + * @param topLevelConverter function to convert RexNode operands to Substrait Expressions + * @return the matched Substrait function binding, or empty if no match found + */ public Optional attemptMatch(C call, Function topLevelConverter) { /* diff --git a/isthmus/src/test/java/io/substrait/isthmus/DuplicateFunctionUrnTest.java b/isthmus/src/test/java/io/substrait/isthmus/DuplicateFunctionUrnTest.java index 8c6534160..1d8cca4cc 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/DuplicateFunctionUrnTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/DuplicateFunctionUrnTest.java @@ -1,7 +1,7 @@ package io.substrait.isthmus; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; import io.substrait.expression.Expression; import io.substrait.extension.SimpleExtension; @@ -20,7 +20,7 @@ import org.junit.jupiter.api.Test; /** Tests to reproduce #562 */ -public class DuplicateFunctionUrnTest extends PlanTestBase { +class DuplicateFunctionUrnTest extends PlanTestBase { static final SimpleExtension.ExtensionCollection collection1; static final SimpleExtension.ExtensionCollection collection2; @@ -30,8 +30,9 @@ public class DuplicateFunctionUrnTest extends PlanTestBase { try { String extensions1 = asString("extensions/functions_duplicate_urn1.yaml"); String extensions2 = asString("extensions/functions_duplicate_urn2.yaml"); - collection1 = SimpleExtension.load("urn1://functions", extensions1); - collection2 = SimpleExtension.load("urn2://functions", extensions2); + collection1 = + SimpleExtension.load("urn:extension:io.substrait:functions_string", extensions1); + collection2 = SimpleExtension.load("urn:extension:com.domain:string", extensions2); collection = collection1.merge(collection2); // Verify that the merged collection contains duplicate concat functions with different URNs @@ -58,26 +59,20 @@ public class DuplicateFunctionUrnTest extends PlanTestBase { @Test void testDuplicateFunctionWithDifferentUrns() { - ScalarFunctionConverter converter = - new ScalarFunctionConverter(collection.scalarFunctions(), typeFactory); - - assertNotNull(converter); + assertDoesNotThrow( + () -> new ScalarFunctionConverter(collection.scalarFunctions(), typeFactory)); } @Test void testDuplicateAggregateFunctionWithDifferentUrns() { - AggregateFunctionConverter converter = - new AggregateFunctionConverter(collection.aggregateFunctions(), typeFactory); - - assertNotNull(converter); + assertDoesNotThrow( + () -> new AggregateFunctionConverter(collection.aggregateFunctions(), typeFactory)); } @Test void testDuplicateWindowFunctionWithDifferentUrns() { - WindowFunctionConverter converter = - new WindowFunctionConverter(collection.windowFunctions(), typeFactory); - - assertNotNull(converter); + assertDoesNotThrow( + () -> new WindowFunctionConverter(collection.windowFunctions(), typeFactory)); } @Test From d21ef7a44178c25706add5b16860f03e9cae54b8 Mon Sep 17 00:00:00 2001 From: Ben Bellick Date: Thu, 23 Oct 2025 10:09:34 -0400 Subject: [PATCH 7/7] docs: wrap mentions of SqlOperator in @link --- .../isthmus/expression/FunctionConverter.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java index a9f8bf334..d738ef157 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java @@ -47,7 +47,7 @@ import org.slf4j.LoggerFactory; /** - * Abstract base class for converting between Calcite SqlOperators and Substrait function + * Abstract base class for converting between Calcite {@link SqlOperator}s and Substrait function * invocations. * *

This class handles bidirectional conversion: @@ -56,7 +56,7 @@ *

  • Calcite → Substrait: Subclasses implement {@code convert()} methods to convert * Calcite calls to Substrait function invocations *
  • Substrait → Calcite: {@link #getSqlOperatorFromSubstraitFunc} converts Substrait - * function keys to Calcite SqlOperators + * function keys to Calcite {@link SqlOperator}s * * *

    When multiple functions with the same name and signature are passed into the constructor, a @@ -157,15 +157,15 @@ public FunctionConverter( } /** - * Converts a Substrait function to a Calcite SqlOperator (Substrait → Calcite direction). + * Converts a Substrait function to a Calcite {@link SqlOperator} (Substrait → Calcite direction). * *

    Given a Substrait function key (e.g., "concat:str_str") and output type, this method finds - * the corresponding Calcite SqlOperator. When multiple operators match, the output type is used - * to disambiguate. + * the corresponding Calcite {@link SqlOperator}. When multiple operators match, the output type + * is used to disambiguate. * * @param key the Substrait function key (function name with type signature) * @param outputType the expected output type - * @return the matching SqlOperator, or empty if no match found + * @return the matching {@link SqlOperator}, or empty if no match found */ public Optional getSqlOperatorFromSubstraitFunc(String key, Type outputType) { Map resolver = getTypeBasedResolver();