diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java b/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java index 382ef325ff3..bd57d0a8a68 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java @@ -29,11 +29,13 @@ package org.opensearch.sql.ast.expression; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_BOOLEAN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_BYTE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_DATE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_DOUBLE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_FLOAT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_INT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_LONG; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_SHORT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_STRING; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_TIME; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_TIMESTAMP; @@ -49,6 +51,7 @@ import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.Node; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.function.FunctionName; /** @@ -60,9 +63,11 @@ @ToString public class Cast extends UnresolvedExpression { - private static Map CONVERTED_TYPE_FUNCTION_NAME_MAP = + private static final Map CONVERTED_TYPE_FUNCTION_NAME_MAP = new ImmutableMap.Builder() .put("string", CAST_TO_STRING.getName()) + .put("byte", CAST_TO_BYTE.getName()) + .put("short", CAST_TO_SHORT.getName()) .put("int", CAST_TO_INT.getName()) .put("integer", 
CAST_TO_INT.getName()) .put("long", CAST_TO_LONG.getName()) @@ -84,6 +89,25 @@ public class Cast extends UnresolvedExpression { */ private final UnresolvedExpression convertedType; + /** + * Check if the given function name is a cast function or not. + * @param name function name + * @return true if cast function, otherwise false. + */ + public static boolean isCastFunction(FunctionName name) { + return CONVERTED_TYPE_FUNCTION_NAME_MAP.containsValue(name); + } + + /** + * Get the cast function name for a given target data type. + * @param targetType target data type + * @return cast function name for the target type, or null if the type has no cast function + */ + public static FunctionName getCastFunctionName(ExprType targetType) { + String type = targetType.typeName().toLowerCase(Locale.ROOT); + return CONVERTED_TYPE_FUNCTION_NAME_MAP.get(type); + } + /** * Get the converted type. * diff --git a/core/src/main/java/org/opensearch/sql/data/type/ExprCoreType.java b/core/src/main/java/org/opensearch/sql/data/type/ExprCoreType.java index 3b37cfbf31a..92da09490cd 100644 --- a/core/src/main/java/org/opensearch/sql/data/type/ExprCoreType.java +++ b/core/src/main/java/org/opensearch/sql/data/type/ExprCoreType.java @@ -28,12 +28,13 @@ package org.opensearch.sql.data.type; -import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; /** @@ -62,16 +63,15 @@ public enum ExprCoreType implements ExprType { FLOAT(LONG), DOUBLE(FLOAT), - /** - * Boolean. - */ - BOOLEAN(UNDEFINED), - /** * String. */ STRING(UNDEFINED), + /** + * Boolean. + */ + BOOLEAN(STRING), /** * Date. 
@@ -108,6 +108,16 @@ public enum ExprCoreType implements ExprType { .put(STRING, "keyword") .build(); + private static final Set NUMBER_TYPES = + new ImmutableSet.Builder() + .add(BYTE) + .add(SHORT) + .add(INTEGER) + .add(LONG) + .add(FLOAT) + .add(DOUBLE) + .build(); + ExprCoreType(ExprCoreType... compatibleTypes) { for (ExprCoreType subType : compatibleTypes) { subType.parents.add(this); @@ -139,7 +149,7 @@ public static List coreTypes() { .collect(Collectors.toList()); } - public static List numberTypes() { - return ImmutableList.of(INTEGER, LONG, FLOAT, DOUBLE); + public static Set numberTypes() { + return NUMBER_TYPES; } } diff --git a/core/src/main/java/org/opensearch/sql/data/type/ExprType.java b/core/src/main/java/org/opensearch/sql/data/type/ExprType.java index a26f758b201..97c46ca4e59 100644 --- a/core/src/main/java/org/opensearch/sql/data/type/ExprType.java +++ b/core/src/main/java/org/opensearch/sql/data/type/ExprType.java @@ -58,6 +58,16 @@ default boolean isCompatible(ExprType other) { } } + /** + * Should cast this type to other type or not. By default, cast is always required + * if the given type is different from this type. + * @param other other data type + * @return true if cast is required, otherwise false + */ + default boolean shouldCast(ExprType other) { + return !this.equals(other); + } + /** * Get the parent type. 
*/ diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index 560414592cd..42a49db2eef 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -592,6 +592,16 @@ public FunctionExpression castString(Expression value) { .compile(BuiltinFunctionName.CAST_TO_STRING.getName(), Arrays.asList(value)); } + public FunctionExpression castByte(Expression value) { + return (FunctionExpression) repository + .compile(BuiltinFunctionName.CAST_TO_BYTE.getName(), Arrays.asList(value)); + } + + public FunctionExpression castShort(Expression value) { + return (FunctionExpression) repository + .compile(BuiltinFunctionName.CAST_TO_SHORT.getName(), Arrays.asList(value)); + } + public FunctionExpression castInt(Expression value) { return (FunctionExpression) repository .compile(BuiltinFunctionName.CAST_TO_INT.getName(), Arrays.asList(value)); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 24e65d4b5d5..0f6feeb94ac 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -177,6 +177,8 @@ public enum BuiltinFunctionName { * Data Type Convert Function. 
*/ CAST_TO_STRING(FunctionName.of("cast_to_string")), + CAST_TO_BYTE(FunctionName.of("cast_to_byte")), + CAST_TO_SHORT(FunctionName.of("cast_to_short")), CAST_TO_INT(FunctionName.of("cast_to_int")), CAST_TO_LONG(FunctionName.of("cast_to_long")), CAST_TO_FLOAT(FunctionName.of("cast_to_float")), diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java index ebb432d7f03..3898af66828 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java @@ -11,10 +11,19 @@ package org.opensearch.sql.expression.function; +import static org.opensearch.sql.ast.expression.Cast.getCastFunctionName; +import static org.opensearch.sql.ast.expression.Cast.isCastFunction; + +import com.google.common.collect.ImmutableList; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; +import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.sql.common.utils.StringUtils; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.expression.Expression; @@ -47,15 +56,70 @@ public FunctionImplementation compile(FunctionName functionName, List resolvedSignature = + functionResolverMap.get(functionName).resolve(functionSignature); + + List sourceTypes = functionSignature.getParamTypeList(); + List targetTypes = resolvedSignature.getKey().getParamTypeList(); + FunctionBuilder funcBuilder = resolvedSignature.getValue(); + if (isCastFunction(functionName) || sourceTypes.equals(targetTypes)) { + return funcBuilder; + } + return castArguments(sourceTypes, targetTypes, funcBuilder); } else { throw new 
ExpressionEvaluationException( String.format("unsupported function name: %s", functionName.getFunctionName())); } } + + /** + * Wrap the resolved function builder's arguments in cast functions so that each input + * expression value is converted to the target type at runtime. For example, suppose the + * unresolved signature is equal(BOOL,STRING) and it resolves to F with equal(BOOL,BOOL). + * In this case, wrap F and return equal(BOOL, cast_to_boolean(STRING)). + */ + private FunctionBuilder castArguments(List sourceTypes, + List targetTypes, + FunctionBuilder funcBuilder) { + return arguments -> { + List argsCasted = new ArrayList<>(); + for (int i = 0; i < arguments.size(); i++) { + Expression arg = arguments.get(i); + ExprType sourceType = sourceTypes.get(i); + ExprType targetType = targetTypes.get(i); + + if (isCastRequired(sourceType, targetType)) { + argsCasted.add(cast(arg, targetType)); + } else { + argsCasted.add(arg); + } + } + return funcBuilder.apply(argsCasted); + }; + } + + private boolean isCastRequired(ExprType sourceType, ExprType targetType) { + // TODO: Remove this special case after fixing all failed UTs + if (ExprCoreType.numberTypes().contains(sourceType) + && ExprCoreType.numberTypes().contains(targetType)) { + return false; + } + return sourceType.shouldCast(targetType); + } + + private Expression cast(Expression arg, ExprType targetType) { + FunctionName castFunctionName = getCastFunctionName(targetType); + if (castFunctionName == null) { + throw new ExpressionEvaluationException(StringUtils.format( + "Type conversion to type %s is not supported", targetType)); + } + return (Expression) compile(castFunctionName, ImmutableList.of(arg)); + } + } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/FunctionResolver.java b/core/src/main/java/org/opensearch/sql/expression/function/FunctionResolver.java index d9d01be891b..5bd63015a58 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/FunctionResolver.java 
+++ b/core/src/main/java/org/opensearch/sql/expression/function/FunctionResolver.java @@ -20,6 +20,7 @@ import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.Singular; +import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.exception.ExpressionEvaluationException; /** @@ -41,8 +42,10 @@ public class FunctionResolver { * If the {@link FunctionBuilder} exactly match the input {@link FunctionSignature}, return it. * If applying the widening rule, found the most match one, return it. * If nothing found, throw {@link ExpressionEvaluationException} + * + * @return function signature and its builder */ - public FunctionBuilder resolve(FunctionSignature unresolvedSignature) { + public Pair resolve(FunctionSignature unresolvedSignature) { PriorityQueue> functionMatchQueue = new PriorityQueue<>( Map.Entry.comparingByKey()); @@ -59,7 +62,8 @@ public FunctionBuilder resolve(FunctionSignature unresolvedSignature) { unresolvedSignature.formatTypes() )); } else { - return functionBundle.get(bestMatchEntry.getValue()); + FunctionSignature resolvedSignature = bestMatchEntry.getValue(); + return Pair.of(resolvedSignature, functionBundle.get(resolvedSignature)); } } diff --git a/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperator.java b/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperator.java index df6b6f935f7..5f94eb63ee8 100644 --- a/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperator.java +++ b/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperator.java @@ -48,11 +48,13 @@ import java.util.stream.Stream; import lombok.experimental.UtilityClass; import org.opensearch.sql.data.model.ExprBooleanValue; +import org.opensearch.sql.data.model.ExprByteValue; import org.opensearch.sql.data.model.ExprDateValue; import org.opensearch.sql.data.model.ExprDoubleValue; import org.opensearch.sql.data.model.ExprFloatValue; import 
org.opensearch.sql.data.model.ExprIntegerValue; import org.opensearch.sql.data.model.ExprLongValue; +import org.opensearch.sql.data.model.ExprShortValue; import org.opensearch.sql.data.model.ExprStringValue; import org.opensearch.sql.data.model.ExprTimeValue; import org.opensearch.sql.data.model.ExprTimestampValue; @@ -68,6 +70,8 @@ public class TypeCastOperator { */ public static void register(BuiltinFunctionRepository repository) { repository.register(castToString()); + repository.register(castToByte()); + repository.register(castToShort()); repository.register(castToInt()); repository.register(castToLong()); repository.register(castToFloat()); @@ -92,6 +96,28 @@ private static FunctionResolver castToString() { ); } + private static FunctionResolver castToByte() { + return FunctionDSL.define(BuiltinFunctionName.CAST_TO_BYTE.getName(), + impl(nullMissingHandling( + (v) -> new ExprByteValue(Byte.valueOf(v.stringValue()))), BYTE, STRING), + impl(nullMissingHandling( + (v) -> new ExprByteValue(v.byteValue())), BYTE, DOUBLE), + impl(nullMissingHandling( + (v) -> new ExprByteValue(v.booleanValue() ? 1 : 0)), BYTE, BOOLEAN) + ); + } + + private static FunctionResolver castToShort() { + return FunctionDSL.define(BuiltinFunctionName.CAST_TO_SHORT.getName(), + impl(nullMissingHandling( + (v) -> new ExprShortValue(Short.valueOf(v.stringValue()))), SHORT, STRING), + impl(nullMissingHandling( + (v) -> new ExprShortValue(v.shortValue())), SHORT, DOUBLE), + impl(nullMissingHandling( + (v) -> new ExprShortValue(v.booleanValue() ? 
1 : 0)), SHORT, BOOLEAN) + ); + } + private static FunctionResolver castToInt() { return FunctionDSL.define(BuiltinFunctionName.CAST_TO_INT.getName(), impl(nullMissingHandling( diff --git a/core/src/test/java/org/opensearch/sql/data/type/ExprTypeTest.java b/core/src/test/java/org/opensearch/sql/data/type/ExprTypeTest.java index 0dc8b8f4cf5..9beb11eb07a 100644 --- a/core/src/test/java/org/opensearch/sql/data/type/ExprTypeTest.java +++ b/core/src/test/java/org/opensearch/sql/data/type/ExprTypeTest.java @@ -33,6 +33,7 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; +import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; @@ -58,6 +59,11 @@ public void isCompatible() { assertTrue(FLOAT.isCompatible(LONG)); assertTrue(FLOAT.isCompatible(INTEGER)); assertTrue(FLOAT.isCompatible(SHORT)); + assertTrue(BOOLEAN.isCompatible(STRING)); + } + + @Test + public void isNotCompatible() { assertFalse(INTEGER.isCompatible(DOUBLE)); assertFalse(STRING.isCompatible(DOUBLE)); assertFalse(INTEGER.isCompatible(UNKNOWN)); @@ -69,6 +75,13 @@ public void isCompatibleWithUndefined() { ExprCoreType.coreTypes().forEach(type -> assertFalse(UNDEFINED.isCompatible(type))); } + @Test + public void shouldCast() { + assertTrue(UNDEFINED.shouldCast(STRING)); + assertTrue(STRING.shouldCast(BOOLEAN)); + assertFalse(STRING.shouldCast(STRING)); + } + @Test public void getParent() { assertThat(((ExprType) () -> "test").getParent(), Matchers.contains(UNKNOWN)); diff --git a/core/src/test/java/org/opensearch/sql/expression/function/BuiltinFunctionRepositoryTest.java b/core/src/test/java/org/opensearch/sql/expression/function/BuiltinFunctionRepositoryTest.java index 
6f8b3600ea9..d6b372a12a1 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/BuiltinFunctionRepositoryTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/BuiltinFunctionRepositoryTest.java @@ -29,20 +29,39 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.lenient; +import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; +import static org.opensearch.sql.data.type.ExprCoreType.BYTE; +import static org.opensearch.sql.data.type.ExprCoreType.DATETIME; +import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; +import static org.opensearch.sql.data.type.ExprCoreType.UNDEFINED; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_BOOLEAN; +import com.google.common.collect.ImmutableList; import java.util.Arrays; +import java.util.List; import java.util.Map; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.expression.Expression; +import 
org.opensearch.sql.expression.FunctionExpression; import org.opensearch.sql.expression.env.Environment; @ExtendWith(MockitoExtension.class) @@ -62,6 +81,13 @@ class BuiltinFunctionRepositoryTest { @Mock private Environment emptyEnv; + private BuiltinFunctionRepository repo; + + @BeforeEach + void setUp() { + repo = new BuiltinFunctionRepository(mockMap); + } + @Test void register() { BuiltinFunctionRepository repo = new BuiltinFunctionRepository(mockMap); @@ -73,8 +99,11 @@ void register() { @Test void compile() { + when(mockExpression.type()).thenReturn(UNDEFINED); + when(functionSignature.getParamTypeList()).thenReturn(Arrays.asList(UNDEFINED)); when(mockfunctionResolver.getFunctionName()).thenReturn(mockFunctionName); - when(mockfunctionResolver.resolve(any())).thenReturn(functionExpressionBuilder); + when(mockfunctionResolver.resolve(any())).thenReturn( + Pair.of(functionSignature, functionExpressionBuilder)); when(mockMap.containsKey(any())).thenReturn(true); when(mockMap.get(any())).thenReturn(mockfunctionResolver); BuiltinFunctionRepository repo = new BuiltinFunctionRepository(mockMap); @@ -89,7 +118,8 @@ void compile() { void resolve() { when(functionSignature.getFunctionName()).thenReturn(mockFunctionName); when(mockfunctionResolver.getFunctionName()).thenReturn(mockFunctionName); - when(mockfunctionResolver.resolve(functionSignature)).thenReturn(functionExpressionBuilder); + when(mockfunctionResolver.resolve(functionSignature)).thenReturn( + Pair.of(functionSignature, functionExpressionBuilder)); when(mockMap.containsKey(mockFunctionName)).thenReturn(true); when(mockMap.get(mockFunctionName)).thenReturn(mockfunctionResolver); BuiltinFunctionRepository repo = new BuiltinFunctionRepository(mockMap); @@ -98,6 +128,60 @@ void resolve() { assertEquals(functionExpressionBuilder, repo.resolve(functionSignature)); } + @Test + void resolve_should_not_cast_arguments_in_cast_function() { + when(mockExpression.toString()).thenReturn("string"); + 
FunctionImplementation function = + repo.resolve(registerFunctionResolver(CAST_TO_BOOLEAN.getName(), DATETIME, BOOLEAN)) + .apply(ImmutableList.of(mockExpression)); + assertEquals("cast_to_boolean(string)", function.toString()); + } + + @Test + void resolve_should_not_cast_arguments_if_same_type() { + when(mockFunctionName.getFunctionName()).thenReturn("mock"); + when(mockExpression.toString()).thenReturn("string"); + FunctionImplementation function = + repo.resolve(registerFunctionResolver(mockFunctionName, STRING, STRING)) + .apply(ImmutableList.of(mockExpression)); + assertEquals("mock(string)", function.toString()); + } + + @Test + void resolve_should_not_cast_arguments_if_both_numbers() { + when(mockFunctionName.getFunctionName()).thenReturn("mock"); + when(mockExpression.toString()).thenReturn("byte"); + FunctionImplementation function = + repo.resolve(registerFunctionResolver(mockFunctionName, BYTE, INTEGER)) + .apply(ImmutableList.of(mockExpression)); + assertEquals("mock(byte)", function.toString()); + } + + @Test + void resolve_should_cast_arguments() { + when(mockFunctionName.getFunctionName()).thenReturn("mock"); + when(mockExpression.toString()).thenReturn("string"); + when(mockExpression.type()).thenReturn(STRING); + + FunctionSignature signature = + registerFunctionResolver(mockFunctionName, STRING, BOOLEAN); + registerFunctionResolver(CAST_TO_BOOLEAN.getName(), STRING, STRING); + + FunctionImplementation function = + repo.resolve(signature) + .apply(ImmutableList.of(mockExpression)); + assertEquals("mock(cast_to_boolean(string))", function.toString()); + } + + @Test + void resolve_should_throw_exception_for_unsupported_conversion() { + ExpressionEvaluationException error = + assertThrows(ExpressionEvaluationException.class, () -> + repo.resolve(registerFunctionResolver(mockFunctionName, BYTE, STRUCT)) + .apply(ImmutableList.of(mockExpression))); + assertEquals(error.getMessage(), "Type conversion to type STRUCT is not supported"); + } + @Test 
@DisplayName("resolve unregistered function should throw exception") void resolve_unregistered() { @@ -109,4 +193,52 @@ void resolve_unregistered() { () -> repo.resolve(new FunctionSignature(FunctionName.of("unknown"), Arrays.asList()))); assertEquals("unsupported function name: unknown", exception.getMessage()); } + + private FunctionSignature registerFunctionResolver(FunctionName funcName, + ExprType sourceType, + ExprType targetType) { + FunctionSignature unresolvedSignature = new FunctionSignature( + funcName, ImmutableList.of(sourceType)); + FunctionSignature resolvedSignature = new FunctionSignature( + funcName, ImmutableList.of(targetType)); + + FunctionResolver funcResolver = mock(FunctionResolver.class); + FunctionBuilder funcBuilder = mock(FunctionBuilder.class); + + when(mockMap.containsKey(eq(funcName))).thenReturn(true); + when(mockMap.get(eq(funcName))).thenReturn(funcResolver); + when(funcResolver.resolve(eq(unresolvedSignature))).thenReturn( + Pair.of(resolvedSignature, funcBuilder)); + repo.register(funcResolver); + + // Relax unnecessary stubbing check because error case test doesn't call this + lenient().doAnswer(invocation -> + new FakeFunctionExpression(funcName, invocation.getArgument(0)) + ).when(funcBuilder).apply(any()); + return unresolvedSignature; + } + + private static class FakeFunctionExpression extends FunctionExpression { + + public FakeFunctionExpression(FunctionName functionName, List arguments) { + super(functionName, arguments); + } + + @Override + public ExprValue valueOf(Environment valueEnv) { + return null; + } + + @Override + public ExprType type() { + return null; + } + + @Override + public String toString() { + return getFunctionName().getFunctionName() + + "(" + StringUtils.join(getArguments(), ", ") + ")"; + } + } + } diff --git a/core/src/test/java/org/opensearch/sql/expression/function/FunctionResolverTest.java b/core/src/test/java/org/opensearch/sql/expression/function/FunctionResolverTest.java index 
1cd1e3756b1..6887837b356 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/FunctionResolverTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/FunctionResolverTest.java @@ -70,7 +70,7 @@ void resolve_function_signature_exactly_match() { FunctionResolver resolver = new FunctionResolver(functionName, ImmutableMap.of(exactlyMatchFS, exactlyMatchBuilder)); - assertEquals(exactlyMatchBuilder, resolver.resolve(functionSignature)); + assertEquals(exactlyMatchBuilder, resolver.resolve(functionSignature).getValue()); } @Test @@ -80,7 +80,7 @@ void resolve_function_signature_best_match() { FunctionResolver resolver = new FunctionResolver(functionName, ImmutableMap.of(bestMatchFS, bestMatchBuilder, leastMatchFS, leastMatchBuilder)); - assertEquals(bestMatchBuilder, resolver.resolve(functionSignature)); + assertEquals(bestMatchBuilder, resolver.resolve(functionSignature).getValue()); } @Test diff --git a/core/src/test/java/org/opensearch/sql/expression/function/WideningTypeRuleTest.java b/core/src/test/java/org/opensearch/sql/expression/function/WideningTypeRuleTest.java index dc57a13694d..9e678c8091b 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/WideningTypeRuleTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/WideningTypeRuleTest.java @@ -28,12 +28,14 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; import static org.opensearch.sql.data.type.ExprCoreType.BYTE; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.LONG; import static org.opensearch.sql.data.type.ExprCoreType.SHORT; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; import 
static org.opensearch.sql.data.type.ExprCoreType.UNDEFINED; import static org.opensearch.sql.data.type.WideningTypeRule.IMPOSSIBLE_WIDENING; import static org.opensearch.sql.data.type.WideningTypeRule.TYPE_EQUAL; @@ -70,6 +72,7 @@ class WideningTypeRuleTest { .put(LONG, FLOAT, 1) .put(LONG, DOUBLE, 2) .put(FLOAT, DOUBLE, 1) + .put(STRING, BOOLEAN, 1) .put(UNDEFINED, BYTE, 1) .put(UNDEFINED, SHORT, 2) .put(UNDEFINED, INTEGER, 3) diff --git a/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java b/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java index ffccf9a62e7..cc2acf57102 100644 --- a/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java @@ -31,11 +31,13 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; +import static org.opensearch.sql.data.type.ExprCoreType.BYTE; import static org.opensearch.sql.data.type.ExprCoreType.DATE; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.LONG; +import static org.opensearch.sql.data.type.ExprCoreType.SHORT; import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.data.type.ExprCoreType.TIME; import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; @@ -103,6 +105,22 @@ void castToString(ExprValue value) { assertEquals(new ExprStringValue(value.value().toString()), expression.valueOf(null)); } + @ParameterizedTest(name = "castToByte({0})") + @MethodSource({"numberData"}) + void castToByte(ExprValue value) { + FunctionExpression 
expression = dsl.castByte(DSL.literal(value)); + assertEquals(BYTE, expression.type()); + assertEquals(new ExprByteValue(value.byteValue()), expression.valueOf(null)); + } + + @ParameterizedTest(name = "castToShort({0})") + @MethodSource({"numberData"}) + void castToShort(ExprValue value) { + FunctionExpression expression = dsl.castShort(DSL.literal(value)); + assertEquals(SHORT, expression.type()); + assertEquals(new ExprShortValue(value.shortValue()), expression.valueOf(null)); + } + @ParameterizedTest(name = "castToInt({0})") @MethodSource({"numberData"}) void castToInt(ExprValue value) { @@ -111,6 +129,20 @@ void castToInt(ExprValue value) { assertEquals(new ExprIntegerValue(value.integerValue()), expression.valueOf(null)); } + @Test + void castStringToByte() { + FunctionExpression expression = dsl.castByte(DSL.literal("100")); + assertEquals(BYTE, expression.type()); + assertEquals(new ExprByteValue(100), expression.valueOf(null)); + } + + @Test + void castStringToShort() { + FunctionExpression expression = dsl.castShort(DSL.literal("100")); + assertEquals(SHORT, expression.type()); + assertEquals(new ExprShortValue(100), expression.valueOf(null)); + } + @Test void castStringToInt() { FunctionExpression expression = dsl.castInt(DSL.literal("100")); @@ -124,6 +156,28 @@ void castStringToIntException() { assertThrows(RuntimeException.class, () -> expression.valueOf(null)); } + @Test + void castBooleanToByte() { + FunctionExpression expression = dsl.castByte(DSL.literal(true)); + assertEquals(BYTE, expression.type()); + assertEquals(new ExprByteValue(1), expression.valueOf(null)); + + expression = dsl.castByte(DSL.literal(false)); + assertEquals(BYTE, expression.type()); + assertEquals(new ExprByteValue(0), expression.valueOf(null)); + } + + @Test + void castBooleanToShort() { + FunctionExpression expression = dsl.castShort(DSL.literal(true)); + assertEquals(SHORT, expression.type()); + assertEquals(new ExprShortValue(1), expression.valueOf(null)); + + expression 
= dsl.castShort(DSL.literal(false)); + assertEquals(SHORT, expression.type()); + assertEquals(new ExprShortValue(0), expression.valueOf(null)); + } + @Test void castBooleanToInt() { FunctionExpression expression = dsl.castInt(DSL.literal(true)); diff --git a/docs/dev/TypeConversion.md b/docs/dev/TypeConversion.md new file mode 100644 index 00000000000..07697fed073 --- /dev/null +++ b/docs/dev/TypeConversion.md @@ -0,0 +1,172 @@ +# Data Type Conversion in SQL/PPL + +## 1.Overview + +### 1.1 Type Conversion + +Type conversion means converting a value from one data type to another. There are two aspects to consider: + +1. Whether the conversion is implicit or explicit (implicit conversion is often called coercion) +2. Whether the data is converted within the same type family or reinterpreted as a data type outside the family + +Strongly typed languages commonly support only a few implicit conversions and no data reinterpretation, while weakly typed languages allow many implicit conversions and flexible reinterpretation. + +### 1.2 Problem Statement + +Currently, there are only 2 implicit conversions allowed, which are defined by the type hierarchy tree: + +1. Numeric type coercion: narrower numeric types are closer to the root on the top. For example, an integer is converted to a long integer automatically, as in Java. +2. NULL literals: the `UNDEFINED` type can be converted to any other type so that a NULL literal can be accepted by any expression at runtime. + +![Current type hierarchy](img/type-hierarchy-tree-old.png) + +However, more general conversions for non-numeric types are missing, such as conversions between string, bool and date types. The strict type check causes inconvenience and other problems discussed below. + + +--- +## 2.Requirements + +### 2.1 Use Cases + +The common use cases and motivations include: + +1. 
*User-friendly*: Although it doesn’t matter for an application or BI tool, which can always follow strict grammar rules, implicit type conversion makes queries friendlier and more accessible to humans, e.g. `date > DATE('2020-06-01') => date > '2020-06-01'` +2. *Schema-on-read*: More importantly, implicit conversion from string is required for schema on read (data is stored as a raw string on write and fields are extracted on read), e.g. `regex ‘...’ | abs(a)` + +### 2.2 Functionalities + +Immediate: + +1. Implicit conversion between bool and string: https://github.com/opendistro-for-elasticsearch/sql/issues/1061 +2. Implicit conversion between date and string: https://github.com/opendistro-for-elasticsearch/sql/issues/1056 + +Future: + +1. Implicit conversion between string and more types for regex command support + + +--- +## 3.Design + +### 3.1 Type Precedence + +Type precedence determines the direction of conversion when the fields involved in an expression have different types from the resolved signature. Before introducing it into our type system, let’s check how an expression is resolved to a function implementation and why type precedence is required. + +``` +Compiling time: + Expression: 1 = 1.0 + Unresolved signature: equal(INT, DOUBLE) + Resolved signature: equal(DOUBLE, DOUBLE) , distance=1 + Function builder: returns equal(DOUBLE, DOUBLE) impl +``` + +Now let’s follow the same idea to add support for conversion from `BOOLEAN` to `STRING`. Because all boolean values can be converted to a string (in other words, string is “wider”), the String type is made the parent of Boolean. 
However, this leads to wrong semantics, as the expression `false = 'FALSE'` shows: + +``` +Compiling time: + Expression: false = 'FALSE' + Unresolved signature: equal(BOOL, STRING) + Resolved signature: equal(STRING, STRING) + Function builder: returns equal(STRING, STRING) impl + +Runtime: + Function impl: String.valueOf(false).equals('FALSE') + Evaluation result: *false* +``` + +Therefore, type precedence should be defined based on the expected semantics rather than the intuitive “width” of a type. Now let’s reverse the direction and make Boolean the parent of String type. + +![New type hierarchy](img/type-hierarchy-tree-with-implicit-cast.png) + +``` +Compiling time: + Expression: false = 'FALSE' + Unresolved signature: equal(BOOL, STRING) + Resolved signature: equal(BOOL, BOOL) + Function builder: 1) returns equal(BOOL, cast_to_bool(STRING)) impl + 2) returns equal(BOOL, BOOL) impl +Runtime: + equal impl: false.equals(cast_to_bool('FALSE')) + cast_to_bool impl: Boolean.valueOf('FALSE') + Evaluation result: *true* +``` + +### 3.2 General Rules + +1. Implicit conversion is defined by type precedence which is represented by the type hierarchy tree. +2. Explicit conversion defines the complete set of conversions allowed. If no explicit conversion is defined, implicit conversion should be impossible too. +3. On the other hand, if implicit conversion can occur between 2 types, then explicit conversion should be allowed too. +4. Conversion within a data type family is considered conversion between different data representations and should be supported as much as possible. +5. Conversion across 2 data type families is considered as data reinterpretation and should be enabled with strong motivation. + +--- +## 4.Implementation + +### 4.1 Explicit Conversion + +Explicit conversion is defined as the set of `CAST` function implementations, which includes all the conversions allowed between data types.
As before, the missing cast functions are added and implemented by the conversion logic in the `ExprType` class. + +```java +public class Cast extends UnresolvedExpression { + + private static final Map<String, FunctionName> CONVERTED_TYPE_FUNCTION_NAME_MAP = + new ImmutableMap.Builder<String, FunctionName>() + .put("string", CAST_TO_STRING.getName()) + .put("byte", CAST_TO_BYTE.getName()) + .put("short", CAST_TO_SHORT.getName()) + .put("int", CAST_TO_INT.getName()) + .put("integer", CAST_TO_INT.getName()) + .put("long", CAST_TO_LONG.getName()) + .put("float", CAST_TO_FLOAT.getName()) + .put("double", CAST_TO_DOUBLE.getName()) + .put("boolean", CAST_TO_BOOLEAN.getName()) + .put("date", CAST_TO_DATE.getName()) + .put("time", CAST_TO_TIME.getName()) + .put("timestamp", CAST_TO_TIMESTAMP.getName()) + .build(); +} +``` + +### 4.2 Implicit Conversion + +Implicit conversion and precedence are defined by the type hierarchy tree. The data type at the head of an arrow has higher precedence than the type at the tail. + +```java +public enum ExprCoreType implements ExprType { + UNKNOWN, + UNDEFINED, + + /** + * Numbers. + */ + BYTE(UNDEFINED), + SHORT(BYTE), + INTEGER(SHORT), + LONG(INTEGER), + FLOAT(LONG), + DOUBLE(FLOAT), + + STRING(UNDEFINED), + BOOLEAN(STRING), // PR: change STRING's parent to BOOLEAN + + /** + * Date. + */ + TIMESTAMP(UNDEFINED), + DATE(UNDEFINED), + TIME(UNDEFINED), + DATETIME(UNDEFINED), + INTERVAL(UNDEFINED), + + STRUCT(UNDEFINED), + ARRAY(UNDEFINED); +} +``` + +### 4.3 Type Casting Logic + +As with the examples in section 3.1, the implementation is: + +1. Define all possible conversions in the CAST function family. +2. Define implicit conversions by type hierarchy tree (auto implicit cast from child to parent) +3. During compile time, wrap the original function builder with a new one that casts arguments to the target type.
diff --git a/docs/dev/img/type-hierarchy-tree-old.png b/docs/dev/img/type-hierarchy-tree-old.png new file mode 100644 index 00000000000..7add83f2867 Binary files /dev/null and b/docs/dev/img/type-hierarchy-tree-old.png differ diff --git a/docs/dev/img/type-hierarchy-tree-with-implicit-cast.png b/docs/dev/img/type-hierarchy-tree-with-implicit-cast.png new file mode 100644 index 00000000000..b729e23ab9b Binary files /dev/null and b/docs/dev/img/type-hierarchy-tree-with-implicit-cast.png differ diff --git a/docs/user/general/datatypes.rst b/docs/user/general/datatypes.rst index 0077422a745..fe440728316 100644 --- a/docs/user/general/datatypes.rst +++ b/docs/user/general/datatypes.rst @@ -1,4 +1,3 @@ - ========== Data Types ========== @@ -105,6 +104,105 @@ The table below list the mapping between OpenSearch Data Type, OpenSearch SQL Da Notes: Not all the OpenSearch SQL Type has correspond OpenSearch Type. e.g. data and time. To use function which required such data type, user should explicitly convert the data type. +Data Type Conversion +==================== + +A data type can be converted to another implicitly, explicitly, or not at all, according to the type precedence defined and whether the conversion is supported by the query engine. + +The general rules and design tenets for data type conversion include: + +1. Implicit conversion is defined by type precedence which is represented by the type hierarchy tree. See `Data Type Conversion in SQL/PPL <../../dev/TypeConversion.md>`_ for more details. +2. Explicit conversion defines the complete set of conversions allowed. If no explicit conversion is defined, implicit conversion should be impossible too. +3. On the other hand, if implicit conversion can occur between 2 types, then explicit conversion should be allowed too. +4. Conversion within a data type family is considered conversion between different data representations and should be supported as much as possible. +5.
Conversion across two data type families is considered as data reinterpretation and should be enabled with strong motivation. + +Type Conversion Matrix +---------------------- + +The following matrix illustrates the conversions allowed by our query engine for all the built-in data types as well as types provided by OpenSearch storage engine. + ++--------------+------------------------------------------------+---------+------------------------------+-----------------------------------------------+--------------------------+---------------------+ +| Data Types | Numeric Type Family | BOOLEAN | String Type Family | Datetime Type Family | OpenSearch Type Family | Complex Type Family | +| +------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| | BYTE | SHORT | INTEGER | LONG | FLOAT | DOUBLE | BOOLEAN | TEXT_KEYWORD | TEXT | STRING | TIMESTAMP | DATE | TIME | DATETIME | INTERVAL | GEO_POINT | IP | BINARY | STRUCT | ARRAY | ++==============+======+=======+=========+======+=======+========+=========+==============+======+========+===========+======+======+==========+==========+===========+=====+========+===========+=========+ +| UNDEFINED | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| BYTE | N/A | IE | IE | IE | IE | IE | X | X | X | E | X | X | X | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| SHORT | E | N/A | IE | IE | IE | IE | X | X | X | E | X | X | X | X | X | X | X | X | X | X | 
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| INTEGER | E | E | N/A | IE | IE | IE | X | X | X | E | X | X | X | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| LONG | E | E | E | N/A | IE | IE | X | X | X | E | X | X | X | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| FLOAT | E | E | E | E | N/A | IE | X | X | X | E | X | X | X | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| DOUBLE | E | E | E | E | E | N/A | X | X | X | E | X | X | X | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| BOOLEAN | E | E | E | E | E | E | N/A | X | X | E | X | X | X | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| TEXT_KEYWORD | | | | | | | | N/A | | IE | | | | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| TEXT | 
| | | | | | | | N/A | IE | | | | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| STRING | E | E | E | E | E | E | IE | X | X | N/A | E | E | E | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| TIMESTAMP | X | X | X | X | X | X | X | X | X | E | N/A | | | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| DATE | X | X | X | X | X | X | X | X | X | E | | N/A | | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| TIME | X | X | X | X | X | X | X | X | X | E | | | N/A | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| DATETIME | X | X | X | X | X | X | X | X | X | E | | | | N/A | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| INTERVAL | X | X | X | X | X | X | X | X | X | E | | | | X | N/A | X | X | X | X | X | 
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| GEO_POINT | X | X | X | X | X | X | X | X | X | | X | X | X | X | X | N/A | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| IP | X | X | X | X | X | X | X | X | X | | X | X | X | X | X | X | N/A | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| BINARY | X | X | X | X | X | X | X | X | X | | X | X | X | X | X | X | X | N/A | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| STRUCT | X | X | X | X | X | X | X | X | X | | X | X | X | X | X | X | X | X | N/A | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| ARRAY | X | X | X | X | X | X | X | X | X | | X | X | X | X | X | X | X | X | X | N/A | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ + +Note that: + +1. ``I`` means implicit conversion will occur automatically. ``E`` stands for explicit conversion by the ``CAST`` function. ``X`` means the conversion is impossible. An empty cell means the behavior is not yet clear and needs more testing. +2.
There is no ``UNDEFINED`` column because it is only used for the ``NULL`` literal at runtime and should not be present in a function signature definition. +3. OpenSearch and complex types are not supported by the ``CAST`` function, so it's impossible to convert a type to them for now. + +Examples +-------- + +Here are a few examples of implicit type conversion:: + + os> SELECT + ... 1 = 1.0, + ... 'True' = true; + fetched rows / total rows = 1/1 + +-----------+-----------------+ + | 1 = 1.0 | 'True' = true | + |-----------+-----------------| + | True | True | + +-----------+-----------------+ + +Here are a few examples of explicit type conversion:: + + os> SELECT + ... CAST(true AS INT), + ... CAST(1.2 AS STRING), + ... CAST('2021-06-10 00:00:00' AS TIMESTAMP); + fetched rows / total rows = 1/1 + +---------------------+-----------------------+--------------------------------------------+ + | CAST(true AS INT) | CAST(1.2 AS STRING) | CAST('2021-06-10 00:00:00' AS TIMESTAMP) | + |---------------------+-----------------------+--------------------------------------------| + | 1 | 1.2 | 2021-06-10 00:00:00 | + +---------------------+-----------------------+--------------------------------------------+ Undefined Data Type =================== @@ -248,7 +346,17 @@ A string is a sequence of characters enclosed in either single or double quotes. +-----------+-----------+-------------+-------------+ +Boolean Data Types +================== +A boolean can be represented by the constant value ``TRUE`` or ``FALSE``. In addition, certain string representations are also accepted by functions with boolean input. For example, the strings 'true', 'TRUE', 'false' and 'FALSE' are all valid representations and can be converted to boolean implicitly or explicitly:: - - + os> SELECT + ... true, FALSE, + ...
CAST('TRUE' AS boolean), CAST('false' AS boolean); + fetched rows / total rows = 1/1 + +--------+---------+---------------------------+----------------------------+ + | true | FALSE | CAST('TRUE' AS boolean) | CAST('false' AS boolean) | + |--------+---------+---------------------------+----------------------------| + | True | False | True | False | + +--------+---------+---------------------------+----------------------------+ diff --git a/integ-test/src/test/resources/correctness/expressions/cast.txt b/integ-test/src/test/resources/correctness/expressions/cast.txt index 4018a73f093..3556e0b7952 100644 --- a/integ-test/src/test/resources/correctness/expressions/cast.txt +++ b/integ-test/src/test/resources/correctness/expressions/cast.txt @@ -17,3 +17,7 @@ cast('01:01:01' as time) as castTime cast('true' as boolean) as castBool cast(1 as boolean) as castBool cast(cast(1 as string) as int) castCombine +false = 'False' as implicitCast +false = 'true' as implicitCast +'TRUE' = true as implicitCast +'false' = true as implicitCast diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java index 60d7f8c6844..2d402186885 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java @@ -45,16 +45,27 @@ @RequiredArgsConstructor public enum OpenSearchDataType implements ExprType { /** - * OpenSearch Text. + * OpenSearch Text. Rather than cast text to other types (STRING), leave it alone to prevent + * cast_to_string(OPENSEARCH_TEXT). 
* Ref: https://www.elastic.co/guide/en/elasticsearch/reference/current/text.html */ - OPENSEARCH_TEXT(Collections.singletonList(STRING), "string"), + OPENSEARCH_TEXT(Collections.singletonList(STRING), "string") { + @Override + public boolean shouldCast(ExprType other) { + return false; + } + }, /** * OpenSearch multi-fields which has text and keyword. * Ref: https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-fields.html */ - OPENSEARCH_TEXT_KEYWORD(Arrays.asList(STRING, OPENSEARCH_TEXT), "string"), + OPENSEARCH_TEXT_KEYWORD(Arrays.asList(STRING, OPENSEARCH_TEXT), "string") { + @Override + public boolean shouldCast(ExprType other) { + return false; + } + }, OPENSEARCH_IP(Arrays.asList(UNKNOWN), "ip"), diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeTest.java index 825200ce009..49af26eb460 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeTest.java @@ -58,4 +58,10 @@ public void legacyTypeName() { assertEquals("text", OPENSEARCH_TEXT.legacyTypeName()); assertEquals("text", OPENSEARCH_TEXT_KEYWORD.legacyTypeName()); } + + @Test + public void testShouldCast() { + assertFalse(OPENSEARCH_TEXT.shouldCast(STRING)); + assertFalse(OPENSEARCH_TEXT_KEYWORD.shouldCast(STRING)); + } }