Patch: coerce NULL operands of the regex match operators (~, ~*, !~, !~*) to a string type.
Note: the first hunk was rebuilt from a damaged copy. The doc line and signature of
`regex_null_coercion` are reconstructed from its body and call site, and the hunk range was
narrowed to the context that survived, so that file's `index` hash may not match.

diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs
index 251ac6cb8c0e2..fd97f9af1328a 100644
--- a/datafusion/expr-common/src/type_coercion/binary.rs
+++ b/datafusion/expr-common/src/type_coercion/binary.rs
@@ -1051,8 +1051,20 @@ pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
+/// Coercion rules for regular expression comparison operations with NULL input.
+fn regex_null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
+    use arrow::datatypes::DataType::*;
+    match (lhs_type, rhs_type) {
+        (DataType::Null, Utf8View | Utf8 | LargeUtf8) => Some(rhs_type.clone()),
+        (Utf8View | Utf8 | LargeUtf8, DataType::Null) => Some(lhs_type.clone()),
+        (DataType::Null, DataType::Null) => Some(Utf8),
+        _ => None,
+    }
+}
+
 /// coercion rules for regular expression comparison operations.
 /// This is a union of string coercion rules and dictionary coercion rules
 pub fn regex_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
     string_coercion(lhs_type, rhs_type)
         .or_else(|| dictionary_coercion(lhs_type, rhs_type, false))
+        .or_else(|| regex_null_coercion(lhs_type, rhs_type))
 }
 
 /// Checks if the TimeUnit associated with a Time32 or Time64 type is consistent,
diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs
index 347a5d82dbecd..06f54481a6faf 100644
--- a/datafusion/physical-expr/src/expressions/binary.rs
+++ b/datafusion/physical-expr/src/expressions/binary.rs
@@ -2498,6 +2498,111 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn regex_with_nulls() -> Result<()> {
+        let schema = Schema::new(vec![
+            Field::new("a", DataType::Utf8, true),
+            Field::new("b", DataType::Utf8, true),
+        ]);
+        let a = Arc::new(StringArray::from(vec![
+            Some("abc"),
+            None,
+            Some("abc"),
+            None,
+            Some("abc"),
+        ])) as ArrayRef;
+        let b = Arc::new(StringArray::from(vec![
+            Some("^a"),
+            Some("^A"),
+            None,
+            None,
+            Some("^(b|c)"),
+        ])) as ArrayRef;
+
+        let regex_expected =
+            BooleanArray::from(vec![Some(true), None, None, None, Some(false)]);
+        let regex_not_expected =
+            BooleanArray::from(vec![Some(false), None, None, None, Some(true)]);
+        apply_logic_op(
+            &Arc::new(schema.clone()),
+            &a,
+            &b,
+            Operator::RegexMatch,
+            regex_expected.clone(),
+        )?;
+        apply_logic_op(
+            &Arc::new(schema.clone()),
+            &a,
+            &b,
+            Operator::RegexIMatch,
+            regex_expected.clone(),
+        )?;
+        apply_logic_op(
+            &Arc::new(schema.clone()),
+            &a,
+            &b,
+            Operator::RegexNotMatch,
+            regex_not_expected.clone(),
+        )?;
+        apply_logic_op(
+            &Arc::new(schema),
+            &a,
+            &b,
+            Operator::RegexNotIMatch,
+            regex_not_expected.clone(),
+        )?;
+
+        let schema = Schema::new(vec![
+            Field::new("a", DataType::LargeUtf8, true),
+            Field::new("b", DataType::LargeUtf8, true),
+        ]);
+        let a = Arc::new(LargeStringArray::from(vec![
+            Some("abc"),
+            None,
+            Some("abc"),
+            None,
+            Some("abc"),
+        ])) as ArrayRef;
+        let b = Arc::new(LargeStringArray::from(vec![
+            Some("^a"),
+            Some("^A"),
+            None,
+            None,
+            Some("^(b|c)"),
+        ])) as ArrayRef;
+
+        apply_logic_op(
+            &Arc::new(schema.clone()),
+            &a,
+            &b,
+            Operator::RegexMatch,
+            regex_expected.clone(),
+        )?;
+        apply_logic_op(
+            &Arc::new(schema.clone()),
+            &a,
+            &b,
+            Operator::RegexIMatch,
+            regex_expected.clone(),
+        )?;
+        apply_logic_op(
+            &Arc::new(schema.clone()),
+            &a,
+            &b,
+            Operator::RegexNotMatch,
+            regex_not_expected.clone(),
+        )?;
+        apply_logic_op(
+            &Arc::new(schema),
+            &a,
+            &b,
+            Operator::RegexNotIMatch,
+            regex_not_expected.clone(),
+        )?;
+
+        Ok(())
+    }
+
     #[test]
     fn or_with_nulls_op() -> Result<()> {
         let schema = Schema::new(vec![
diff --git a/datafusion/sqllogictest/test_files/regexp.slt b/datafusion/sqllogictest/test_files/regexp.slt
index 149ad7f6fdcd2..22322d79ccfe8 100644
--- a/datafusion/sqllogictest/test_files/regexp.slt
+++ b/datafusion/sqllogictest/test_files/regexp.slt
@@ -230,6 +230,66 @@ SELECT regexp_match('aaa-555', '.*-(\d*)');
 ----
 [555]
 
+query B
+select 'abc' ~ null;
+----
+NULL
+
+query B
+select null ~ null;
+----
+NULL
+
+query B
+select null ~ 'abc';
+----
+NULL
+
+query B
+select 'abc' ~* null;
+----
+NULL
+
+query B
+select null ~* null;
+----
+NULL
+
+query B
+select null ~* 'abc';
+----
+NULL
+
+query B
+select 'abc' !~ null;
+----
+NULL
+
+query B
+select null !~ null;
+----
+NULL
+
+query B
+select null !~ 'abc';
+----
+NULL
+
+query B
+select 'abc' !~* null;
+----
+NULL
+
+query B
+select null !~* null;
+----
+NULL
+
+query B
+select null !~* 'abc';
+----
+NULL
+
 #
 # regexp_replace tests
 #