diff --git a/go/arrow/array/binarybuilder.go b/go/arrow/array/binarybuilder.go
index 401587e0e70..2ebd3d49a76 100644
--- a/go/arrow/array/binarybuilder.go
+++ b/go/arrow/array/binarybuilder.go
@@ -19,6 +19,7 @@ package array
 import (
 	"bytes"
 	"encoding/base64"
+	"encoding/hex"
 	"fmt"
 	"math"
 	"reflect"
@@ -319,7 +320,10 @@ func (b *BinaryBuilder) UnmarshalOne(dec *json.Decoder) error {
 	case string:
 		data, err := base64.StdEncoding.DecodeString(v)
 		if err != nil {
-			return err
+			data, err = hex.DecodeString(v)
+			if err != nil {
+				return err
+			}
 		}
 		b.Append(data)
 	case []byte:
diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go
index 75b9993b69b..255b2106cc2 100644
--- a/go/arrow/array/compare.go
+++ b/go/arrow/array/compare.go
@@ -19,6 +19,7 @@ package array
 import (
 	"fmt"
 	"math"
+	"strings"
 
 	"github.com/apache/arrow/go/v12/arrow"
 	"github.com/apache/arrow/go/v12/arrow/float16"
@@ -494,13 +495,13 @@ func arrayApproxEqual(left, right arrow.Array, opt equalOption) bool {
 		return arrayEqualBinary(l, r)
 	case *String:
 		r := right.(*String)
-		return arrayEqualString(l, r)
+		return arrayApproxEqualString(l, r)
 	case *LargeBinary:
 		r := right.(*LargeBinary)
 		return arrayEqualLargeBinary(l, r)
 	case *LargeString:
 		r := right.(*LargeString)
-		return arrayEqualLargeString(l, r)
+		return arrayApproxEqualLargeString(l, r)
 	case *Int8:
 		r := right.(*Int8)
 		return arrayEqualInt8(l, r)
@@ -630,6 +631,42 @@ func validityBitmapEqual(left, right arrow.Array) bool {
 	return true
 }
 
+// stripNulls returns s with every NUL byte ("\x00") removed.
+func stripNulls(s string) string {
+	return strings.ReplaceAll(s, "\x00", "")
+}
+
+// arrayApproxEqualString reports whether left and right hold the same
+// values once NUL bytes are stripped from both sides. Slots that are null
+// in left are skipped. Only left's length and validity are consulted, so
+// the caller is presumably expected to have matched those up already.
+func arrayApproxEqualString(left, right *String) bool {
+	for i := 0; i < left.Len(); i++ {
+		if left.IsNull(i) {
+			continue
+		}
+		if stripNulls(left.Value(i)) != stripNulls(right.Value(i)) {
+			return false
+		}
+	}
+	return true
+}
+
+// arrayApproxEqualLargeString is the LargeString counterpart of
+// arrayApproxEqualString: values are compared with NUL bytes stripped and
+// slots that are null in left are skipped.
+func arrayApproxEqualLargeString(left, right *LargeString) bool {
+	for i := 0; i < left.Len(); i++ {
+		if left.IsNull(i) {
+			continue
+		}
+		if stripNulls(left.Value(i)) != stripNulls(right.Value(i)) {
+			return false
+		}
+	}
+	return true
+}
+
 func arrayApproxEqualFloat16(left, right *Float16, opt equalOption) bool {
 	for i := 0; i < left.Len(); i++ {
 		if left.IsNull(i) {
diff --git a/go/arrow/array/compare_test.go b/go/arrow/array/compare_test.go
index 32cbe54e025..04db88076dc 100644
--- a/go/arrow/array/compare_test.go
+++ b/go/arrow/array/compare_test.go
@@ -110,6 +110,54 @@ func TestArrayApproxEqual(t *testing.T) {
 	}
 }
 
+func TestArrayApproxEqualStrings(t *testing.T) {
+	for _, tc := range []struct {
+		name string
+		a1   interface{}
+		a2   interface{}
+		want bool
+	}{
+		{
+			name: "identical",
+			a1:   []string{"a", "b"},
+			a2:   []string{"a", "b"},
+			want: true,
+		},
+		{
+			name: "trailing_nul_left",
+			a1:   []string{"a", "b\x00"},
+			a2:   []string{"a", "b"},
+			want: true,
+		},
+		{
+			name: "trailing_nul_both_sides",
+			a1:   []string{"a", "b\x00"},
+			a2:   []string{"a\x00", "b"},
+			want: true,
+		},
+		{
+			name: "different_values",
+			a1:   []string{"a", "b\x00"},
+			a2:   []string{"a", "c"},
+			want: false,
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+			defer mem.AssertSize(t, 0)
+
+			a1 := arrayOf(mem, tc.a1, nil)
+			defer a1.Release()
+			a2 := arrayOf(mem, tc.a2, nil)
+			defer a2.Release()
+
+			if got, want := array.ApproxEqual(a1, a2), tc.want; got != want {
+				t.Fatalf("invalid comparison: got=%v, want=%v\na1: %v\na2: %v\n", got, want, a1, a2)
+			}
+		})
+	}
+}
+
 func TestArrayApproxEqualFloats(t *testing.T) {
 	f16sFrom := func(vs []float64) []float16.Num {
 		o := make([]float16.Num, len(vs))
@@ -328,7 +376,12 @@ func arrayOf(mem memory.Allocator, a interface{}, valids []bool) arrow.Array {
 
 		bldr.AppendValues(a, valids)
 		return bldr.NewFloat64Array()
+	case []string:
+		bldr := array.NewStringBuilder(mem)
+		defer bldr.Release()
 
+		bldr.AppendValues(a, valids)
+		return bldr.NewStringArray()
 	default:
 		panic(fmt.Errorf("arrdata: invalid data slice type %T", a))
 	}