From 1a5de905c69780309c4b1f3491ad8aa84cf5ec1d Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Tue, 3 Sep 2019 22:05:31 +0800 Subject: [PATCH 01/10] add builtinRepeatSig --- expression/builtin_string_vec.go | 65 ++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 expression/builtin_string_vec.go diff --git a/expression/builtin_string_vec.go b/expression/builtin_string_vec.go new file mode 100644 index 0000000000000..d6150fdf56282 --- /dev/null +++ b/expression/builtin_string_vec.go @@ -0,0 +1,65 @@ +package expression + +import ( + "math" + "strings" + + "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/util/chunk" +) + +func (b *builtinRepeatSig) vecEvalString(input *chunk.Chunk, result *chunk.Column) error { + n := input.NumRows() + buf, err := b.get(types.ETString, n) + if err != nil { + return err + } + defer b.put(buf) + if err := b.args[0].VecEvalString(b.ctx, input, result); err != nil { + return err + } + + buf2, err := b.get(types.ETInt, n) + if err != nil { + return err + } + defer b.put(buf2) + if err := b.args[1].VecEvalInt(b.ctx, input, result); err != nil { + return err + } + + result.ReserveString(n) + nums := buf2.Int64s() + for i := 0; i < n; i ++ { + if buf.IsNull(i) || buf2.IsNull(i) { + result.AppendNull() + continue + } + num := nums[i] + if num < 1 { + result.AppendString("") + continue + } + if num > math.MaxInt32 { + num = math.MaxInt32 + } + + str := buf.GetString(i) + byteLength := len(str) + if uint64(byteLength)*uint64(num) > b.maxAllowedPacket { + b.ctx.GetSessionVars().StmtCtx.AppendWarning(errWarnAllowedPacketOverflowed.GenWithStackByArgs("repeat", b.maxAllowedPacket)) + result.AppendNull() + continue + } + if int64(byteLength) > int64(b.tp.Flen)/num { + result.AppendNull() + continue + } + result.AppendString(strings.Repeat(str, int(num))) + } + return nil +} + +func (b *builtinRepeatSig) vectorized() bool { + return true +} From 8d1d9d51b39d9a72b23417455d7fb07dd37615e0 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Tue, 3 Sep 2019 22:22:17 +0800 Subject: [PATCH 02/10] add test --- expression/bench_test.go | 65 +++++++++++++++++++++++++++----- expression/builtin_string_vec.go | 4 +- 2 files changed, 58 insertions(+), 11 deletions(-) diff --git a/expression/bench_test.go b/expression/bench_test.go index c8bd623f3b6e4..b11174d8fc28d 100644 --- a/expression/bench_test.go +++ b/expression/bench_test.go @@ -195,18 +195,57 @@ func BenchmarkScalarFunctionClone(b *testing.B) { b.ReportAllocs() } +// gener is used to generate data for test. +type gener interface { + gen() interface{} +} + +type rangeInt64Gener struct { + begin int + end int +} + +func (rig *rangeInt64Gener) gen() interface{} { + return int64(rand.Intn(rig.end-rig.begin) + rig.begin) +} + +type randLenStrGener struct { + lenBegin int + lenEnd int +} + +func (g *randLenStrGener) gen() interface{} { + n := rand.Intn(g.lenEnd-g.lenBegin) + g.lenBegin + buf := make([]byte, n) + for i := range buf { + x := rand.Intn(62) + if x < 10 { + buf[i] = byte('0' + x) + } else if x-10 < 26 { + buf[i] = byte('a' + x - 10) + } else { + buf[i] = byte('A' + x - 10 - 26) + } + } + return string(buf) +} + type vecExprBenchCase struct { retEvalType types.EvalType childrenTypes []types.EvalType + geners []gener // used to generate data for children } var vecExprBenchCases = map[string][]vecExprBenchCase{ ast.Cast: { - {types.ETInt, []types.EvalType{types.ETInt}}, + {types.ETInt, []types.EvalType{types.ETInt}, nil}, + }, + ast.Repeat: { + {types.ETString, []types.EvalType{types.ETString, types.ETInt}, []gener{&randLenStrGener{10, 50}, &rangeInt64Gener{10, 50}}}, }, } -func fillColumn(eType types.EvalType, chk *chunk.Chunk, colIdx int) { +func fillColumn(eType types.EvalType, chk *chunk.Chunk, colIdx int, testCase vecExprBenchCase) { nullRatio := 0.2 batchSize := 1024 switch eType { @@ -215,10 +254,14 @@ func fillColumn(eType types.EvalType, chk *chunk.Chunk, colIdx int) { if rand.Float64() < nullRatio { chk.AppendNull(colIdx) } else { - if rand.Float64() < 0.5 { - chk.AppendInt64(colIdx, -rand.Int63()) + if len(testCase.geners) > colIdx && testCase.geners[colIdx] != nil { + chk.AppendInt64(colIdx, testCase.geners[colIdx].gen().(int64)) } else { - chk.AppendInt64(colIdx, rand.Int63()) + if rand.Float64() < 0.5 { + chk.AppendInt64(colIdx, -rand.Int63()) + } else { + chk.AppendInt64(colIdx, rand.Int63()) + } } } } @@ -283,7 +326,11 @@ func fillColumn(eType types.EvalType, chk *chunk.Chunk, colIdx int) { if rand.Float64() < nullRatio { chk.AppendNull(colIdx) } else { - chk.AppendString(colIdx, fmt.Sprintf("%v", rand.Int())) + if len(testCase.geners) > colIdx && testCase.geners[colIdx] != nil { + chk.AppendString(colIdx, testCase.geners[colIdx].gen().(string)) + } else { + chk.AppendString(colIdx, fmt.Sprintf("%v", rand.Int())) + } } } default: @@ -320,7 +367,7 @@ func genVecExprBenchCase(ctx sessionctx.Context, funcName string, testCase vecEx cols := make([]Expression, len(testCase.childrenTypes)) input = chunk.New(fts, 1024, 1024) for i, eType := range testCase.childrenTypes { - fillColumn(eType, input, i) + fillColumn(eType, input, i, testCase) cols[i] = &Column{Index: i, RetType: fts[i]} } @@ -420,7 +467,7 @@ func genVecBuiltinFuncBenchCase(ctx sessionctx.Context, funcName string, testCas cols := make([]Expression, childrenNumber) input = chunk.New(fts, 1024, 1024) for i, eType := range testCase.childrenTypes { - fillColumn(eType, input, i) + fillColumn(eType, input, i, testCase) cols[i] = &Column{Index: i, RetType: fts[i]} } @@ -544,7 +591,7 @@ func (s *testEvaluatorSuite) TestVectorizedBuiltinFunc(c *C) { err := baseFunc.vecEvalString(input, output) c.Assert(err, IsNil) for row := it.Begin(); row != it.End(); row = it.Next() { - val, isNull, err := baseFunc.evalDuration(row) + val, isNull, err := baseFunc.evalString(row) c.Assert(err, IsNil) c.Assert(isNull, Equals, output.IsNull(i)) if !isNull { diff --git a/expression/builtin_string_vec.go b/expression/builtin_string_vec.go index d6150fdf56282..806547c340afb 100644 --- a/expression/builtin_string_vec.go +++ b/expression/builtin_string_vec.go @@ -15,7 +15,7 @@ func (b *builtinRepeatSig) vecEvalString(input *chunk.Chunk, result *chunk.Colum return err } defer b.put(buf) - if err := b.args[0].VecEvalString(b.ctx, input, result); err != nil { + if err := b.args[0].VecEvalString(b.ctx, input, buf); err != nil { return err } @@ -24,7 +24,7 @@ func (b *builtinRepeatSig) vecEvalString(input *chunk.Chunk, result *chunk.Colum return err } defer b.put(buf2) - if err := b.args[1].VecEvalInt(b.ctx, input, result); err != nil { + if err := b.args[1].VecEvalInt(b.ctx, input, buf2); err != nil { return err } From 66b87a02ab239bc401092cb404d56c36a821b024 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Tue, 3 Sep 2019 22:24:05 +0800 Subject: [PATCH 03/10] update UT --- expression/bench_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/expression/bench_test.go b/expression/bench_test.go index b11174d8fc28d..27c629d0aba6f 100644 --- a/expression/bench_test.go +++ b/expression/bench_test.go @@ -241,7 +241,7 @@ var vecExprBenchCases = map[string][]vecExprBenchCase{ {types.ETInt, []types.EvalType{types.ETInt}, nil}, }, ast.Repeat: { - {types.ETString, []types.EvalType{types.ETString, types.ETInt}, []gener{&randLenStrGener{10, 50}, &rangeInt64Gener{10, 50}}}, + {types.ETString, []types.EvalType{types.ETString, types.ETInt}, []gener{&randLenStrGener{10, 20}, &rangeInt64Gener{-10, 10}}}, }, } From 5c857a23d642c6cad20d1e57bfc6f3c06c5b8c4f Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Tue, 3 Sep 2019 22:26:44 +0800 Subject: [PATCH 04/10] refmt --- expression/builtin_string_vec.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/expression/builtin_string_vec.go b/expression/builtin_string_vec.go index 806547c340afb..83856f86daed8 100644 --- a/expression/builtin_string_vec.go +++ b/expression/builtin_string_vec.go @@ -30,7 +30,7 @@ func (b *builtinRepeatSig) vecEvalString(input *chunk.Chunk, result *chunk.Colum result.ReserveString(n) nums := buf2.Int64s() - for i := 0; i < n; i ++ { + for i := 0; i < n; i++ { if buf.IsNull(i) || buf2.IsNull(i) { result.AppendNull() continue From 3246378379c0e2360f4f42f9f7ce684303e57c61 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Wed, 4 Sep 2019 14:25:39 +0800 Subject: [PATCH 05/10] add more comments --- expression/bench_test.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/expression/bench_test.go b/expression/bench_test.go index 27c629d0aba6f..3406482667cc3 100644 --- a/expression/bench_test.go +++ b/expression/bench_test.go @@ -195,11 +195,12 @@ func BenchmarkScalarFunctionClone(b *testing.B) { b.ReportAllocs() } -// gener is used to generate data for test. -type gener interface { +// dataGenerator is used to generate data for test. +type dataGenerator interface { gen() interface{} } +// rangeInt64Gener is used to generate int64 items in [begin, end). type rangeInt64Gener struct { begin int end int @@ -209,6 +210,7 @@ func (rig *rangeInt64Gener) gen() interface{} { return int64(rand.Intn(rig.end-rig.begin) + rig.begin) } +// randLenStrGener is used to generate strings whose lengths are in [lenBegin, lenEnd). type randLenStrGener struct { lenBegin int lenEnd int @@ -233,7 +235,11 @@ func (g *randLenStrGener) gen() interface{} { type vecExprBenchCase struct { retEvalType types.EvalType childrenTypes []types.EvalType - geners []gener // used to generate data for children + // geners are used to generate data for children and geners[i] generates data for children[i]. + // if geners[i] is nil, the default dataGenerator will be used for its corresponding child. + // the geners slice can be shorter than the children slice, if it has 3 children, then + // geners[gen1, gen2] will be regarded as geners[gen1, gen2, nil]. + geners []dataGenerator } var vecExprBenchCases = map[string][]vecExprBenchCase{ @@ -241,7 +247,7 @@ var vecExprBenchCases = map[string][]vecExprBenchCase{ {types.ETInt, []types.EvalType{types.ETInt}, nil}, }, ast.Repeat: { - {types.ETString, []types.EvalType{types.ETString, types.ETInt}, []gener{&randLenStrGener{10, 20}, &rangeInt64Gener{-10, 10}}}, + {types.ETString, []types.EvalType{types.ETString, types.ETInt}, []dataGenerator{&randLenStrGener{10, 20}, &rangeInt64Gener{-10, 10}}}, }, } From a261af5ef65ef6e7a7554697f9e5ca2760cc97d2 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Wed, 4 Sep 2019 14:26:56 +0800 Subject: [PATCH 06/10] fixup --- expression/bench_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/expression/bench_test.go b/expression/bench_test.go index 7da6c9ad9716b..7ecb5ea567ce1 100644 --- a/expression/bench_test.go +++ b/expression/bench_test.go @@ -250,7 +250,7 @@ var vecExprBenchCases = map[string][]vecExprBenchCase{ {types.ETString, []types.EvalType{types.ETString, types.ETInt}, []dataGenerator{&randLenStrGener{10, 20}, &rangeInt64Gener{-10, 10}}}, }, ast.Log10: { - {types.ETReal, []types.EvalType{types.ETReal}}, + {types.ETReal, []types.EvalType{types.ETReal}, nil}, }, } From 3011ae8c34f8e7045839bd5e9de04f3b6a1b7059 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Wed, 4 Sep 2019 15:54:55 +0800 Subject: [PATCH 07/10] address comments --- expression/bench_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/expression/bench_test.go b/expression/bench_test.go index 7ecb5ea567ce1..27feb6fc3ab46 100644 --- a/expression/bench_test.go +++ b/expression/bench_test.go @@ -236,8 +236,8 @@ type vecExprBenchCase struct { retEvalType types.EvalType childrenTypes []types.EvalType // geners are used to generate data for children and geners[i] generates data for children[i]. - // if geners[i] is nil, the default dataGenerator will be used for its corresponding child. - // the geners slice can be shorter than the children slice, if it has 3 children, then + // If geners[i] is nil, the default dataGenerator will be used for its corresponding child. + // The geners slice can be shorter than the children slice, if it has 3 children, then // geners[gen1, gen2] will be regarded as geners[gen1, gen2, nil]. geners []dataGenerator } From b8d51a272373c12986ecbbdbd4d1be70a75199bb Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Wed, 4 Sep 2019 16:58:48 +0800 Subject: [PATCH 08/10] address comments --- expression/builtin_string_vec.go | 1 + 1 file changed, 1 insertion(+) diff --git a/expression/builtin_string_vec.go b/expression/builtin_string_vec.go index 83856f86daed8..f8728c2d9b571 100644 --- a/expression/builtin_string_vec.go +++ b/expression/builtin_string_vec.go @@ -31,6 +31,7 @@ func (b *builtinRepeatSig) vecEvalString(input *chunk.Chunk, result *chunk.Colum result.ReserveString(n) nums := buf2.Int64s() for i := 0; i < n; i++ { + // TODO: introduce vectorized null-bitmap to speed it up. if buf.IsNull(i) || buf2.IsNull(i) { result.AppendNull() continue From a775c704d147e4f3f39214e8fe223dbaca944961 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Wed, 4 Sep 2019 17:23:47 +0800 Subject: [PATCH 09/10] address comments --- expression/builtin.go | 18 ++++++++---------- expression/builtin_string_vec.go | 8 ++++---- expression/builtin_vectorized_test.go | 12 ++++++------ 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/expression/builtin.go b/expression/builtin.go index 66d3ef3473eff..5afedf43b3697 100644 --- a/expression/builtin.go +++ b/expression/builtin.go @@ -34,11 +34,11 @@ import ( // baseBuiltinFunc will be contained in every struct that implement builtinFunc interface. type baseBuiltinFunc struct { - columnBufferAllocator - args []Expression - ctx sessionctx.Context - tp *types.FieldType - pbCode tipb.ScalarFuncSig + bufAllocator columnBufferAllocator + args []Expression + ctx sessionctx.Context + tp *types.FieldType + pbCode tipb.ScalarFuncSig childrenVectorizedOnce *sync.Once childrenVectorized bool @@ -66,7 +66,7 @@ func newBaseBuiltinFunc(ctx sessionctx.Context, args []Expression) baseBuiltinFu panic("ctx should not be nil") } return baseBuiltinFunc{ - columnBufferAllocator: newLocalSliceBuffer(len(args)), + bufAllocator: newLocalSliceBuffer(len(args)), childrenVectorizedOnce: new(sync.Once), args: args, @@ -171,7 +171,7 @@ func newBaseBuiltinFuncWithTp(ctx sessionctx.Context, args []Expression, retType fieldType.Charset, fieldType.Collate = charset.GetDefaultCharsetAndCollate() } return baseBuiltinFunc{ - columnBufferAllocator: newLocalSliceBuffer(len(args)), + bufAllocator: newLocalSliceBuffer(len(args)), childrenVectorizedOnce: new(sync.Once), args: args, @@ -297,7 +297,7 @@ func (b *baseBuiltinFunc) cloneFrom(from *baseBuiltinFunc) { b.ctx = from.ctx b.tp = from.tp b.pbCode = from.pbCode - b.columnBufferAllocator = newLocalSliceBuffer(len(b.args)) + b.bufAllocator = newLocalSliceBuffer(len(b.args)) b.childrenVectorizedOnce = new(sync.Once) } @@ -338,8 +338,6 @@ func newBaseBuiltinCastFunc(builtinFunc baseBuiltinFunc, inUnion bool) baseBuilt // vecBuiltinFunc contains all vectorized methods for a builtin function. type vecBuiltinFunc interface { - columnBufferAllocator - // vectorized returns if this builtin function itself supports vectorized evaluation. vectorized() bool diff --git a/expression/builtin_string_vec.go b/expression/builtin_string_vec.go index f8728c2d9b571..48af261d755b7 100644 --- a/expression/builtin_string_vec.go +++ b/expression/builtin_string_vec.go @@ -10,20 +10,20 @@ import ( func (b *builtinRepeatSig) vecEvalString(input *chunk.Chunk, result *chunk.Column) error { n := input.NumRows() - buf, err := b.get(types.ETString, n) + buf, err := b.bufAllocator.get(types.ETString, n) if err != nil { return err } - defer b.put(buf) + defer b.bufAllocator.put(buf) if err := b.args[0].VecEvalString(b.ctx, input, buf); err != nil { return err } - buf2, err := b.get(types.ETInt, n) + buf2, err := b.bufAllocator.get(types.ETInt, n) if err != nil { return err } - defer b.put(buf2) + defer b.bufAllocator.put(buf2) if err := b.args[1].VecEvalInt(b.ctx, input, buf2); err != nil { return err } diff --git a/expression/builtin_vectorized_test.go b/expression/builtin_vectorized_test.go index fd6a08cbc3ec5..480dadc530556 100644 --- a/expression/builtin_vectorized_test.go +++ b/expression/builtin_vectorized_test.go @@ -39,7 +39,7 @@ type mockVecPlusIntBuiltinFunc struct { func (p *mockVecPlusIntBuiltinFunc) allocBuf(n int) (*chunk.Column, error) { if p.enableAlloc { - return p.get(types.ETInt, n) + return p.bufAllocator.get(types.ETInt, n) } if p.buf == nil { p.buf = chunk.NewColumn(types.NewFieldType(mysql.TypeLonglong), n) @@ -49,7 +49,7 @@ func (p *mockVecPlusIntBuiltinFunc) allocBuf(n int) (*chunk.Column, error) { func (p *mockVecPlusIntBuiltinFunc) releaseBuf(buf *chunk.Column) { if p.enableAlloc { - p.put(buf) + p.bufAllocator.put(buf) } } @@ -207,7 +207,7 @@ func (p *mockBuiltinDouble) vecEvalReal(input *chunk.Chunk, result *chunk.Column func (p *mockBuiltinDouble) vecEvalString(input *chunk.Chunk, result *chunk.Column) error { var buf *chunk.Column var err error - if buf, err = p.baseBuiltinFunc.get(p.evalType, input.NumRows()); err != nil { + if buf, err = p.baseBuiltinFunc.bufAllocator.get(p.evalType, input.NumRows()); err != nil { return err } if err := p.args[0].VecEvalString(p.ctx, input, buf); err != nil { @@ -218,7 +218,7 @@ func (p *mockBuiltinDouble) vecEvalString(input *chunk.Chunk, result *chunk.Colu str := buf.GetString(i) result.AppendString(str + str) } - p.baseBuiltinFunc.put(buf) + p.baseBuiltinFunc.bufAllocator.put(buf) return nil } @@ -268,7 +268,7 @@ func (p *mockBuiltinDouble) vecEvalDuration(input *chunk.Chunk, result *chunk.Co func (p *mockBuiltinDouble) vecEvalJSON(input *chunk.Chunk, result *chunk.Column) error { var buf *chunk.Column var err error - if buf, err = p.baseBuiltinFunc.get(p.evalType, input.NumRows()); err != nil { + if buf, err = p.baseBuiltinFunc.bufAllocator.get(p.evalType, input.NumRows()); err != nil { return err } if err := p.args[0].VecEvalJSON(p.ctx, input, buf); err != nil { @@ -290,7 +290,7 @@ func (p *mockBuiltinDouble) vecEvalJSON(input *chunk.Chunk, result *chunk.Column } result.AppendJSON(j) } - p.baseBuiltinFunc.put(buf) + p.baseBuiltinFunc.bufAllocator.put(buf) return nil } From ae77b05d9f1ef849c413f366fe8a2c145626a597 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Wed, 4 Sep 2019 17:24:25 +0800 Subject: [PATCH 10/10] add more comments --- expression/builtin_string_vec.go | 1 + 1 file changed, 1 insertion(+) diff --git a/expression/builtin_string_vec.go b/expression/builtin_string_vec.go index 48af261d755b7..69fd90f441422 100644 --- a/expression/builtin_string_vec.go +++ b/expression/builtin_string_vec.go @@ -42,6 +42,7 @@ func (b *builtinRepeatSig) vecEvalString(input *chunk.Chunk, result *chunk.Colum continue } if num > math.MaxInt32 { + // to avoid overflow when calculating uint64(byteLength)*uint64(num) later num = math.MaxInt32 }