Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 63 additions & 10 deletions expression/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,21 +195,66 @@ func BenchmarkScalarFunctionClone(b *testing.B) {
b.ReportAllocs()
}

// dataGenerator is used to generate data for tests.
// Each call to gen yields one value; the value is returned as an empty
// interface, so the caller must type-assert it to the Go type it expects.
type dataGenerator interface {
	// gen returns the next generated value.
	gen() interface{}
}

// rangeInt64Gener is used to generate int64 items in [begin, end).
type rangeInt64Gener struct {
	begin int // inclusive lower bound of the generated values
	end   int // exclusive upper bound of the generated values
}

// gen returns a pseudo-random int64 drawn from [begin, end).
// rand.Intn panics for a non-positive argument, so end must be greater than begin.
func (rig *rangeInt64Gener) gen() interface{} {
	width := rig.end - rig.begin
	offset := rand.Intn(width)
	return int64(offset + rig.begin)
}

// randLenStrGener is used to generate strings whose lengths are in [lenBegin, lenEnd).
type randLenStrGener struct {
	lenBegin int // inclusive lower bound of the generated string length
	lenEnd   int // exclusive upper bound of the generated string length
}

// gen returns a pseudo-random string whose length lies in [lenBegin, lenEnd)
// and whose bytes are drawn from the 62 ASCII digits and letters.
// rand.Intn panics for a non-positive argument, so lenEnd must be greater than lenBegin.
func (g *randLenStrGener) gen() interface{} {
	// Index i of alphabet is the byte the value i maps to:
	// 0-9 are digits, 10-35 lowercase letters, 36-61 uppercase letters.
	const alphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

	length := rand.Intn(g.lenEnd-g.lenBegin) + g.lenBegin
	out := make([]byte, length)
	for pos := range out {
		out[pos] = alphabet[rand.Intn(len(alphabet))]
	}
	return string(out)
}

// vecExprBenchCase describes one case for a vectorized expression: an eval type
// for the result, an eval type for each child, and optional data generators.
type vecExprBenchCase struct {
// retEvalType is the eval type associated with the case's result.
retEvalType types.EvalType
// childrenTypes holds one eval type per child, in child order.
childrenTypes []types.EvalType
// geners are used to generate data for children: geners[i] generates data for children[i].
// If geners[i] is nil, the default data generation is used for the corresponding child.
// The geners slice may be shorter than the children slice; missing trailing entries are
// treated as nil, e.g. with 3 children, []dataGenerator{gen1, gen2} is regarded as
// []dataGenerator{gen1, gen2, nil}.
geners []dataGenerator
}

// vecExprBenchCases maps a builtin function name to the cases exercised for it.
// Each case lists all three vecExprBenchCase fields positionally; a nil geners
// value means every child uses the default data generation.
var vecExprBenchCases = map[string][]vecExprBenchCase{
	ast.Cast: {
		{types.ETInt, []types.EvalType{types.ETInt}, nil},
	},
	ast.Repeat: {
		// Short strings and small counts (including non-positive ones) for the two children.
		{types.ETString, []types.EvalType{types.ETString, types.ETInt}, []dataGenerator{&randLenStrGener{10, 20}, &rangeInt64Gener{-10, 10}}},
	},
	ast.Log10: {
		{types.ETReal, []types.EvalType{types.ETReal}, nil},
	},
}

func fillColumn(eType types.EvalType, chk *chunk.Chunk, colIdx int) {
func fillColumn(eType types.EvalType, chk *chunk.Chunk, colIdx int, testCase vecExprBenchCase) {
nullRatio := 0.2
batchSize := 1024
switch eType {
Expand All @@ -218,10 +263,14 @@ func fillColumn(eType types.EvalType, chk *chunk.Chunk, colIdx int) {
if rand.Float64() < nullRatio {
chk.AppendNull(colIdx)
} else {
if rand.Float64() < 0.5 {
chk.AppendInt64(colIdx, -rand.Int63())
if len(testCase.geners) > colIdx && testCase.geners[colIdx] != nil {
Comment thread
zz-jason marked this conversation as resolved.
chk.AppendInt64(colIdx, testCase.geners[colIdx].gen().(int64))
} else {
chk.AppendInt64(colIdx, rand.Int63())
if rand.Float64() < 0.5 {
chk.AppendInt64(colIdx, -rand.Int63())
} else {
chk.AppendInt64(colIdx, rand.Int63())
}
}
}
}
Expand Down Expand Up @@ -286,7 +335,11 @@ func fillColumn(eType types.EvalType, chk *chunk.Chunk, colIdx int) {
if rand.Float64() < nullRatio {
chk.AppendNull(colIdx)
} else {
chk.AppendString(colIdx, fmt.Sprintf("%v", rand.Int()))
if len(testCase.geners) > colIdx && testCase.geners[colIdx] != nil {
chk.AppendString(colIdx, testCase.geners[colIdx].gen().(string))
} else {
chk.AppendString(colIdx, fmt.Sprintf("%v", rand.Int()))
}
}
}
default:
Expand Down Expand Up @@ -323,7 +376,7 @@ func genVecExprBenchCase(ctx sessionctx.Context, funcName string, testCase vecEx
cols := make([]Expression, len(testCase.childrenTypes))
input = chunk.New(fts, 1024, 1024)
for i, eType := range testCase.childrenTypes {
fillColumn(eType, input, i)
fillColumn(eType, input, i, testCase)
cols[i] = &Column{Index: i, RetType: fts[i]}
}

Expand Down Expand Up @@ -423,7 +476,7 @@ func genVecBuiltinFuncBenchCase(ctx sessionctx.Context, funcName string, testCas
cols := make([]Expression, childrenNumber)
input = chunk.New(fts, 1024, 1024)
for i, eType := range testCase.childrenTypes {
fillColumn(eType, input, i)
fillColumn(eType, input, i, testCase)
cols[i] = &Column{Index: i, RetType: fts[i]}
}

Expand Down Expand Up @@ -547,7 +600,7 @@ func (s *testEvaluatorSuite) TestVectorizedBuiltinFunc(c *C) {
err := baseFunc.vecEvalString(input, output)
c.Assert(err, IsNil)
for row := it.Begin(); row != it.End(); row = it.Next() {
val, isNull, err := baseFunc.evalDuration(row)
val, isNull, err := baseFunc.evalString(row)
c.Assert(err, IsNil)
c.Assert(isNull, Equals, output.IsNull(i))
if !isNull {
Expand Down
18 changes: 8 additions & 10 deletions expression/builtin.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ import (

// baseBuiltinFunc will be contained in every struct that implements the builtinFunc interface.
type baseBuiltinFunc struct {
columnBufferAllocator
args []Expression
ctx sessionctx.Context
tp *types.FieldType
pbCode tipb.ScalarFuncSig
bufAllocator columnBufferAllocator
args []Expression
ctx sessionctx.Context
tp *types.FieldType
pbCode tipb.ScalarFuncSig

childrenVectorizedOnce *sync.Once
childrenVectorized bool
Expand Down Expand Up @@ -66,7 +66,7 @@ func newBaseBuiltinFunc(ctx sessionctx.Context, args []Expression) baseBuiltinFu
panic("ctx should not be nil")
}
return baseBuiltinFunc{
columnBufferAllocator: newLocalSliceBuffer(len(args)),
bufAllocator: newLocalSliceBuffer(len(args)),
childrenVectorizedOnce: new(sync.Once),

args: args,
Expand Down Expand Up @@ -171,7 +171,7 @@ func newBaseBuiltinFuncWithTp(ctx sessionctx.Context, args []Expression, retType
fieldType.Charset, fieldType.Collate = charset.GetDefaultCharsetAndCollate()
}
return baseBuiltinFunc{
columnBufferAllocator: newLocalSliceBuffer(len(args)),
bufAllocator: newLocalSliceBuffer(len(args)),
childrenVectorizedOnce: new(sync.Once),

args: args,
Expand Down Expand Up @@ -297,7 +297,7 @@ func (b *baseBuiltinFunc) cloneFrom(from *baseBuiltinFunc) {
b.ctx = from.ctx
b.tp = from.tp
b.pbCode = from.pbCode
b.columnBufferAllocator = newLocalSliceBuffer(len(b.args))
b.bufAllocator = newLocalSliceBuffer(len(b.args))
b.childrenVectorizedOnce = new(sync.Once)
}

Expand Down Expand Up @@ -338,8 +338,6 @@ func newBaseBuiltinCastFunc(builtinFunc baseBuiltinFunc, inUnion bool) baseBuilt

// vecBuiltinFunc contains all vectorized methods for a builtin function.
type vecBuiltinFunc interface {
columnBufferAllocator

// vectorized returns if this builtin function itself supports vectorized evaluation.
vectorized() bool

Expand Down
67 changes: 67 additions & 0 deletions expression/builtin_string_vec.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package expression

import (
"math"
"strings"

"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
)

func (b *builtinRepeatSig) vecEvalString(input *chunk.Chunk, result *chunk.Column) error {
n := input.NumRows()
buf, err := b.bufAllocator.get(types.ETString, n)
if err != nil {
return err
}
defer b.bufAllocator.put(buf)
if err := b.args[0].VecEvalString(b.ctx, input, buf); err != nil {
return err
}

buf2, err := b.bufAllocator.get(types.ETInt, n)
if err != nil {
return err
}
defer b.bufAllocator.put(buf2)
if err := b.args[1].VecEvalInt(b.ctx, input, buf2); err != nil {
return err
}

result.ReserveString(n)
nums := buf2.Int64s()
for i := 0; i < n; i++ {
// TODO: introduce vectorized null-bitmap to speed it up.
if buf.IsNull(i) || buf2.IsNull(i) {
Comment thread
SunRunAway marked this conversation as resolved.
result.AppendNull()
continue
}
num := nums[i]
if num < 1 {
result.AppendString("")
continue
}
if num > math.MaxInt32 {
Comment thread
SunRunAway marked this conversation as resolved.
// to avoid overflow when calculating uint64(byteLength)*uint64(num) later
num = math.MaxInt32
}

str := buf.GetString(i)
byteLength := len(str)
if uint64(byteLength)*uint64(num) > b.maxAllowedPacket {
b.ctx.GetSessionVars().StmtCtx.AppendWarning(errWarnAllowedPacketOverflowed.GenWithStackByArgs("repeat", b.maxAllowedPacket))
result.AppendNull()
continue
}
if int64(byteLength) > int64(b.tp.Flen)/num {
result.AppendNull()
continue
}
result.AppendString(strings.Repeat(str, int(num)))
}
return nil
}

// vectorized reports that builtinRepeatSig itself supports vectorized evaluation.
func (b *builtinRepeatSig) vectorized() bool {
return true
}
12 changes: 6 additions & 6 deletions expression/builtin_vectorized_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ type mockVecPlusIntBuiltinFunc struct {

func (p *mockVecPlusIntBuiltinFunc) allocBuf(n int) (*chunk.Column, error) {
if p.enableAlloc {
return p.get(types.ETInt, n)
return p.bufAllocator.get(types.ETInt, n)
}
if p.buf == nil {
p.buf = chunk.NewColumn(types.NewFieldType(mysql.TypeLonglong), n)
Expand All @@ -49,7 +49,7 @@ func (p *mockVecPlusIntBuiltinFunc) allocBuf(n int) (*chunk.Column, error) {

// releaseBuf hands buf back to the function's buffer allocator when
// enableAlloc is set; otherwise it does nothing.
func (p *mockVecPlusIntBuiltinFunc) releaseBuf(buf *chunk.Column) {
	if p.enableAlloc {
		// Return the buffer exactly once, through the named bufAllocator field.
		p.bufAllocator.put(buf)
	}
}

Expand Down Expand Up @@ -207,7 +207,7 @@ func (p *mockBuiltinDouble) vecEvalReal(input *chunk.Chunk, result *chunk.Column
func (p *mockBuiltinDouble) vecEvalString(input *chunk.Chunk, result *chunk.Column) error {
var buf *chunk.Column
var err error
if buf, err = p.baseBuiltinFunc.get(p.evalType, input.NumRows()); err != nil {
if buf, err = p.baseBuiltinFunc.bufAllocator.get(p.evalType, input.NumRows()); err != nil {
return err
}
if err := p.args[0].VecEvalString(p.ctx, input, buf); err != nil {
Expand All @@ -218,7 +218,7 @@ func (p *mockBuiltinDouble) vecEvalString(input *chunk.Chunk, result *chunk.Colu
str := buf.GetString(i)
result.AppendString(str + str)
}
p.baseBuiltinFunc.put(buf)
p.baseBuiltinFunc.bufAllocator.put(buf)
return nil
}

Expand Down Expand Up @@ -268,7 +268,7 @@ func (p *mockBuiltinDouble) vecEvalDuration(input *chunk.Chunk, result *chunk.Co
func (p *mockBuiltinDouble) vecEvalJSON(input *chunk.Chunk, result *chunk.Column) error {
var buf *chunk.Column
var err error
if buf, err = p.baseBuiltinFunc.get(p.evalType, input.NumRows()); err != nil {
if buf, err = p.baseBuiltinFunc.bufAllocator.get(p.evalType, input.NumRows()); err != nil {
return err
}
if err := p.args[0].VecEvalJSON(p.ctx, input, buf); err != nil {
Expand All @@ -290,7 +290,7 @@ func (p *mockBuiltinDouble) vecEvalJSON(input *chunk.Chunk, result *chunk.Column
}
result.AppendJSON(j)
}
p.baseBuiltinFunc.put(buf)
p.baseBuiltinFunc.bufAllocator.put(buf)
return nil
}

Expand Down