diff --git a/go/parquet/doc.go b/go/parquet/doc.go index cf87b81826e..87a592836a9 100644 --- a/go/parquet/doc.go +++ b/go/parquet/doc.go @@ -29,8 +29,8 @@ // go get -u github.com/apache/arrow/go/parquet // // In addition, two cli utilities are provided: -// go install github.factset.com/mtopol/parquet-go/cmd/parquet_reader -// go install github.factset.com/mtopol/parquet-go/cmd/parquet_schema +// go install github.com/apache/arrow/go/parquet/cmd/parquet_reader +// go install github.com/apache/arrow/go/parquet/cmd/parquet_schema // // Modules // diff --git a/go/parquet/go.mod b/go/parquet/go.mod index 9c415931191..cf2be66aba0 100644 --- a/go/parquet/go.mod +++ b/go/parquet/go.mod @@ -19,15 +19,17 @@ module github.com/apache/arrow/go/parquet go 1.15 require ( + github.com/JohnCGriffin/overflow v0.0.0-20170615021017-4d914c927216 github.com/andybalholm/brotli v1.0.1 - github.com/apache/arrow/go/arrow v0.0.0-20210310173904-5de02e3697aa + github.com/apache/arrow/go/arrow v0.0.0-20210520144409-d07f30ada677 github.com/apache/thrift/lib/go/thrift v0.0.0-20210120171102-e27e82c46ba4 github.com/golang/snappy v0.0.3 github.com/klauspost/asmfmt v1.2.3 - github.com/klauspost/compress v1.11.12 + github.com/klauspost/compress v1.12.2 github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 github.com/stretchr/testify v1.7.0 + github.com/zeebo/xxh3 v0.10.0 golang.org/x/exp v0.0.0-20210220032938-85be41e4509f golang.org/x/sys v0.0.0-20210309074719-68d13333faf2 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 diff --git a/go/parquet/go.sum b/go/parquet/go.sum index be02835cc89..bebc1ff48a1 100644 --- a/go/parquet/go.sum +++ b/go/parquet/go.sum @@ -2,11 +2,13 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT dmitri.shuralyov.com/gpu/mtl v0.0.0-20201218220906-28db891af037/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/BurntSushi/toml v0.3.1/go.mod 
h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/JohnCGriffin/overflow v0.0.0-20170615021017-4d914c927216 h1:2ZboyJ8vl75fGesnG9NpMTD2DyQI3FzMXy4x752rGF0= +github.com/JohnCGriffin/overflow v0.0.0-20170615021017-4d914c927216/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk= github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= github.com/andybalholm/brotli v1.0.1 h1:KqhlKozYbRtJvsPrrEeXcO+N2l6NYT5A2QAFmSULpEc= github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= -github.com/apache/arrow/go/arrow v0.0.0-20210310173904-5de02e3697aa h1:0Bhiab9ep1wmbD1Lm17uqPkzgYhcBIZf1CsvrMhFMGI= -github.com/apache/arrow/go/arrow v0.0.0-20210310173904-5de02e3697aa/go.mod h1:c9sxoIT3YgLxH4UhLOCKaBlEojuMhVYpk4Ntv3opUTQ= +github.com/apache/arrow/go/arrow v0.0.0-20210520144409-d07f30ada677 h1:F7HiqIf4aBsF4YUBcLolXZ8duSEideNnZnr3lBGa2sA= +github.com/apache/arrow/go/arrow v0.0.0-20210520144409-d07f30ada677/go.mod h1:R4hW3Ug0s+n4CUsWHKOj00Pu01ZqU4x/hSF5kXUcXKQ= github.com/apache/thrift/lib/go/thrift v0.0.0-20210120171102-e27e82c46ba4 h1:orNYqmQGnSjgOauLWjHEp9/qIDT98xv/0Aa4Zet3/Y8= github.com/apache/thrift/lib/go/thrift v0.0.0-20210120171102-e27e82c46ba4/go.mod h1:V/LzksIyqd3KZuQ2SunvReTG/UkArhII1dAWY5U1sCE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= @@ -42,15 +44,19 @@ github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 
github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= github.com/klauspost/asmfmt v1.2.3 h1:qEM7SLDo6DXXXz5yTpqUoxhsrtwH30nNR2riO2ZjznY= github.com/klauspost/asmfmt v1.2.3/go.mod h1:RAoUvqkWr2rUa2I19qKMEVZQe4BVtcHGTMCUOcCU2Lg= -github.com/klauspost/compress v1.11.12 h1:famVnQVu7QwryBN4jNseQdUKES71ZAOnB6UQQJPZvqk= -github.com/klauspost/compress v1.11.12/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.12.2 h1:2KCfW3I9M7nSc5wOqXAlW2v2U6v+w6cbjvbfp+OykW8= +github.com/klauspost/compress v1.12.2/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= +github.com/pierrec/lz4/v4 v4.1.4 h1:PjkB+qEooc9nw4F6Pxe/e0xaRdWz3suItXWxWqAO1QE= +github.com/pierrec/lz4/v4 v4.1.4/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -58,6 +64,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/testify v1.2.0/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod 
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/zeebo/xxh3 v0.10.0 h1:1+2Mov9zfxTNUeoDG9k9i13VfxTR0p1JQu8L0vikxB0= +github.com/zeebo/xxh3 v0.10.0/go.mod h1:AQY73TOrhF3jNsdiM9zZOb8MThrYbZONHj7ryDBaLpg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -97,6 +105,7 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200727154430-2d971f7391a4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200909081042-eff7692f9009/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210309074719-68d13333faf2 h1:46ULzRKLh1CwgRq2dC5SlBzEqqNCi8rreOZnNrbqcIY= golang.org/x/sys v0.0.0-20210309074719-68d13333faf2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -135,7 +144,6 @@ google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyac google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.32.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= -google.golang.org/grpc/cmd/protoc-gen-go-grpc v0.0.0-20200910201057-6591123024b3/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= 
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= diff --git a/go/parquet/internal/encoding/boolean_decoder.go b/go/parquet/internal/encoding/boolean_decoder.go new file mode 100644 index 00000000000..a33b21a3181 --- /dev/null +++ b/go/parquet/internal/encoding/boolean_decoder.go @@ -0,0 +1,101 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package encoding + +import ( + "github.com/apache/arrow/go/arrow/bitutil" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/utils" + "golang.org/x/xerrors" +) + +// PlainBooleanDecoder is for the Plain Encoding type, there is no +// dictionary decoding for bools. +type PlainBooleanDecoder struct { + decoder + + bitOffset int +} + +// Type for the PlainBooleanDecoder is parquet.Types.Boolean +func (PlainBooleanDecoder) Type() parquet.Type { + return parquet.Types.Boolean +} + +// Decode fills out with bools decoded from the data at the current point +// or until we reach the end of the data. 
+// +// Returns the number of values decoded +func (dec *PlainBooleanDecoder) Decode(out []bool) (int, error) { + max := utils.MinInt(len(out), dec.nvals) + + unalignedExtract := func(start, end, curBitOffset int) int { + i := start + for ; curBitOffset < end; i, curBitOffset = i+1, curBitOffset+1 { + out[i] = (dec.data[0] & byte(1< 0 { // only go in here if there's at least one full byte to decode + if i > 0 { // skip our data forward if we decoded anything above + dec.data = dec.data[1:] + out = out[i:] + } + // determine the number of aligned bytes we can grab using SIMD optimized + // functions to improve performance. + alignedBytes := bitutil.BytesForBits(int64(batch)) + utils.BytesToBools(dec.data[:alignedBytes], out) + dec.data = dec.data[alignedBytes:] + out = out[alignedBytes*8:] + } + + // grab any trailing bits now that we've got our aligned bytes. + dec.bitOffset += unalignedExtract(dec.bitOffset, bitsRemain-batch, dec.bitOffset) + + dec.nvals -= max + return max, nil +} + +// DecodeSpaced is like Decode except it expands the values to leave spaces for null +// as determined by the validBits bitmap. 
+func (dec *PlainBooleanDecoder) DecodeSpaced(out []bool, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + if nullCount > 0 { + toRead := len(out) - nullCount + valuesRead, err := dec.Decode(out[:toRead]) + if err != nil { + return 0, err + } + if valuesRead != toRead { + return valuesRead, xerrors.New("parquet: boolean decoder: number of values / definition levels read did not match") + } + return spacedExpand(out, nullCount, validBits, validBitsOffset), nil + } + return dec.Decode(out) +} diff --git a/go/parquet/internal/encoding/boolean_encoder.go b/go/parquet/internal/encoding/boolean_encoder.go new file mode 100644 index 00000000000..fc9cd2728ac --- /dev/null +++ b/go/parquet/internal/encoding/boolean_encoder.go @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package encoding + +import ( + "github.com/apache/arrow/go/arrow/bitutil" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/utils" +) + +const ( + boolBufSize = 1024 + boolsInBuf = boolBufSize * 8 +) + +// PlainBooleanEncoder encodes bools as a bitmap as per the Plain Encoding +type PlainBooleanEncoder struct { + encoder + bitsBuffer []byte + wr utils.BitmapWriter +} + +// Type for the PlainBooleanEncoder is parquet.Types.Boolean +func (PlainBooleanEncoder) Type() parquet.Type { + return parquet.Types.Boolean +} + +// Put encodes the contents of in into the underlying data buffer. +func (enc *PlainBooleanEncoder) Put(in []bool) { + if enc.bitsBuffer == nil { + enc.bitsBuffer = make([]byte, boolBufSize) + } + if enc.wr == nil { + enc.wr = utils.NewBitmapWriter(enc.bitsBuffer, 0, boolsInBuf) + } + + n := enc.wr.AppendBools(in) + for n < len(in) { + enc.wr.Finish() + enc.append(enc.bitsBuffer) + enc.wr.Reset(0, boolsInBuf) + in = in[n:] + n = enc.wr.AppendBools(in) + } +} + +// PutSpaced will use the validBits bitmap to determine which values are nulls +// and can be left out from the slice, and the encoded without those nulls. 
+func (enc *PlainBooleanEncoder) PutSpaced(in []bool, validBits []byte, validBitsOffset int64) { + bufferOut := make([]bool, len(in)) + nvalid := spacedCompress(in, bufferOut, validBits, validBitsOffset) + enc.Put(bufferOut[:nvalid]) +} + +// EstimatedDataEncodedSize returns the current number of bytes that have +// been buffered so far +func (enc *PlainBooleanEncoder) EstimatedDataEncodedSize() int64 { + return int64(enc.sink.Len() + int(bitutil.BytesForBits(enc.wr.Pos()))) +} + +// FlushValues returns the buffered data, the responsibility is on the caller +// to release the buffer memory +func (enc *PlainBooleanEncoder) FlushValues() Buffer { + if enc.wr.Pos() > 0 { + toFlush := int(enc.wr.Pos()) + enc.append(enc.bitsBuffer[:bitutil.BytesForBits(int64(toFlush))]) + } + + return enc.sink.Finish() +} diff --git a/go/parquet/internal/encoding/byte_array_decoder.go b/go/parquet/internal/encoding/byte_array_decoder.go new file mode 100644 index 00000000000..fa8033b78fa --- /dev/null +++ b/go/parquet/internal/encoding/byte_array_decoder.go @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package encoding + +import ( + "encoding/binary" + + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/utils" + "golang.org/x/xerrors" +) + +// PlainByteArrayDecoder decodes a data chunk for bytearrays according to +// the plain encoding. The byte arrays will use slices to reference the +// data rather than copying it. +// +// The parquet spec defines Plain encoding for ByteArrays as a 4 byte little +// endian integer containing the length of the bytearray followed by that many +// bytes being the raw data of the byte array. +type PlainByteArrayDecoder struct { + decoder +} + +// Type returns parquet.Types.ByteArray for this decoder +func (PlainByteArrayDecoder) Type() parquet.Type { + return parquet.Types.ByteArray +} + +// Decode will populate the slice of bytearrays in full or until the number +// of values is consumed. +// +// Returns the number of values that were decoded. +func (pbad *PlainByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { + max := utils.MinInt(len(out), pbad.nvals) + + for i := 0; i < max; i++ { + // there should always be at least four bytes which is the length of the + // next value in the data. 
+ if len(pbad.data) < 4 { + return i, xerrors.New("parquet: eof reading bytearray") + } + + // the first 4 bytes are a little endian int32 length + byteLen := int32(binary.LittleEndian.Uint32(pbad.data[:4])) + if byteLen < 0 { + return i, xerrors.New("parquet: invalid BYTE_ARRAY value") + } + + if int64(len(pbad.data)) < int64(byteLen)+4 { + return i, xerrors.New("parquet: eof reading bytearray") + } + + out[i] = pbad.data[4 : byteLen+4 : byteLen+4] + pbad.data = pbad.data[byteLen+4:] + } + + pbad.nvals -= max + return max, nil +} + +// DecodeSpaced is like Decode, but expands the slice out to leave empty values +// where the validBits bitmap has 0s +func (pbad *PlainByteArrayDecoder) DecodeSpaced(out []parquet.ByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + toRead := len(out) - nullCount + valuesRead, err := pbad.Decode(out[:toRead]) + if err != nil { + return valuesRead, err + } + if valuesRead != toRead { + return valuesRead, xerrors.New("parquet: number of values / definition levels read did not match") + } + + return spacedExpand(out, nullCount, validBits, validBitsOffset), nil +} diff --git a/go/parquet/internal/encoding/byte_array_encoder.go b/go/parquet/internal/encoding/byte_array_encoder.go new file mode 100644 index 00000000000..8d46c6f5a9b --- /dev/null +++ b/go/parquet/internal/encoding/byte_array_encoder.go @@ -0,0 +1,123 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package encoding + +import ( + "encoding/binary" + "unsafe" + + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/utils" +) + +// PlainByteArrayEncoder encodes byte arrays according to the spec for Plain encoding +// by encoding the length as a int32 followed by the bytes of the value. +type PlainByteArrayEncoder struct { + encoder + + bitSetReader utils.SetBitRunReader +} + +// PutByteArray writes out the 4 bytes for the length followed by the data +func (enc *PlainByteArrayEncoder) PutByteArray(val parquet.ByteArray) { + inc := val.Len() + arrow.Uint32SizeBytes + enc.sink.Reserve(inc) + vlen := toLEFunc(uint32(val.Len())) + enc.sink.UnsafeWrite((*(*[4]byte)(unsafe.Pointer(&vlen)))[:]) + enc.sink.UnsafeWrite(val) +} + +// Put writes out all of the values in this slice to the encoding sink +func (enc *PlainByteArrayEncoder) Put(in []parquet.ByteArray) { + for _, val := range in { + enc.PutByteArray(val) + } +} + +// PutSpaced uses the bitmap of validBits to leave out anything that is null according +// to the bitmap. 
+// +// If validBits is nil, this is equivalent to calling Put +func (enc *PlainByteArrayEncoder) PutSpaced(in []parquet.ByteArray, validBits []byte, validBitsOffset int64) { + if validBits != nil { + if enc.bitSetReader == nil { + enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in))) + } else { + enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in))) + } + + for { + run := enc.bitSetReader.NextRun() + if run.Length == 0 { + break + } + enc.Put(in[int(run.Pos):int(run.Pos+run.Length)]) + } + } else { + enc.Put(in) + } +} + +// Type returns parquet.Types.ByteArray for the bytearray encoder +func (PlainByteArrayEncoder) Type() parquet.Type { + return parquet.Types.ByteArray +} + +// WriteDict writes the dictionary out to the provided slice, out should be +// at least DictEncodedSize() bytes +func (enc *DictByteArrayEncoder) WriteDict(out []byte) { + enc.memo.(BinaryMemoTable).VisitValues(0, func(v []byte) { + binary.LittleEndian.PutUint32(out, uint32(len(v))) + out = out[arrow.Uint32SizeBytes:] + copy(out, v) + out = out[len(v):] + }) +} + +// PutByteArray adds a single byte array to buffer, updating the dictionary +// and encoded size if it's a new value +func (enc *DictByteArrayEncoder) PutByteArray(in parquet.ByteArray) { + if in == nil { + in = empty[:] + } + memoIdx, found, err := enc.memo.GetOrInsert(in) + if err != nil { + panic(err) + } + if !found { + enc.dictEncodedSize += in.Len() + arrow.Uint32SizeBytes + } + enc.addIndex(memoIdx) +} + +// Put takes a slice of ByteArrays to add and encode. 
+func (enc *DictByteArrayEncoder) Put(in []parquet.ByteArray) { + for _, val := range in { + enc.PutByteArray(val) + } +} + +// PutSpaced like with the non-dict encoder leaves out the values where the validBits bitmap is 0 +func (enc *DictByteArrayEncoder) PutSpaced(in []parquet.ByteArray, validBits []byte, validBitsOffset int64) { + utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error { + for i := int64(0); i < length; i++ { + enc.PutByteArray(in[i+pos]) + } + return nil + }) +} diff --git a/go/parquet/internal/encoding/decoder.go b/go/parquet/internal/encoding/decoder.go new file mode 100644 index 00000000000..6de61574ec5 --- /dev/null +++ b/go/parquet/internal/encoding/decoder.go @@ -0,0 +1,186 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package encoding + +import ( + "bytes" + "reflect" + + "github.com/apache/arrow/go/arrow/memory" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/debug" + format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/parquet/internal/utils" + "github.com/apache/arrow/go/parquet/schema" + "golang.org/x/xerrors" +) + +// DecoderTraits provides an interface for more easily interacting with types +// to generate decoders for specific types. +type DecoderTraits interface { + Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder + BytesRequired(int) int +} + +// NewDecoder constructs a decoder for a given type and encoding +func NewDecoder(t parquet.Type, e parquet.Encoding, descr *schema.Column, mem memory.Allocator) TypedDecoder { + traits := getDecodingTraits(t) + if traits == nil { + return nil + } + + return traits.Decoder(e, descr, false /* use dictionary */, mem) +} + +// NewDictDecoder is like NewDecoder but for dictionary encodings, panics if type is bool. +// +// if mem is nil, memory.DefaultAllocator will be used +func NewDictDecoder(t parquet.Type, descr *schema.Column, mem memory.Allocator) DictDecoder { + traits := getDecodingTraits(t) + if traits == nil { + return nil + } + + if mem == nil { + mem = memory.DefaultAllocator + } + + return traits.Decoder(parquet.Encodings.RLEDict, descr, true /* use dictionary */, mem).(DictDecoder) +} + +type decoder struct { + descr *schema.Column + encoding format.Encoding + nvals int + data []byte + typeLen int +} + +// newDecoderBase constructs the base decoding object that is embedded in the +// type specific decoders. 
+func newDecoderBase(e format.Encoding, descr *schema.Column) decoder { + typeLen := -1 + if descr != nil && descr.PhysicalType() == parquet.Types.FixedLenByteArray { + typeLen = int(descr.TypeLength()) + } + + return decoder{ + descr: descr, + encoding: e, + typeLen: typeLen, + } +} + +// SetData sets the data for decoding into the decoder to update the available +// data bytes and number of values available. +func (d *decoder) SetData(nvals int, data []byte) error { + d.data = data + d.nvals = nvals + return nil +} + +// ValuesLeft returns the number of remaining values that can be decoded +func (d *decoder) ValuesLeft() int { return d.nvals } + +// Encoding returns the encoding type used by this decoder to decode the bytes. +func (d *decoder) Encoding() parquet.Encoding { return parquet.Encoding(d.encoding) } + +type dictDecoder struct { + decoder + mem memory.Allocator + dictValueDecoder utils.DictionaryConverter + idxDecoder *utils.RleDecoder +} + +// SetDict sets a decoder that can be used to decode the dictionary that is +// used for this column in order to return the proper values. +func (d *dictDecoder) SetDict(dict TypedDecoder) { + if dict.Type() != d.descr.PhysicalType() { + panic("parquet: mismatch dictionary and column data type") + } + + d.dictValueDecoder = NewDictConverter(dict) +} + +// SetData sets the index value data into the decoder. 
+func (d *dictDecoder) SetData(nvals int, data []byte) error { + d.nvals = nvals + if len(data) == 0 { + // no data, bitwidth can safely be 0 + d.idxDecoder = utils.NewRleDecoder(bytes.NewReader(data), 0 /* bitwidth */) + return nil + } + + // grab the bit width from the first byte + width := uint8(data[0]) + if width >= 64 { + return xerrors.New("parquet: invalid or corrupted bit width") + } + + // pass the rest of the data, minus that first byte, to the decoder + d.idxDecoder = utils.NewRleDecoder(bytes.NewReader(data[1:]), int(width)) + return nil +} + +func (d *dictDecoder) decode(out interface{}) (int, error) { + return d.idxDecoder.GetBatchWithDict(d.dictValueDecoder, out) +} + +func (d *dictDecoder) decodeSpaced(out interface{}, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + return d.idxDecoder.GetBatchWithDictSpaced(d.dictValueDecoder, out, nullCount, validBits, validBitsOffset) +} + +var empty = [1]byte{0} + +// spacedExpand is used to take a slice of data and utilize the bitmap provided to fill in nulls into the +// correct slots according to the bitmap in order to produce a fully expanded result slice with nulls +// in the correct slots. +func spacedExpand(buffer interface{}, nullCount int, validBits []byte, validBitsOffset int64) int { + bufferRef := reflect.ValueOf(buffer) + if bufferRef.Kind() != reflect.Slice { + panic("invalid spacedexpand type, not slice") + } + + var ( + numValues int = bufferRef.Len() + ) + + idxDecode := int64(numValues - nullCount) + if idxDecode == 0 { // if there's nothing to decode there's nothing to do. + return numValues + } + + // read the bitmap in reverse grabbing runs of valid bits where possible. 
+ rdr := utils.NewReverseSetBitRunReader(validBits, validBitsOffset, int64(numValues)) + for { + run := rdr.NextRun() + if run.Length == 0 { + break + } + + // copy data from the end of the slice to it's proper location in the slice after accounting for the nulls + // because we technically don't care what is in the null slots we don't actually have to clean + // up after ourselves because we're doing this in reverse to guarantee that we'll always simply + // overwrite any existing data with the correctly spaced data. Any data that happens to be left in the null + // slots is fine since it shouldn't matter and saves us work. + idxDecode -= run.Length + n := reflect.Copy(bufferRef.Slice(int(run.Pos), bufferRef.Len()), bufferRef.Slice(int(idxDecode), int(int64(idxDecode)+run.Length))) + debug.Assert(n == int(run.Length), "reflect.Copy copied incorrect number of elements in spacedExpand") + } + + return numValues +} diff --git a/go/parquet/internal/encoding/delta_bit_packing.go b/go/parquet/internal/encoding/delta_bit_packing.go new file mode 100644 index 00000000000..babd0b1fa97 --- /dev/null +++ b/go/parquet/internal/encoding/delta_bit_packing.go @@ -0,0 +1,520 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package encoding + +import ( + "bytes" + "math" + "math/bits" + "reflect" + + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/arrow/memory" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/utils" + "golang.org/x/xerrors" +) + +// see the deltaBitPack encoder for a description of the encoding format that is +// used for delta-bitpacking. +type deltaBitPackDecoder struct { + decoder + + mem memory.Allocator + + usedFirst bool + bitdecoder *utils.BitReader + blockSize uint64 + currentBlockVals uint32 + miniBlocks uint64 + valsPerMini uint32 + currentMiniBlockVals uint32 + minDelta int64 + miniBlockIdx uint64 + + deltaBitWidths *memory.Buffer + deltaBitWidth byte + + lastVal int64 +} + +// returns the number of bytes read so far +func (d *deltaBitPackDecoder) bytesRead() int64 { + return d.bitdecoder.CurOffset() +} + +func (d *deltaBitPackDecoder) Allocator() memory.Allocator { return d.mem } + +// SetData sets the bytes and the expected number of values to decode +// into the decoder, updating the decoder and allowing it to be reused. 
+func (d *deltaBitPackDecoder) SetData(nvalues int, data []byte) error { + // set our data into the underlying decoder for the type + if err := d.decoder.SetData(nvalues, data); err != nil { + return err + } + // create a bit reader for our decoder's values + d.bitdecoder = utils.NewBitReader(bytes.NewReader(d.data)) + d.currentBlockVals = 0 + d.currentMiniBlockVals = 0 + if d.deltaBitWidths == nil { + d.deltaBitWidths = memory.NewResizableBuffer(d.mem) + } + + var ok bool + d.blockSize, ok = d.bitdecoder.GetVlqInt() + if !ok { + return xerrors.New("parquet: eof exception") + } + + if d.miniBlocks, ok = d.bitdecoder.GetVlqInt(); !ok { + return xerrors.New("parquet: eof exception") + } + + var totalValues uint64 + if totalValues, ok = d.bitdecoder.GetVlqInt(); !ok { + return xerrors.New("parquet: eof exception") + } + + if int(totalValues) != d.nvals { + return xerrors.New("parquet: mismatch between number of values and count in data header") + } + + if d.lastVal, ok = d.bitdecoder.GetZigZagVlqInt(); !ok { + return xerrors.New("parquet: eof exception") + } + + if d.miniBlocks != 0 { + d.valsPerMini = uint32(d.blockSize / d.miniBlocks) + } + return nil +} + +// initialize a block to decode +func (d *deltaBitPackDecoder) initBlock() error { + // first we grab the min delta value that we'll start from + var ok bool + if d.minDelta, ok = d.bitdecoder.GetZigZagVlqInt(); !ok { + return xerrors.New("parquet: eof exception") + } + + // ensure we have enough space for our miniblocks to decode the widths + d.deltaBitWidths.Resize(int(d.miniBlocks)) + + var err error + for i := uint64(0); i < d.miniBlocks; i++ { + if d.deltaBitWidths.Bytes()[i], err = d.bitdecoder.ReadByte(); err != nil { + return err + } + } + + d.miniBlockIdx = 0 + d.deltaBitWidth = d.deltaBitWidths.Bytes()[0] + d.currentBlockVals = uint32(d.blockSize) + return nil +} + +// DeltaBitPackInt32Decoder decodes Int32 values which are packed using the Delta BitPacking algorithm. 
+type DeltaBitPackInt32Decoder struct { + *deltaBitPackDecoder + + miniBlockValues []int32 +} + +func (d *DeltaBitPackInt32Decoder) unpackNextMini() error { + if d.miniBlockValues == nil { + d.miniBlockValues = make([]int32, 0, int(d.valsPerMini)) + } else { + d.miniBlockValues = d.miniBlockValues[:0] + } + d.deltaBitWidth = d.deltaBitWidths.Bytes()[int(d.miniBlockIdx)] + d.currentMiniBlockVals = d.valsPerMini + + for j := 0; j < int(d.valsPerMini); j++ { + delta, ok := d.bitdecoder.GetValue(int(d.deltaBitWidth)) + if !ok { + return xerrors.New("parquet: eof exception") + } + + d.lastVal += int64(delta) + int64(d.minDelta) + d.miniBlockValues = append(d.miniBlockValues, int32(d.lastVal)) + } + d.miniBlockIdx++ + return nil +} + +// Decode retrieves min(remaining values, len(out)) values from the data and returns the number +// of values actually decoded and any errors encountered. +func (d *DeltaBitPackInt32Decoder) Decode(out []int32) (int, error) { + max := utils.MinInt(len(out), d.nvals) + if max == 0 { + return 0, nil + } + + out = out[:max] + if !d.usedFirst { // starting value to calculate deltas against + out[0] = int32(d.lastVal) + out = out[1:] + d.usedFirst = true + } + + var err error + for len(out) > 0 { // unpack mini blocks until we get all the values we need + if d.currentBlockVals == 0 { + err = d.initBlock() + } + if d.currentMiniBlockVals == 0 { + err = d.unpackNextMini() + } + if err != nil { + return 0, err + } + + // copy as many values from our mini block as we can into out + start := int(d.valsPerMini - d.currentMiniBlockVals) + end := utils.MinInt(int(d.valsPerMini), len(out)) + copy(out, d.miniBlockValues[start:end]) + + numCopied := end - start + out = out[numCopied:] + d.currentBlockVals -= uint32(numCopied) + d.currentMiniBlockVals -= uint32(numCopied) + } + return max, nil +} + +// DecodeSpaced is like Decode, but the result is spaced out appropriately based on the passed in bitmap +func (d *DeltaBitPackInt32Decoder) DecodeSpaced(out 
[]int32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + toread := len(out) - nullCount + values, err := d.Decode(out[:toread]) + if err != nil { + return values, err + } + if values != toread { + return values, xerrors.New("parquet: number of values / definition levels read did not match") + } + + return spacedExpand(out, nullCount, validBits, validBitsOffset), nil +} + +// Type returns the physical parquet type that this decoder decodes, in this case Int32 +func (DeltaBitPackInt32Decoder) Type() parquet.Type { + return parquet.Types.Int32 +} + +// DeltaBitPackInt64Decoder decodes a delta bit packed int64 column of data. +type DeltaBitPackInt64Decoder struct { + *deltaBitPackDecoder + + miniBlockValues []int64 +} + +func (d *DeltaBitPackInt64Decoder) unpackNextMini() error { + if d.miniBlockValues == nil { + d.miniBlockValues = make([]int64, 0, int(d.valsPerMini)) + } else { + d.miniBlockValues = d.miniBlockValues[:0] + } + + d.deltaBitWidth = d.deltaBitWidths.Bytes()[int(d.miniBlockIdx)] + d.currentMiniBlockVals = d.valsPerMini + + for j := 0; j < int(d.valsPerMini); j++ { + delta, ok := d.bitdecoder.GetValue(int(d.deltaBitWidth)) + if !ok { + return xerrors.New("parquet: eof exception") + } + + d.lastVal += int64(delta) + int64(d.minDelta) + d.miniBlockValues = append(d.miniBlockValues, d.lastVal) + } + d.miniBlockIdx++ + return nil +} + +// Decode retrieves min(remaining values, len(out)) values from the data and returns the number +// of values actually decoded and any errors encountered. 
+func (d *DeltaBitPackInt64Decoder) Decode(out []int64) (int, error) { + max := utils.MinInt(len(out), d.nvals) + if max == 0 { + return 0, nil + } + + out = out[:max] + if !d.usedFirst { + out[0] = d.lastVal + out = out[1:] + d.usedFirst = true + } + + var err error + for len(out) > 0 { + if d.currentBlockVals == 0 { + err = d.initBlock() + } + if d.currentMiniBlockVals == 0 { + err = d.unpackNextMini() + } + + if err != nil { + return 0, err + } + + start := int(d.valsPerMini - d.currentMiniBlockVals) + end := utils.MinInt(int(d.valsPerMini), len(out)) + copy(out, d.miniBlockValues[start:end]) + + numCopied := end - start + out = out[numCopied:] + d.currentBlockVals -= uint32(numCopied) + d.currentMiniBlockVals -= uint32(numCopied) + } + return max, nil +} + +// Type returns the physical parquet type that this decoder decodes, in this case Int64 +func (DeltaBitPackInt64Decoder) Type() parquet.Type { + return parquet.Types.Int64 +} + +// DecodeSpaced is like Decode, but the result is spaced out appropriately based on the passed in bitmap +func (d DeltaBitPackInt64Decoder) DecodeSpaced(out []int64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + toread := len(out) - nullCount + values, err := d.Decode(out[:toread]) + if err != nil { + return values, err + } + if values != toread { + return values, xerrors.New("parquet: number of values / definition levels read did not match") + } + + return spacedExpand(out, nullCount, validBits, validBitsOffset), nil +} + +const ( + // block size must be a multiple of 128 + defaultBlockSize = 128 + defaultNumMiniBlocks = 4 + // block size / number of mini blocks must result in a multiple of 32 + defaultNumValuesPerMini = 32 + // max size of the header for the delta blocks + maxHeaderWriterSize = 32 +) + +// deltaBitPackEncoder is an encoder for the DeltaBinary Packing format +// as per the parquet spec. +// +// Consists of a header followed by blocks of delta encoded values binary packed. 
+// +// Format +// [header] [block 1] [block 2] ... [block N] +// +// Header +// [block size] [number of mini blocks per block] [total value count] [first value] +// +// Block +// [min delta] [list of bitwidths of the miniblocks] [miniblocks...] +// +// Sets aside bytes at the start of the internal buffer where the header will be written, +// and only writes the header when FlushValues is called before returning it. +type deltaBitPackEncoder struct { + encoder + + bitWriter *utils.BitWriter + totalVals uint64 + firstVal int64 + currentVal int64 + + blockSize uint64 + miniBlockSize uint64 + numMiniBlocks uint64 + deltas []int64 +} + +// flushBlock flushes out a finished block for writing to the underlying encoder +func (enc *deltaBitPackEncoder) flushBlock() { + if len(enc.deltas) == 0 { + return + } + + // determine the minimum delta value + minDelta := int64(math.MaxInt64) + for _, delta := range enc.deltas { + if delta < minDelta { + minDelta = delta + } + } + + enc.bitWriter.WriteZigZagVlqInt(minDelta) + // reserve enough bytes to write out our miniblock deltas + offset := enc.bitWriter.ReserveBytes(int(enc.numMiniBlocks)) + + valuesToWrite := int64(len(enc.deltas)) + for i := 0; i < int(enc.numMiniBlocks); i++ { + n := utils.Min(int64(enc.miniBlockSize), valuesToWrite) + if n == 0 { + break + } + + maxDelta := int64(math.MinInt64) + start := i * int(enc.miniBlockSize) + for _, val := range enc.deltas[start : start+int(n)] { + maxDelta = utils.Max(maxDelta, val) + } + + // compute bit width to store (max_delta - min_delta) + width := uint(bits.Len64(uint64(maxDelta - minDelta))) + // write out the bit width we used into the bytes we reserved earlier + enc.bitWriter.WriteAt([]byte{byte(width)}, int64(offset+i)) + + // write out our deltas + for _, val := range enc.deltas[start : start+int(n)] { + enc.bitWriter.WriteValue(uint64(val-minDelta), width) + } + + valuesToWrite -= n + + // pad the last block if n < miniBlockSize + for ; n < int64(enc.miniBlockSize); n++ 
{ + enc.bitWriter.WriteValue(0, width) + } + } + enc.deltas = enc.deltas[:0] +} + +// putInternal is the implementation for actually writing data which must be +// integral data as int, int8, int32, or int64. +func (enc *deltaBitPackEncoder) putInternal(data interface{}) { + v := reflect.ValueOf(data) + if v.Len() == 0 { + return + } + + idx := 0 + if enc.totalVals == 0 { + enc.blockSize = defaultBlockSize + enc.numMiniBlocks = defaultNumMiniBlocks + enc.miniBlockSize = defaultNumValuesPerMini + + enc.firstVal = v.Index(0).Int() + enc.currentVal = enc.firstVal + idx = 1 + + enc.bitWriter = utils.NewBitWriter(enc.sink) + } + + enc.totalVals += uint64(v.Len()) + for ; idx < v.Len(); idx++ { + val := v.Index(idx).Int() + enc.deltas = append(enc.deltas, val-enc.currentVal) + enc.currentVal = val + if len(enc.deltas) == int(enc.blockSize) { + enc.flushBlock() + } + } +} + +// FlushValues flushes any remaining data and returns the finished encoded buffer +func (enc *deltaBitPackEncoder) FlushValues() Buffer { + if enc.bitWriter != nil { + // write any remaining values + enc.flushBlock() + enc.bitWriter.Flush(true) + } else { + enc.blockSize = defaultBlockSize + enc.numMiniBlocks = defaultNumMiniBlocks + enc.miniBlockSize = defaultNumValuesPerMini + } + + buffer := make([]byte, maxHeaderWriterSize) + headerWriter := utils.NewBitWriter(utils.NewWriterAtBuffer(buffer)) + + headerWriter.WriteVlqInt(uint64(enc.blockSize)) + headerWriter.WriteVlqInt(uint64(enc.numMiniBlocks)) + headerWriter.WriteVlqInt(uint64(enc.totalVals)) + headerWriter.WriteZigZagVlqInt(int64(enc.firstVal)) + headerWriter.Flush(false) + + buffer = buffer[:headerWriter.Written()] + enc.totalVals = 0 + + if enc.bitWriter != nil { + flushed := enc.sink.Finish() + defer flushed.Release() + + buffer = append(buffer, flushed.Buf()[:enc.bitWriter.Written()]...) 
+ } + return poolBuffer{memory.NewBufferBytes(buffer)} +} + +// EstimatedDataEncodedSize returns the current amount of data actually flushed out and written +func (enc *deltaBitPackEncoder) EstimatedDataEncodedSize() int64 { + return int64(enc.bitWriter.Written()) +} + +// DeltaBitPackInt32Encoder is an encoder for the delta bitpacking encoding for int32 data. +type DeltaBitPackInt32Encoder struct { + *deltaBitPackEncoder +} + +// Put writes the values from the provided slice of int32 to the encoder +func (enc DeltaBitPackInt32Encoder) Put(in []int32) { + enc.putInternal(in) +} + +// PutSpaced takes a slice of int32 along with a bitmap that describes the nulls and an offset into the bitmap +// in order to write spaced data to the encoder. +func (enc DeltaBitPackInt32Encoder) PutSpaced(in []int32, validBits []byte, validBitsOffset int64) { + buffer := memory.NewResizableBuffer(enc.mem) + buffer.Reserve(arrow.Int32Traits.BytesRequired(len(in))) + defer buffer.Release() + + data := arrow.Int32Traits.CastFromBytes(buffer.Buf()) + nvalid := spacedCompress(in, data, validBits, validBitsOffset) + enc.Put(data[:nvalid]) +} + +// Type returns the underlying physical type this encoder works with, in this case Int32 +func (DeltaBitPackInt32Encoder) Type() parquet.Type { + return parquet.Types.Int32 +} + +// DeltaBitPackInt32Encoder is an encoder for the delta bitpacking encoding for int32 data. +type DeltaBitPackInt64Encoder struct { + *deltaBitPackEncoder +} + +// Put writes the values from the provided slice of int64 to the encoder +func (enc DeltaBitPackInt64Encoder) Put(in []int64) { + enc.putInternal(in) +} + +// PutSpaced takes a slice of int64 along with a bitmap that describes the nulls and an offset into the bitmap +// in order to write spaced data to the encoder. 
+func (enc DeltaBitPackInt64Encoder) PutSpaced(in []int64, validBits []byte, validBitsOffset int64) { + buffer := memory.NewResizableBuffer(enc.mem) + buffer.Reserve(arrow.Int64Traits.BytesRequired(len(in))) + defer buffer.Release() + + data := arrow.Int64Traits.CastFromBytes(buffer.Buf()) + nvalid := spacedCompress(in, data, validBits, validBitsOffset) + enc.Put(data[:nvalid]) +} + +// Type returns the underlying physical type this encoder works with, in this case Int64 +func (DeltaBitPackInt64Encoder) Type() parquet.Type { + return parquet.Types.Int64 +} diff --git a/go/parquet/internal/encoding/delta_byte_array.go b/go/parquet/internal/encoding/delta_byte_array.go new file mode 100644 index 00000000000..d11413ea236 --- /dev/null +++ b/go/parquet/internal/encoding/delta_byte_array.go @@ -0,0 +1,216 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package encoding + +import ( + "github.com/apache/arrow/go/arrow/memory" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/utils" + "golang.org/x/xerrors" +) + +// DeltaByteArrayEncoder is an encoder for writing bytearrays which are delta encoded +// this is also known as incremental encoding or front compression. 
For each element +// in a sequence of strings, we store the prefix length of the previous entry plus the suffix +// see https://en.wikipedia.org/wiki/Incremental_encoding for a longer description. +// +// This is stored as a sequence of delta-encoded prefix lengths followed by the suffixes +// encoded as delta length byte arrays. +type DeltaByteArrayEncoder struct { + encoder + + prefixEncoder *DeltaBitPackInt32Encoder + suffixEncoder *DeltaLengthByteArrayEncoder + + lastVal parquet.ByteArray +} + +func (enc *DeltaByteArrayEncoder) initEncoders() { + enc.prefixEncoder = &DeltaBitPackInt32Encoder{ + deltaBitPackEncoder: &deltaBitPackEncoder{encoder: newEncoderBase(enc.encoding, nil, enc.mem)}} + enc.suffixEncoder = &DeltaLengthByteArrayEncoder{ + newEncoderBase(enc.encoding, nil, enc.mem), + &DeltaBitPackInt32Encoder{ + deltaBitPackEncoder: &deltaBitPackEncoder{encoder: newEncoderBase(enc.encoding, nil, enc.mem)}}} +} + +// Type returns the underlying physical type this operates on, in this case ByteArrays only +func (DeltaByteArrayEncoder) Type() parquet.Type { return parquet.Types.ByteArray } + +// Put writes a slice of ByteArrays to the encoder +func (enc *DeltaByteArrayEncoder) Put(in []parquet.ByteArray) { + if len(in) == 0 { + return + } + + var suf parquet.ByteArray + if enc.prefixEncoder == nil { // initialize our encoders if we haven't yet + enc.initEncoders() + enc.prefixEncoder.Put([]int32{0}) + suf = in[0] + enc.lastVal = in[0] + enc.suffixEncoder.Put([]parquet.ByteArray{suf}) + in = in[1:] + } + + // for each value, figure out the common prefix with the previous value + // and then write the prefix length and the suffix. 
+ for _, val := range in { + l1 := enc.lastVal.Len() + l2 := val.Len() + j := 0 + for j < l1 && j < l2 { + if enc.lastVal[j] != val[j] { + break + } + j++ + } + enc.prefixEncoder.Put([]int32{int32(j)}) + suf = val[j:] + enc.suffixEncoder.Put([]parquet.ByteArray{suf}) + enc.lastVal = val + } + + // do the memcpy after the loops to keep a copy of the lastVal + // we do a copy here so that we only copy and keep a reference + // to the suffix, and aren't forcing the *entire* value to stay + // in memory while we have this reference to just the suffix. + enc.lastVal = append([]byte{}, enc.lastVal...) +} + +// PutSpaced is like Put, but assumes the data is already spaced for nulls and uses the bitmap provided and offset +// to compress the data before writing it without the null slots. +func (enc *DeltaByteArrayEncoder) PutSpaced(in []parquet.ByteArray, validBits []byte, validBitsOffset int64) { + if validBits != nil { + data := make([]parquet.ByteArray, len(in)) + nvalid := spacedCompress(in, data, validBits, validBitsOffset) + enc.Put(data[:nvalid]) + } else { + enc.Put(in) + } +} + +// Flush flushes any remaining data out and returns the finished encoded buffer. +func (enc *DeltaByteArrayEncoder) FlushValues() Buffer { + if enc.prefixEncoder == nil { + enc.initEncoders() + } + prefixBuf := enc.prefixEncoder.FlushValues() + defer prefixBuf.Release() + + suffixBuf := enc.suffixEncoder.FlushValues() + defer suffixBuf.Release() + + ret := bufferPool.Get().(*memory.Buffer) + ret.ResizeNoShrink(prefixBuf.Len() + suffixBuf.Len()) + copy(ret.Bytes(), prefixBuf.Bytes()) + copy(ret.Bytes()[prefixBuf.Len():], suffixBuf.Bytes()) + return poolBuffer{ret} +} + +// DeltaByteArrayDecoder is a decoder for a column of data encoded using incremental or prefix encoding. 
+type DeltaByteArrayDecoder struct { + *DeltaLengthByteArrayDecoder + + prefixLengths []int32 + lastVal parquet.ByteArray +} + +// Type returns the underlying physical type this decoder operates on, in this case ByteArrays only +func (DeltaByteArrayDecoder) Type() parquet.Type { + return parquet.Types.ByteArray +} + +func (d *DeltaByteArrayDecoder) Allocator() memory.Allocator { return d.mem } + +// SetData expects the data passed in to be the prefix lengths, followed by the +// blocks of suffix data in order to initialize the decoder. +func (d *DeltaByteArrayDecoder) SetData(nvalues int, data []byte) error { + prefixLenDec := DeltaBitPackInt32Decoder{ + deltaBitPackDecoder: &deltaBitPackDecoder{ + decoder: newDecoderBase(d.encoding, d.descr), + mem: d.mem}} + + if err := prefixLenDec.SetData(nvalues, data); err != nil { + return err + } + + d.prefixLengths = make([]int32, nvalues) + // decode all the prefix lengths first so we know how many bytes it took to get the + // prefix lengths for nvalues + prefixLenDec.Decode(d.prefixLengths) + + // now that we know how many bytes we needed for the prefix lengths, the rest are the + // delta length byte array encoding. 
+ return d.DeltaLengthByteArrayDecoder.SetData(nvalues, data[int(prefixLenDec.bytesRead()):]) +} + +// Decode decodes byte arrays into the slice provided and returns the number of values actually decoded +func (d *DeltaByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { + max := utils.MinInt(len(out), d.nvals) + if max == 0 { + return 0, nil + } + out = out[:max] + + var err error + if d.lastVal == nil { + _, err = d.DeltaLengthByteArrayDecoder.Decode(out[:1]) + if err != nil { + return 0, err + } + d.lastVal = out[0] + out = out[1:] + d.prefixLengths = d.prefixLengths[1:] + } + + var prefixLen int32 + suffixHolder := make([]parquet.ByteArray, 1) + for len(out) > 0 { + prefixLen, d.prefixLengths = d.prefixLengths[0], d.prefixLengths[1:] + + prefix := d.lastVal[:prefixLen:prefixLen] + _, err = d.DeltaLengthByteArrayDecoder.Decode(suffixHolder) + if err != nil { + return 0, err + } + + if len(suffixHolder[0]) == 0 { + d.lastVal = prefix + } else { + d.lastVal = make([]byte, int(prefixLen)+len(suffixHolder[0])) + copy(d.lastVal, prefix) + copy(d.lastVal[prefixLen:], suffixHolder[0]) + } + out[0], out = d.lastVal, out[1:] + } + return max, nil +} + +// DecodeSpaced is like decode, but the result is spaced out based on the bitmap provided. 
+func (d *DeltaByteArrayDecoder) DecodeSpaced(out []parquet.ByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + toread := len(out) - nullCount + values, err := d.Decode(out[:toread]) + if err != nil { + return values, err + } + if values != toread { + return values, xerrors.New("parquet: number of values / definition levels read did not match") + } + + return spacedExpand(out, nullCount, validBits, validBitsOffset), nil +} diff --git a/go/parquet/internal/encoding/delta_length_byte_array.go b/go/parquet/internal/encoding/delta_length_byte_array.go new file mode 100644 index 00000000000..3563ccec461 --- /dev/null +++ b/go/parquet/internal/encoding/delta_length_byte_array.go @@ -0,0 +1,144 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package encoding + +import ( + "github.com/apache/arrow/go/arrow/memory" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/utils" + "golang.org/x/xerrors" +) + +// DeltaLengthByteArrayEncoder encodes data using by taking all of the byte array lengths +// and encoding them in front using delta encoding, followed by all of the binary data +// concatenated back to back. 
The expected savings is from the cost of encoding the lengths +// and possibly better compression in the data which will no longer be interleaved with the lengths. +// +// This encoding is always preferred over PLAIN for byte array columns where possible. +// +// For example, if the data was "Hello", "World", "Foobar", "ABCDEF" the encoded data would be: +// DeltaEncoding(5, 5, 6, 6) "HelloWorldFoobarABCDEF" +type DeltaLengthByteArrayEncoder struct { + encoder + + lengthEncoder *DeltaBitPackInt32Encoder +} + +// Put writes the provided slice of byte arrays to the encoder +func (enc *DeltaLengthByteArrayEncoder) Put(in []parquet.ByteArray) { + lengths := make([]int32, len(in)) + totalLen := int(0) + for idx, val := range in { + lengths[idx] = int32(val.Len()) + totalLen += val.Len() + } + + enc.lengthEncoder.Put(lengths) + enc.sink.Reserve(totalLen) + for _, val := range in { + enc.sink.UnsafeWrite(val) + } +} + +// PutSpaced is like Put, but the data is spaced out according to the bitmap provided and is compressed +// accordingly before it is written to drop the null data from the write. +func (enc *DeltaLengthByteArrayEncoder) PutSpaced(in []parquet.ByteArray, validBits []byte, validBitsOffset int64) { + if validBits != nil { + data := make([]parquet.ByteArray, len(in)) + nvalid := spacedCompress(in, data, validBits, validBitsOffset) + enc.Put(data[:nvalid]) + } else { + enc.Put(in) + } +} + +// Type returns the underlying type which is handled by this encoder, ByteArrays only. +func (DeltaLengthByteArrayEncoder) Type() parquet.Type { + return parquet.Types.ByteArray +} + +// FlushValues flushes any remaining data and returns the final encoded buffer of data. 
+func (enc *DeltaLengthByteArrayEncoder) FlushValues() Buffer { + ret := enc.lengthEncoder.FlushValues() + defer ret.Release() + + data := enc.sink.Finish() + defer data.Release() + + output := bufferPool.Get().(*memory.Buffer) + output.ResizeNoShrink(ret.Len() + data.Len()) + copy(output.Bytes(), ret.Bytes()) + copy(output.Bytes()[ret.Len():], data.Bytes()) + return poolBuffer{output} +} + +// DeltaLengthByteArrayDecoder is a decoder for handling data produced by the corresponding +// encoder which expects delta packed lengths followed by the bytes of data. +type DeltaLengthByteArrayDecoder struct { + decoder + + mem memory.Allocator + lengths []int32 +} + +// Type returns the underlying type which is handled by this encoder, ByteArrays only. +func (DeltaLengthByteArrayDecoder) Type() parquet.Type { + return parquet.Types.ByteArray +} + +func (d *DeltaLengthByteArrayDecoder) Allocator() memory.Allocator { return d.mem } + +// SetData sets in the expected data to the decoder which should be nvalues delta packed lengths +// followed by the rest of the byte array data immediately after. +func (d *DeltaLengthByteArrayDecoder) SetData(nvalues int, data []byte) error { + dec := DeltaBitPackInt32Decoder{ + deltaBitPackDecoder: &deltaBitPackDecoder{ + decoder: newDecoderBase(d.encoding, d.descr), + mem: d.mem}} + + if err := dec.SetData(nvalues, data); err != nil { + return err + } + d.lengths = make([]int32, nvalues) + dec.Decode(d.lengths) + + return d.decoder.SetData(nvalues, data[int(dec.bytesRead()):]) +} + +// Decode populates the passed in slice with data decoded until it hits the length of out +// or runs out of values in the column to decode, then returns the number of values actually decoded. 
+func (d *DeltaLengthByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { + max := utils.MinInt(len(out), d.nvals) + for i := 0; i < max; i++ { + out[i] = d.data[:d.lengths[i]:d.lengths[i]] + d.data = d.data[d.lengths[i]:] + } + d.nvals -= max + d.lengths = d.lengths[max:] + return max, nil +} + +// DecodeSpaced is like Decode, but for spaced data using the provided bitmap to determine where the nulls should be inserted. +func (d *DeltaLengthByteArrayDecoder) DecodeSpaced(out []parquet.ByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + toread := len(out) - nullCount + values, _ := d.Decode(out[:toread]) + if values != toread { + return values, xerrors.New("parquet: number of values / definition levels read did not match") + } + + return spacedExpand(out, nullCount, validBits, validBitsOffset), nil +} diff --git a/go/parquet/internal/encoding/encoder.go b/go/parquet/internal/encoding/encoder.go new file mode 100644 index 00000000000..49072c8e151 --- /dev/null +++ b/go/parquet/internal/encoding/encoder.go @@ -0,0 +1,311 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package encoding + +import ( + "math/bits" + "reflect" + + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/arrow/bitutil" + "github.com/apache/arrow/go/arrow/endian" + "github.com/apache/arrow/go/arrow/memory" + "github.com/apache/arrow/go/parquet" + format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/parquet/internal/utils" + "github.com/apache/arrow/go/parquet/schema" +) + +var toLEFunc func(uint32) uint32 + +func init() { + if endian.IsBigEndian { + toLEFunc = bits.ReverseBytes32 + } else { + toLEFunc = func(in uint32) uint32 { return in } + } +} + +//go:generate go run ../../../arrow/_tools/tmpl/main.go -i -data=physical_types.tmpldata plain_encoder_types.gen.go.tmpl typed_encoder.gen.go.tmpl + +// EncoderTraits is an interface for the different types to make it more +// convenient to construct encoders for specific types. +type EncoderTraits interface { + Encoder(format.Encoding, bool, *schema.Column, memory.Allocator) TypedEncoder +} + +// NewEncoder will return the appropriately typed encoder for the requested physical type +// and encoding. +// +// If mem is nil, memory.DefaultAllocator will be used. +func NewEncoder(t parquet.Type, e parquet.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder { + traits := getEncodingTraits(t) + if traits == nil { + return nil + } + + if mem == nil { + mem = memory.DefaultAllocator + } + return traits.Encoder(format.Encoding(e), useDict, descr, mem) +} + +type encoder struct { + descr *schema.Column + encoding format.Encoding + typeLen int + mem memory.Allocator + + sink *PooledBufferWriter +} + +// newEncoderBase constructs a new base encoder for embedding on the typed encoders +// encapsulating the common functionality. 
+func newEncoderBase(e format.Encoding, descr *schema.Column, mem memory.Allocator) encoder { + typelen := -1 + if descr != nil && descr.PhysicalType() == parquet.Types.FixedLenByteArray { + typelen = int(descr.TypeLength()) + } + return encoder{ + descr: descr, + encoding: e, + mem: mem, + typeLen: typelen, + sink: NewPooledBufferWriter(1024), + } +} + +// ReserveForWrite allocates n bytes so that the next n bytes written do not require new allocations. +func (e *encoder) ReserveForWrite(n int) { e.sink.Reserve(n) } +func (e *encoder) EstimatedDataEncodedSize() int64 { return int64(e.sink.Len()) } +func (e *encoder) Encoding() parquet.Encoding { return parquet.Encoding(e.encoding) } +func (e *encoder) Allocator() memory.Allocator { return e.mem } +func (e *encoder) append(data []byte) { e.sink.Write(data) } + +// FlushValues flushes any unwritten data to the buffer and returns the finished encoded buffer of data. +// This also clears the encoder, ownership of the data belongs to whomever called FlushValues, Release +// should be called on the resulting Buffer when done. +func (e *encoder) FlushValues() Buffer { return e.sink.Finish() } + +// Bytes returns the current bytes that have been written to the encoder's buffer but doesn't transfer ownership. +func (e *encoder) Bytes() []byte { return e.sink.Bytes() } + +// Reset drops the data currently in the encoder and resets for new use. +func (e *encoder) Reset() { e.sink.Reset(0) } + +type dictEncoder struct { + encoder + + dictEncodedSize int + idxBuffer *memory.Buffer + idxValues []int32 + memo MemoTable +} + +// newDictEncoderBase constructs and returns a dictionary encoder for the appropriate type using the passed +// in memo table for constructing the index. 
+func newDictEncoderBase(descr *schema.Column, memo MemoTable, mem memory.Allocator) dictEncoder { + return dictEncoder{ + encoder: newEncoderBase(format.Encoding_PLAIN_DICTIONARY, descr, mem), + idxBuffer: memory.NewResizableBuffer(mem), + memo: memo, + } +} + +// Reset drops all the currently encoded values from the index and indexes from the data to allow +// restarting the encoding process. +func (d *dictEncoder) Reset() { + d.encoder.Reset() + d.dictEncodedSize = 0 + d.idxValues = d.idxValues[:0] + d.idxBuffer.ResizeNoShrink(0) + d.memo.Reset() +} + +// append the passed index to the indexbuffer +func (d *dictEncoder) addIndex(idx int) { + if len(d.idxValues) == cap(d.idxValues) { + curLen := len(d.idxValues) + d.idxBuffer.ResizeNoShrink(arrow.Int32Traits.BytesRequired(bitutil.NextPowerOf2(curLen + 1))) + d.idxValues = arrow.Int32Traits.CastFromBytes(d.idxBuffer.Buf())[: curLen : d.idxBuffer.Len()/arrow.Int32SizeBytes] + } + d.idxValues = append(d.idxValues, int32(idx)) +} + +// FlushValues dumps all the currently buffered indexes that would become the data page to a buffer and +// returns it. +func (d *dictEncoder) FlushValues() Buffer { + buf := bufferPool.Get().(*memory.Buffer) + buf.Reserve(int(d.EstimatedDataEncodedSize())) + size := d.WriteIndices(buf.Buf()) + buf.ResizeNoShrink(size) + return poolBuffer{buf} +} + +// EstimatedDataEncodedSize returns the maximum number of bytes needed to store the RLE encoded indexes, not including the +// dictionary index in the computation. +func (d *dictEncoder) EstimatedDataEncodedSize() int64 { + return 1 + int64(utils.MaxBufferSize(d.BitWidth(), len(d.idxValues))+utils.MinBufferSize(d.BitWidth())) +} + +// NumEntries returns the number of entires in the dictionary index for this encoder. 
+func (d *dictEncoder) NumEntries() int { + return d.memo.Size() +} + +// BitWidth returns the max bitwidth that would be necessary for encoding the index values currently +// in the dictionary based on the size of the dictionary index. +func (d *dictEncoder) BitWidth() int { + switch d.NumEntries() { + case 0: + return 0 + case 1: + return 1 + default: + return bits.Len32(uint32(d.NumEntries() - 1)) + } +} + +// WriteDict writes the dictionary index to the given byte slice. +func (d *dictEncoder) WriteDict(out []byte) { + d.memo.CopyValues(out) +} + +// WriteIndices performs Run Length encoding on the indexes and the writes the encoded +// index value data to the provided byte slice, returning the number of bytes actually written. +func (d *dictEncoder) WriteIndices(out []byte) int { + out[0] = byte(d.BitWidth()) + + enc := utils.NewRleEncoder(utils.NewWriterAtBuffer(out[1:]), d.BitWidth()) + for _, idx := range d.idxValues { + if !enc.Put(uint64(idx)) { + return -1 + } + } + nbytes := enc.Flush() + + d.idxValues = d.idxValues[:0] + return nbytes + 1 +} + +// Put adds a value to the dictionary data column, inserting the value if it +// didn't already exist in the dictionary. +func (d *dictEncoder) Put(v interface{}) { + memoIdx, found, err := d.memo.GetOrInsert(v) + if err != nil { + panic(err) + } + if !found { + d.dictEncodedSize += int(reflect.TypeOf(v).Size()) + } + d.addIndex(memoIdx) +} + +// DictEncodedSize returns the current size of the encoded dictionary +func (d *dictEncoder) DictEncodedSize() int { + return d.dictEncodedSize +} + +// spacedCompress is a helper function for encoders to remove the slots in the slices passed in according +// to the bitmap which are null into an output slice that is no longer spaced out with slots for nulls. 
+func spacedCompress(src, out interface{}, validBits []byte, validBitsOffset int64) int {
+	nvalid := 0
+
+	// eachRun walks the runs of set bits covering total slots and passes the
+	// start and length of each run to visit.
+	eachRun := func(total int, visit func(start, count int)) {
+		reader := utils.NewSetBitRunReader(validBits, validBitsOffset, int64(total))
+		for {
+			run := reader.NextRun()
+			if run.Length == 0 {
+				return
+			}
+			visit(int(run.Pos), int(run.Length))
+		}
+	}
+
+	// A type switch is used so that every copy below works on concretely typed
+	// slices, which runs significantly faster than calling reflect.Copy.
+	// out must hold the same slice type as src. Any other src type copies
+	// nothing and 0 is returned.
+	switch s := src.(type) {
+	case []int32:
+		o := out.([]int32)
+		eachRun(len(s), func(start, count int) {
+			copy(o[nvalid:], s[start:start+count])
+			nvalid += count
+		})
+	case []int64:
+		o := out.([]int64)
+		eachRun(len(s), func(start, count int) {
+			copy(o[nvalid:], s[start:start+count])
+			nvalid += count
+		})
+	case []float32:
+		o := out.([]float32)
+		eachRun(len(s), func(start, count int) {
+			copy(o[nvalid:], s[start:start+count])
+			nvalid += count
+		})
+	case []float64:
+		o := out.([]float64)
+		eachRun(len(s), func(start, count int) {
+			copy(o[nvalid:], s[start:start+count])
+			nvalid += count
+		})
+	case []parquet.ByteArray:
+		o := out.([]parquet.ByteArray)
+		eachRun(len(s), func(start, count int) {
+			copy(o[nvalid:], s[start:start+count])
+			nvalid += count
+		})
+	case []parquet.FixedLenByteArray:
+		o := out.([]parquet.FixedLenByteArray)
+		eachRun(len(s), func(start, count int) {
+			copy(o[nvalid:], s[start:start+count])
+			nvalid += count
+		})
+	case []bool:
+		o := out.([]bool)
+		eachRun(len(s), func(start, count int) {
+			copy(o[nvalid:], s[start:start+count])
+			nvalid += count
+		})
+	}
+
+	return nvalid
+}
diff --git a/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go
new file mode 100644
index 00000000000..a23489290c8
--- /dev/null
+++ b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package encoding + +import ( + "math" + + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/utils" + "golang.org/x/xerrors" +) + +// PlainFixedLenByteArrayDecoder is a plain encoding decoder for Fixed Length Byte Arrays +type PlainFixedLenByteArrayDecoder struct { + decoder +} + +// Type returns the physical type this decoder operates on, FixedLength Byte Arrays +func (PlainFixedLenByteArrayDecoder) Type() parquet.Type { + return parquet.Types.FixedLenByteArray +} + +// Decode populates out with fixed length byte array values until either there are no more +// values to decode or the length of out has been filled. Then returns the total number of values +// that were decoded. +func (pflba *PlainFixedLenByteArrayDecoder) Decode(out []parquet.FixedLenByteArray) (int, error) { + max := utils.MinInt(len(out), pflba.nvals) + numBytesNeeded := max * pflba.typeLen + if numBytesNeeded > len(pflba.data) || numBytesNeeded > math.MaxInt32 { + return 0, xerrors.New("parquet: eof exception") + } + + for idx := range out[:max] { + out[idx] = pflba.data[:pflba.typeLen] + pflba.data = pflba.data[pflba.typeLen:] + } + return max, nil +} + +// DecodeSpaced does the same as Decode but spaces out the resulting slice according to the bitmap leaving space for null values +func (pflba *PlainFixedLenByteArrayDecoder) DecodeSpaced(out []parquet.FixedLenByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + toRead := len(out) - nullCount + valuesRead, err := pflba.Decode(out[:toRead]) + if err != nil { + return valuesRead, err + } + if valuesRead != toRead { + return valuesRead, xerrors.New("parquet: number of values / definitions levels read did not match") + } + + return spacedExpand(out, nullCount, validBits, validBitsOffset), nil +} diff --git a/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go b/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go new file mode 100644 index 00000000000..7eda0d38b0b --- 
/dev/null +++ b/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go @@ -0,0 +1,104 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package encoding + +import ( + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/utils" +) + +// PlainFixedLenByteArrayEncoder writes the raw bytes of the byte array +// always writing typeLength bytes for each value. 
+type PlainFixedLenByteArrayEncoder struct { + encoder + + bitSetReader utils.SetBitRunReader +} + +// Put writes the provided values to the encoder +func (enc *PlainFixedLenByteArrayEncoder) Put(in []parquet.FixedLenByteArray) { + typeLen := enc.descr.TypeLength() + if typeLen == 0 { + return + } + + bytesNeeded := len(in) * typeLen + enc.sink.Reserve(bytesNeeded) + for _, val := range in { + if val == nil { + panic("value cannot be nil") + } + enc.sink.UnsafeWrite(val[:typeLen]) + } +} + +// PutSpaced is like Put but works with data that is spaced out according to the passed in bitmap +func (enc *PlainFixedLenByteArrayEncoder) PutSpaced(in []parquet.FixedLenByteArray, validBits []byte, validBitsOffset int64) { + if validBits != nil { + if enc.bitSetReader == nil { + enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in))) + } else { + enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in))) + } + + for { + run := enc.bitSetReader.NextRun() + if run.Length == 0 { + break + } + enc.Put(in[int(run.Pos):int(run.Pos+run.Length)]) + } + } else { + enc.Put(in) + } +} + +// Type returns the underlying physical type this encoder works with, Fixed Length byte arrays. +func (PlainFixedLenByteArrayEncoder) Type() parquet.Type { + return parquet.Types.FixedLenByteArray +} + +// WriteDict overrides the embedded WriteDict function to call a specialized function +// for copying out the Fixed length values from the dictionary more efficiently. 
+func (enc *DictFixedLenByteArrayEncoder) WriteDict(out []byte) { + enc.memo.(BinaryMemoTable).CopyFixedWidthValues(0, enc.typeLen, out) +} + +// Put writes fixed length values to a dictionary encoded column +func (enc *DictFixedLenByteArrayEncoder) Put(in []parquet.FixedLenByteArray) { + for _, v := range in { + if v == nil { + v = empty[:] + } + memoIdx, found, err := enc.memo.GetOrInsert(v) + if err != nil { + panic(err) + } + if !found { + enc.dictEncodedSize += enc.typeLen + } + enc.addIndex(memoIdx) + } +} + +// PutSpaced is like Put but leaves space for nulls +func (enc *DictFixedLenByteArrayEncoder) PutSpaced(in []parquet.FixedLenByteArray, validBits []byte, validBitsOffset int64) { + utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error { + enc.Put(in[pos : pos+length]) + return nil + }) +} diff --git a/go/parquet/internal/encoding/physical_types.tmpldata b/go/parquet/internal/encoding/physical_types.tmpldata new file mode 100644 index 00000000000..0adeb9955bf --- /dev/null +++ b/go/parquet/internal/encoding/physical_types.tmpldata @@ -0,0 +1,52 @@ +[ + { + "Name": "Int32", + "name": "int32", + "lower": "int32", + "prefix": "arrow" + }, + { + "Name": "Int64", + "name": "int64", + "lower": "int64", + "prefix": "arrow" + }, + { + "Name": "Int96", + "name": "parquet.Int96", + "lower": "int96", + "prefix": "parquet" + }, + { + "Name": "Float32", + "name": "float32", + "lower": "float32", + "prefix": "arrow", + "physical": "Float" + }, + { + "Name": "Float64", + "name": "float64", + "lower": "float64", + "prefix": "arrow", + "physical": "Double" + }, + { + "Name": "Boolean", + "name": "bool", + "lower": "bool", + "prefix": "arrow" + }, + { + "Name": "ByteArray", + "name": "parquet.ByteArray", + "lower": "byteArray", + "prefix": "parquet" + }, + { + "Name": "FixedLenByteArray", + "name": "parquet.FixedLenByteArray", + "lower": "fixedLenByteArray", + "prefix": "parquet" + } +] diff --git 
a/go/parquet/internal/encoding/plain_encoder_types.gen.go b/go/parquet/internal/encoding/plain_encoder_types.gen.go new file mode 100644 index 00000000000..a3826339dfa --- /dev/null +++ b/go/parquet/internal/encoding/plain_encoder_types.gen.go @@ -0,0 +1,639 @@ +// Code generated by plain_encoder_types.gen.go.tmpl. DO NOT EDIT. + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package encoding + +import ( + "bytes" + "encoding/binary" + "math" + + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/arrow/endian" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/utils" + "golang.org/x/xerrors" +) + +var ( + writeInt32LE func(*encoder, []int32) + copyFromInt32LE func(dst []int32, src []byte) + writeInt64LE func(*encoder, []int64) + copyFromInt64LE func(dst []int64, src []byte) + writeInt96LE func(*encoder, []parquet.Int96) + copyFromInt96LE func(dst []parquet.Int96, src []byte) + writeFloat32LE func(*encoder, []float32) + copyFromFloat32LE func(dst []float32, src []byte) + writeFloat64LE func(*encoder, []float64) + copyFromFloat64LE func(dst []float64, src []byte) +) + +func init() { + // int96 is already internally represented as little endian data + // no need to have special behavior on big endian architectures + // for read/write, consumers will need to be aware of the fact + // that it is internally 12 bytes little endian when attempting + // to utilize it. 
+ writeInt96LE = func(e *encoder, in []parquet.Int96) { + e.append(parquet.Int96Traits.CastToBytes(in)) + } + copyFromInt96LE = func(dst []parquet.Int96, src []byte) { + copy(parquet.Int96Traits.CastToBytes(dst), src) + } + + if endian.IsBigEndian { + writeInt32LE = func(e *encoder, in []int32) { + binary.Write(e.sink, binary.LittleEndian, in) + } + copyFromInt32LE = func(dst []int32, src []byte) { + r := bytes.NewReader(src) + binary.Read(r, binary.LittleEndian, &dst) + } + writeInt64LE = func(e *encoder, in []int64) { + binary.Write(e.sink, binary.LittleEndian, in) + } + copyFromInt64LE = func(dst []int64, src []byte) { + r := bytes.NewReader(src) + binary.Read(r, binary.LittleEndian, &dst) + } + writeFloat32LE = func(e *encoder, in []float32) { + binary.Write(e.sink, binary.LittleEndian, in) + } + copyFromFloat32LE = func(dst []float32, src []byte) { + r := bytes.NewReader(src) + binary.Read(r, binary.LittleEndian, &dst) + } + writeFloat64LE = func(e *encoder, in []float64) { + binary.Write(e.sink, binary.LittleEndian, in) + } + copyFromFloat64LE = func(dst []float64, src []byte) { + r := bytes.NewReader(src) + binary.Read(r, binary.LittleEndian, &dst) + } + } else { + writeInt32LE = func(e *encoder, in []int32) { + e.append(arrow.Int32Traits.CastToBytes(in)) + } + copyFromInt32LE = func(dst []int32, src []byte) { + copy(arrow.Int32Traits.CastToBytes(dst), src) + } + writeInt64LE = func(e *encoder, in []int64) { + e.append(arrow.Int64Traits.CastToBytes(in)) + } + copyFromInt64LE = func(dst []int64, src []byte) { + copy(arrow.Int64Traits.CastToBytes(dst), src) + } + writeFloat32LE = func(e *encoder, in []float32) { + e.append(arrow.Float32Traits.CastToBytes(in)) + } + copyFromFloat32LE = func(dst []float32, src []byte) { + copy(arrow.Float32Traits.CastToBytes(dst), src) + } + writeFloat64LE = func(e *encoder, in []float64) { + e.append(arrow.Float64Traits.CastToBytes(in)) + } + copyFromFloat64LE = func(dst []float64, src []byte) { + 
copy(arrow.Float64Traits.CastToBytes(dst), src) + } + } +} + +// PlainInt32Encoder is an encoder for int32 values using Plain Encoding +// which in general is just storing the values as raw bytes of the appropriate size +type PlainInt32Encoder struct { + encoder + + bitSetReader utils.SetBitRunReader +} + +// Put encodes a slice of values into the underlying buffer +func (enc *PlainInt32Encoder) Put(in []int32) { + writeInt32LE(&enc.encoder, in) +} + +// PutSpaced encodes a slice of values into the underlying buffer which are spaced out +// including null values defined by the validBits bitmap starting at a given bit offset. +// the values are first compressed by having the null slots removed before writing to the buffer +func (enc *PlainInt32Encoder) PutSpaced(in []int32, validBits []byte, validBitsOffset int64) { + nbytes := arrow.Int32Traits.BytesRequired(len(in)) + enc.ReserveForWrite(nbytes) + + if enc.bitSetReader == nil { + enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in))) + } else { + enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in))) + } + + for { + run := enc.bitSetReader.NextRun() + if run.Length == 0 { + break + } + enc.Put(in[int(run.Pos):int(run.Pos+run.Length)]) + } +} + +// Type returns the underlying physical type this encoder is able to encode +func (PlainInt32Encoder) Type() parquet.Type { + return parquet.Types.Int32 +} + +// PlainInt32Decoder is a decoder specifically for decoding Plain Encoding data +// of int32 type. +type PlainInt32Decoder struct { + decoder + + bitSetReader utils.SetBitRunReader +} + +// Type returns the physical type this decoder is able to decode for +func (PlainInt32Decoder) Type() parquet.Type { + return parquet.Types.Int32 +} + +// Decode populates the given slice with values from the data to be decoded, +// decoding the min(len(out), remaining values). +// It returns the number of values actually decoded and any error encountered. 
+func (dec *PlainInt32Decoder) Decode(out []int32) (int, error) { + max := utils.MinInt(len(out), dec.nvals) + nbytes := int64(max) * int64(arrow.Int32SizeBytes) + if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { + return 0, xerrors.Errorf("parquet: eof exception decode plain Int32, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) + } + + copyFromInt32LE(out, dec.data[:nbytes]) + dec.data = dec.data[nbytes:] + dec.nvals -= max + return max, nil +} + +// DecodeSpaced is the same as decode, except it expands the data out to leave spaces for null values +// as defined by the bitmap provided. +func (dec *PlainInt32Decoder) DecodeSpaced(out []int32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + toread := len(out) - nullCount + values, err := dec.Decode(out[:toread]) + if err != nil { + return 0, err + } + if values != toread { + return 0, xerrors.New("parquet: number of values / definition levels read did not match") + } + + nvalues := len(out) + if nullCount == 0 { + return nvalues, nil + } + + idxDecode := nvalues - nullCount + if dec.bitSetReader == nil { + dec.bitSetReader = utils.NewReverseSetBitRunReader(validBits, validBitsOffset, int64(nvalues)) + } else { + dec.bitSetReader.Reset(validBits, validBitsOffset, int64(nvalues)) + } + + for { + run := dec.bitSetReader.NextRun() + if run.Length == 0 { + break + } + + idxDecode -= int(run.Length) + copy(out[int(run.Pos):], out[idxDecode:idxDecode+int(run.Length)]) + } + return nvalues, nil +} + +// PlainInt64Encoder is an encoder for int64 values using Plain Encoding +// which in general is just storing the values as raw bytes of the appropriate size +type PlainInt64Encoder struct { + encoder + + bitSetReader utils.SetBitRunReader +} + +// Put encodes a slice of values into the underlying buffer +func (enc *PlainInt64Encoder) Put(in []int64) { + writeInt64LE(&enc.encoder, in) +} + +// PutSpaced encodes a slice of values into the underlying buffer which are 
spaced out +// including null values defined by the validBits bitmap starting at a given bit offset. +// the values are first compressed by having the null slots removed before writing to the buffer +func (enc *PlainInt64Encoder) PutSpaced(in []int64, validBits []byte, validBitsOffset int64) { + nbytes := arrow.Int64Traits.BytesRequired(len(in)) + enc.ReserveForWrite(nbytes) + + if enc.bitSetReader == nil { + enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in))) + } else { + enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in))) + } + + for { + run := enc.bitSetReader.NextRun() + if run.Length == 0 { + break + } + enc.Put(in[int(run.Pos):int(run.Pos+run.Length)]) + } +} + +// Type returns the underlying physical type this encoder is able to encode +func (PlainInt64Encoder) Type() parquet.Type { + return parquet.Types.Int64 +} + +// PlainInt64Decoder is a decoder specifically for decoding Plain Encoding data +// of int64 type. +type PlainInt64Decoder struct { + decoder + + bitSetReader utils.SetBitRunReader +} + +// Type returns the physical type this decoder is able to decode for +func (PlainInt64Decoder) Type() parquet.Type { + return parquet.Types.Int64 +} + +// Decode populates the given slice with values from the data to be decoded, +// decoding the min(len(out), remaining values). +// It returns the number of values actually decoded and any error encountered. 
+func (dec *PlainInt64Decoder) Decode(out []int64) (int, error) { + max := utils.MinInt(len(out), dec.nvals) + nbytes := int64(max) * int64(arrow.Int64SizeBytes) + if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { + return 0, xerrors.Errorf("parquet: eof exception decode plain Int64, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) + } + + copyFromInt64LE(out, dec.data[:nbytes]) + dec.data = dec.data[nbytes:] + dec.nvals -= max + return max, nil +} + +// DecodeSpaced is the same as decode, except it expands the data out to leave spaces for null values +// as defined by the bitmap provided. +func (dec *PlainInt64Decoder) DecodeSpaced(out []int64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + toread := len(out) - nullCount + values, err := dec.Decode(out[:toread]) + if err != nil { + return 0, err + } + if values != toread { + return 0, xerrors.New("parquet: number of values / definition levels read did not match") + } + + nvalues := len(out) + if nullCount == 0 { + return nvalues, nil + } + + idxDecode := nvalues - nullCount + if dec.bitSetReader == nil { + dec.bitSetReader = utils.NewReverseSetBitRunReader(validBits, validBitsOffset, int64(nvalues)) + } else { + dec.bitSetReader.Reset(validBits, validBitsOffset, int64(nvalues)) + } + + for { + run := dec.bitSetReader.NextRun() + if run.Length == 0 { + break + } + + idxDecode -= int(run.Length) + copy(out[int(run.Pos):], out[idxDecode:idxDecode+int(run.Length)]) + } + return nvalues, nil +} + +// PlainInt96Encoder is an encoder for parquet.Int96 values using Plain Encoding +// which in general is just storing the values as raw bytes of the appropriate size +type PlainInt96Encoder struct { + encoder + + bitSetReader utils.SetBitRunReader +} + +// Put encodes a slice of values into the underlying buffer +func (enc *PlainInt96Encoder) Put(in []parquet.Int96) { + writeInt96LE(&enc.encoder, in) +} + +// PutSpaced encodes a slice of values into the underlying 
buffer which are spaced out +// including null values defined by the validBits bitmap starting at a given bit offset. +// the values are first compressed by having the null slots removed before writing to the buffer +func (enc *PlainInt96Encoder) PutSpaced(in []parquet.Int96, validBits []byte, validBitsOffset int64) { + nbytes := parquet.Int96Traits.BytesRequired(len(in)) + enc.ReserveForWrite(nbytes) + + if enc.bitSetReader == nil { + enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in))) + } else { + enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in))) + } + + for { + run := enc.bitSetReader.NextRun() + if run.Length == 0 { + break + } + enc.Put(in[int(run.Pos):int(run.Pos+run.Length)]) + } +} + +// Type returns the underlying physical type this encoder is able to encode +func (PlainInt96Encoder) Type() parquet.Type { + return parquet.Types.Int96 +} + +// PlainInt96Decoder is a decoder specifically for decoding Plain Encoding data +// of parquet.Int96 type. +type PlainInt96Decoder struct { + decoder + + bitSetReader utils.SetBitRunReader +} + +// Type returns the physical type this decoder is able to decode for +func (PlainInt96Decoder) Type() parquet.Type { + return parquet.Types.Int96 +} + +// Decode populates the given slice with values from the data to be decoded, +// decoding the min(len(out), remaining values). +// It returns the number of values actually decoded and any error encountered. 
+func (dec *PlainInt96Decoder) Decode(out []parquet.Int96) (int, error) { + max := utils.MinInt(len(out), dec.nvals) + nbytes := int64(max) * int64(parquet.Int96SizeBytes) + if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { + return 0, xerrors.Errorf("parquet: eof exception decode plain Int96, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) + } + + copyFromInt96LE(out, dec.data[:nbytes]) + dec.data = dec.data[nbytes:] + dec.nvals -= max + return max, nil +} + +// DecodeSpaced is the same as decode, except it expands the data out to leave spaces for null values +// as defined by the bitmap provided. +func (dec *PlainInt96Decoder) DecodeSpaced(out []parquet.Int96, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + toread := len(out) - nullCount + values, err := dec.Decode(out[:toread]) + if err != nil { + return 0, err + } + if values != toread { + return 0, xerrors.New("parquet: number of values / definition levels read did not match") + } + + nvalues := len(out) + if nullCount == 0 { + return nvalues, nil + } + + idxDecode := nvalues - nullCount + if dec.bitSetReader == nil { + dec.bitSetReader = utils.NewReverseSetBitRunReader(validBits, validBitsOffset, int64(nvalues)) + } else { + dec.bitSetReader.Reset(validBits, validBitsOffset, int64(nvalues)) + } + + for { + run := dec.bitSetReader.NextRun() + if run.Length == 0 { + break + } + + idxDecode -= int(run.Length) + copy(out[int(run.Pos):], out[idxDecode:idxDecode+int(run.Length)]) + } + return nvalues, nil +} + +// PlainFloat32Encoder is an encoder for float32 values using Plain Encoding +// which in general is just storing the values as raw bytes of the appropriate size +type PlainFloat32Encoder struct { + encoder + + bitSetReader utils.SetBitRunReader +} + +// Put encodes a slice of values into the underlying buffer +func (enc *PlainFloat32Encoder) Put(in []float32) { + writeFloat32LE(&enc.encoder, in) +} + +// PutSpaced encodes a slice of values into the 
underlying buffer which are spaced out +// including null values defined by the validBits bitmap starting at a given bit offset. +// the values are first compressed by having the null slots removed before writing to the buffer +func (enc *PlainFloat32Encoder) PutSpaced(in []float32, validBits []byte, validBitsOffset int64) { + nbytes := arrow.Float32Traits.BytesRequired(len(in)) + enc.ReserveForWrite(nbytes) + + if enc.bitSetReader == nil { + enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in))) + } else { + enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in))) + } + + for { + run := enc.bitSetReader.NextRun() + if run.Length == 0 { + break + } + enc.Put(in[int(run.Pos):int(run.Pos+run.Length)]) + } +} + +// Type returns the underlying physical type this encoder is able to encode +func (PlainFloat32Encoder) Type() parquet.Type { + return parquet.Types.Float +} + +// PlainFloat32Decoder is a decoder specifically for decoding Plain Encoding data +// of float32 type. +type PlainFloat32Decoder struct { + decoder + + bitSetReader utils.SetBitRunReader +} + +// Type returns the physical type this decoder is able to decode for +func (PlainFloat32Decoder) Type() parquet.Type { + return parquet.Types.Float +} + +// Decode populates the given slice with values from the data to be decoded, +// decoding the min(len(out), remaining values). +// It returns the number of values actually decoded and any error encountered. 
+func (dec *PlainFloat32Decoder) Decode(out []float32) (int, error) { + max := utils.MinInt(len(out), dec.nvals) + nbytes := int64(max) * int64(arrow.Float32SizeBytes) + if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { + return 0, xerrors.Errorf("parquet: eof exception decode plain Float32, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) + } + + copyFromFloat32LE(out, dec.data[:nbytes]) + dec.data = dec.data[nbytes:] + dec.nvals -= max + return max, nil +} + +// DecodeSpaced is the same as decode, except it expands the data out to leave spaces for null values +// as defined by the bitmap provided. +func (dec *PlainFloat32Decoder) DecodeSpaced(out []float32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + toread := len(out) - nullCount + values, err := dec.Decode(out[:toread]) + if err != nil { + return 0, err + } + if values != toread { + return 0, xerrors.New("parquet: number of values / definition levels read did not match") + } + + nvalues := len(out) + if nullCount == 0 { + return nvalues, nil + } + + idxDecode := nvalues - nullCount + if dec.bitSetReader == nil { + dec.bitSetReader = utils.NewReverseSetBitRunReader(validBits, validBitsOffset, int64(nvalues)) + } else { + dec.bitSetReader.Reset(validBits, validBitsOffset, int64(nvalues)) + } + + for { + run := dec.bitSetReader.NextRun() + if run.Length == 0 { + break + } + + idxDecode -= int(run.Length) + copy(out[int(run.Pos):], out[idxDecode:idxDecode+int(run.Length)]) + } + return nvalues, nil +} + +// PlainFloat64Encoder is an encoder for float64 values using Plain Encoding +// which in general is just storing the values as raw bytes of the appropriate size +type PlainFloat64Encoder struct { + encoder + + bitSetReader utils.SetBitRunReader +} + +// Put encodes a slice of values into the underlying buffer +func (enc *PlainFloat64Encoder) Put(in []float64) { + writeFloat64LE(&enc.encoder, in) +} + +// PutSpaced encodes a slice of values into the 
underlying buffer which are spaced out +// including null values defined by the validBits bitmap starting at a given bit offset. +// the values are first compressed by having the null slots removed before writing to the buffer +func (enc *PlainFloat64Encoder) PutSpaced(in []float64, validBits []byte, validBitsOffset int64) { + nbytes := arrow.Float64Traits.BytesRequired(len(in)) + enc.ReserveForWrite(nbytes) + + if enc.bitSetReader == nil { + enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in))) + } else { + enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in))) + } + + for { + run := enc.bitSetReader.NextRun() + if run.Length == 0 { + break + } + enc.Put(in[int(run.Pos):int(run.Pos+run.Length)]) + } +} + +// Type returns the underlying physical type this encoder is able to encode +func (PlainFloat64Encoder) Type() parquet.Type { + return parquet.Types.Double +} + +// PlainFloat64Decoder is a decoder specifically for decoding Plain Encoding data +// of float64 type. +type PlainFloat64Decoder struct { + decoder + + bitSetReader utils.SetBitRunReader +} + +// Type returns the physical type this decoder is able to decode for +func (PlainFloat64Decoder) Type() parquet.Type { + return parquet.Types.Double +} + +// Decode populates the given slice with values from the data to be decoded, +// decoding the min(len(out), remaining values). +// It returns the number of values actually decoded and any error encountered. 
+func (dec *PlainFloat64Decoder) Decode(out []float64) (int, error) { + max := utils.MinInt(len(out), dec.nvals) + nbytes := int64(max) * int64(arrow.Float64SizeBytes) + if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { + return 0, xerrors.Errorf("parquet: eof exception decode plain Float64, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) + } + + copyFromFloat64LE(out, dec.data[:nbytes]) + dec.data = dec.data[nbytes:] + dec.nvals -= max + return max, nil +} + +// DecodeSpaced is the same as decode, except it expands the data out to leave spaces for null values +// as defined by the bitmap provided. +func (dec *PlainFloat64Decoder) DecodeSpaced(out []float64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + toread := len(out) - nullCount + values, err := dec.Decode(out[:toread]) + if err != nil { + return 0, err + } + if values != toread { + return 0, xerrors.New("parquet: number of values / definition levels read did not match") + } + + nvalues := len(out) + if nullCount == 0 { + return nvalues, nil + } + + idxDecode := nvalues - nullCount + if dec.bitSetReader == nil { + dec.bitSetReader = utils.NewReverseSetBitRunReader(validBits, validBitsOffset, int64(nvalues)) + } else { + dec.bitSetReader.Reset(validBits, validBitsOffset, int64(nvalues)) + } + + for { + run := dec.bitSetReader.NextRun() + if run.Length == 0 { + break + } + + idxDecode -= int(run.Length) + copy(out[int(run.Pos):], out[idxDecode:idxDecode+int(run.Length)]) + } + return nvalues, nil +} diff --git a/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl b/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl new file mode 100644 index 00000000000..1b72497444c --- /dev/null +++ b/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl @@ -0,0 +1,182 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package encoding + +import ( + "encoding/binary" + + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/utils" +) + +var ( +{{range .In}} +{{if and (ne .Name "Boolean") (ne .Name "ByteArray") (ne .Name "FixedLenByteArray") -}} + write{{.Name}}LE func(*encoder, []{{.name}}) + copyFrom{{.Name}}LE func(dst []{{.name}}, src []byte) +{{- end}} +{{- end}} +) + +func init() { + // int96 is already internally represented as little endian data + // no need to have special behavior on big endian architectures + // for read/write, consumers will need to be aware of the fact + // that it is internally 12 bytes little endian when attempting + // to utilize it. 
+ writeInt96LE = func(e *encoder, in []parquet.Int96) { + e.append(parquet.Int96Traits.CastToBytes(in)) + } + copyFromInt96LE = func(dst []parquet.Int96, src []byte) { + copy(parquet.Int96Traits.CastToBytes(dst), src) + } + + if endian.IsBigEndian { +{{- range .In}} +{{- if and (ne .Name "Boolean") (ne .Name "ByteArray") (ne .Name "FixedLenByteArray") (ne .Name "Int96")}} + write{{.Name}}LE = func(e *encoder, in []{{.name}}) { + binary.Write(e.sink, binary.LittleEndian, in) + } + copyFrom{{.Name}}LE = func(dst []{{.name}}, src []byte) { + r := bytes.NewReader(src) + binary.Read(r, binary.LittleEndian, &dst) + } +{{- end -}} +{{- end}} + } else { +{{- range .In}} +{{- if and (ne .Name "Boolean") (ne .Name "ByteArray") (ne .Name "FixedLenByteArray") (ne .Name "Int96")}} + write{{.Name}}LE = func(e *encoder, in []{{.name}}) { + e.append({{.prefix}}.{{.Name}}Traits.CastToBytes(in)) + } + copyFrom{{.Name}}LE = func(dst []{{.name}}, src []byte) { + copy({{.prefix}}.{{.Name}}Traits.CastToBytes(dst), src) + } +{{- end -}} +{{- end}} + } +} + +{{range .In}} +{{if and (ne .Name "Boolean") (ne .Name "ByteArray") (ne .Name "FixedLenByteArray")}} +// Plain{{.Name}}Encoder is an encoder for {{.name}} values using Plain Encoding +// which in general is just storing the values as raw bytes of the appropriate size +type Plain{{.Name}}Encoder struct { + encoder + + bitSetReader utils.SetBitRunReader +} + +// Put encodes a slice of values into the underlying buffer +func (enc *Plain{{.Name}}Encoder) Put(in []{{.name}}) { + write{{.Name}}LE(&enc.encoder, in) +} + +// PutSpaced encodes a slice of values into the underlying buffer which are spaced out +// including null values defined by the validBits bitmap starting at a given bit offset. 
+// the values are first compressed by having the null slots removed before writing to the buffer +func (enc *Plain{{.Name}}Encoder) PutSpaced(in []{{.name}}, validBits []byte, validBitsOffset int64) { + nbytes := {{.prefix}}.{{.Name}}Traits.BytesRequired(len(in)) + enc.ReserveForWrite(nbytes) + + if enc.bitSetReader == nil { + enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in))) + } else { + enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in))) + } + + for { + run := enc.bitSetReader.NextRun() + if run.Length == 0 { + break + } + enc.Put(in[int(run.Pos):int(run.Pos+run.Length)]) + } +} + +// Type returns the underlying physical type this encoder is able to encode +func (Plain{{.Name}}Encoder) Type() parquet.Type { + return parquet.Types.{{if .physical}}{{.physical}}{{else}}{{.Name}}{{end}} +} + +// Plain{{.Name}}Decoder is a decoder specifically for decoding Plain Encoding data +// of {{.name}} type. +type Plain{{.Name}}Decoder struct { + decoder + + bitSetReader utils.SetBitRunReader +} + +// Type returns the physical type this decoder is able to decode for +func (Plain{{.Name}}Decoder) Type() parquet.Type { + return parquet.Types.{{if .physical}}{{.physical}}{{else}}{{.Name}}{{end}} +} + +// Decode populates the given slice with values from the data to be decoded, +// decoding the min(len(out), remaining values). +// It returns the number of values actually decoded and any error encountered. 
+func (dec *Plain{{.Name}}Decoder) Decode(out []{{.name}}) (int, error) { + max := utils.MinInt(len(out), dec.nvals) + nbytes := int64(max) * int64({{.prefix}}.{{.Name}}SizeBytes) + if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { + return 0, xerrors.Errorf("parquet: eof exception decode plain {{.Name}}, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) + } + + copyFrom{{.Name}}LE(out, dec.data[:nbytes]) + dec.data = dec.data[nbytes:] + dec.nvals -= max + return max, nil +} + +// DecodeSpaced is the same as decode, except it expands the data out to leave spaces for null values +// as defined by the bitmap provided. +func (dec *Plain{{.Name}}Decoder) DecodeSpaced(out []{{.name}}, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + toread := len(out) - nullCount + values, err := dec.Decode(out[:toread]) + if err != nil { + return 0, err + } + if values != toread { + return 0, xerrors.New("parquet: number of values / definition levels read did not match") + } + + nvalues := len(out) + if nullCount == 0 { + return nvalues, nil + } + + idxDecode := nvalues - nullCount + if dec.bitSetReader == nil { + dec.bitSetReader = utils.NewReverseSetBitRunReader(validBits, validBitsOffset, int64(nvalues)) + } else { + dec.bitSetReader.Reset(validBits, validBitsOffset, int64(nvalues)) + } + + for { + run := dec.bitSetReader.NextRun() + if run.Length == 0 { + break + } + + idxDecode -= int(run.Length) + copy(out[int(run.Pos):], out[idxDecode:idxDecode+int(run.Length)]) + } + return nvalues, nil +} +{{end}} +{{end}} diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go b/go/parquet/internal/encoding/typed_encoder.gen.go new file mode 100644 index 00000000000..abcfd95142e --- /dev/null +++ b/go/parquet/internal/encoding/typed_encoder.gen.go @@ -0,0 +1,1443 @@ +// Code generated by typed_encoder.gen.go.tmpl. DO NOT EDIT. 
+ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package encoding + +import ( + "unsafe" + + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/arrow/memory" + "github.com/apache/arrow/go/parquet" + format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/parquet/internal/utils" + "github.com/apache/arrow/go/parquet/schema" + "golang.org/x/xerrors" +) + +// fully typed encoder interfaces to enable writing against encoder/decoders +// without having to care about what encoding type is actually being used. 
var (
	// Package-level zero-value instances of the per-type traits structs, one
	// encoder/decoder pair per parquet physical type. Callers use them to
	// reach the Encoder / Decoder factory methods without naming the
	// unexported traits types.
	Int32EncoderTraits             int32EncoderTraits
	Int32DecoderTraits             int32DecoderTraits
	Int64EncoderTraits             int64EncoderTraits
	Int64DecoderTraits             int64DecoderTraits
	Int96EncoderTraits             int96EncoderTraits
	Int96DecoderTraits             int96DecoderTraits
	Float32EncoderTraits           float32EncoderTraits
	Float32DecoderTraits           float32DecoderTraits
	Float64EncoderTraits           float64EncoderTraits
	Float64DecoderTraits           float64DecoderTraits
	BooleanEncoderTraits           boolEncoderTraits
	BooleanDecoderTraits           boolDecoderTraits
	ByteArrayEncoderTraits         byteArrayEncoderTraits
	ByteArrayDecoderTraits         byteArrayDecoderTraits
	FixedLenByteArrayEncoderTraits fixedLenByteArrayEncoderTraits
	FixedLenByteArrayDecoderTraits fixedLenByteArrayDecoderTraits
)

// Int32Encoder is the interface for all encoding types that implement encoding
// int32 values.
type Int32Encoder interface {
	TypedEncoder
	// Put encodes the given values.
	Put([]int32)
	// PutSpaced encodes values from a slice that has slots for nulls; the
	// byte slice is the validity bitmap and the int64 is its starting bit offset.
	PutSpaced([]int32, []byte, int64)
}

// Int32Decoder is the interface for all encoding types that implement decoding
// int32 values.
type Int32Decoder interface {
	TypedDecoder
	// Decode fills the slice and returns the number of values decoded.
	Decode([]int32) (int, error)
	// DecodeSpaced is like Decode but leaves slots for nulls; the arguments
	// are the output slice, the null count, the validity bitmap and the
	// bitmap's starting bit offset.
	DecodeSpaced([]int32, int, []byte, int64) (int, error)
}

// the int32EncoderTraits struct is used to make it easy to create encoders and decoders based on type
type int32EncoderTraits struct{}

// Encoder returns an encoder for int32 type data, using the specified encoding type and whether or not
// it should be dictionary encoded.
+func (int32EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder { + + switch e { + case format.Encoding_PLAIN: + return &PlainInt32Encoder{encoder: newEncoderBase(e, descr, mem)} + case format.Encoding_DELTA_BINARY_PACKED: + return DeltaBitPackInt32Encoder{&deltaBitPackEncoder{ + encoder: newEncoderBase(e, descr, mem)}} + default: + panic("unimplemented encoding type") + } +} + +// int32DecoderTraits is a helper struct for providing information regardless of the type +// and used as a generic way to create a Decoder or Dictionary Decoder for int32 values +type int32DecoderTraits struct{} + +// BytesRequired returns the number of bytes required to store n int32 values. +func (int32DecoderTraits) BytesRequired(n int) int { + return arrow.Int32Traits.BytesRequired(n) +} + +// Decoder returns a decoder for int32 typed data of the requested encoding type if available +func (int32DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder { + if useDict { + return &DictInt32Decoder{dictDecoder{decoder: newDecoderBase(format.Encoding_RLE_DICTIONARY, descr), mem: mem}} + } + + switch e { + case parquet.Encodings.Plain: + return &PlainInt32Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} + case parquet.Encodings.DeltaBinaryPacked: + if mem == nil { + mem = memory.DefaultAllocator + } + return &DeltaBitPackInt32Decoder{ + deltaBitPackDecoder: &deltaBitPackDecoder{ + decoder: newDecoderBase(format.Encoding(e), descr), + mem: mem, + }} + default: + panic("unimplemented encoding type") + } +} + +// DictInt32Encoder is an encoder for int32 data using dictionary encoding +type DictInt32Encoder struct { + dictEncoder +} + +// Type returns the underlying physical type that can be encoded with this encoder +func (enc *DictInt32Encoder) Type() parquet.Type { + return parquet.Types.Int32 +} + +// WriteDict populates the byte slice with the dictionary index 
+func (enc *DictInt32Encoder) WriteDict(out []byte) { + enc.memo.CopyValues(arrow.Int32Traits.CastFromBytes(out)) +} + +// Put encodes the values passed in, adding to the index as needed. +func (enc *DictInt32Encoder) Put(in []int32) { + for _, val := range in { + enc.dictEncoder.Put(val) + } +} + +// PutSpaced is the same as Put but for when the data being encoded has slots open for +// null values, using the bitmap provided to skip values as needed. +func (enc *DictInt32Encoder) PutSpaced(in []int32, validBits []byte, validBitsOffset int64) { + utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error { + for i := int64(0); i < length; i++ { + enc.dictEncoder.Put(in[i+pos]) + } + return nil + }) +} + +// DictInt32Decoder is a decoder for decoding dictionary encoded data for int32 columns +type DictInt32Decoder struct { + dictDecoder +} + +// Type returns the underlying physical type that can be decoded with this decoder +func (DictInt32Decoder) Type() parquet.Type { + return parquet.Types.Int32 +} + +// Decode populates the passed in slice with min(len(out), remaining values) values, +// decoding using hte dictionary to get the actual values. Returns the number of values +// actually decoded and any error encountered. +func (d *DictInt32Decoder) Decode(out []int32) (int, error) { + vals := utils.MinInt(len(out), d.nvals) + decoded, err := d.decode(out[:vals]) + if err != nil { + return decoded, err + } + if vals != decoded { + return decoded, xerrors.New("parquet: dict eof exception") + } + d.nvals -= vals + return vals, nil +} + +// Decode spaced is like Decode but will space out the data leaving slots for null values +// based on the provided bitmap. 
+func (d *DictInt32Decoder) DecodeSpaced(out []int32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + vals := utils.MinInt(len(out), d.nvals) + decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) + if err != nil { + return decoded, err + } + if vals != decoded { + return decoded, xerrors.New("parquet: dict spaced eof exception") + } + d.nvals -= vals + return vals, nil +} + +// Int32DictConverter is a helper for dictionary handling which is used for converting +// run length encoded indexes into the actual values that are stored in the dictionary index page. +type Int32DictConverter struct { + valueDecoder Int32Decoder + dict []int32 + zeroVal int32 +} + +// ensure validates that we've decoded dictionary values up to the index +// provided so that we don't need to decode the entire dictionary at start. +func (dc *Int32DictConverter) ensure(idx utils.IndexType) error { + if len(dc.dict) <= int(idx) { + if cap(dc.dict) <= int(idx) { + val := make([]int32, int(idx+1)-len(dc.dict)) + n, err := dc.valueDecoder.Decode(val) + if err != nil { + return err + } + dc.dict = append(dc.dict, val[:n]...) + } else { + cur := len(dc.dict) + n, err := dc.valueDecoder.Decode(dc.dict[cur : idx+1]) + if err != nil { + return err + } + dc.dict = dc.dict[:cur+n] + } + } + return nil +} + +// IsValid verifies that the set of indexes passed in are all valid indexes +// in the dictionary and if necessary decodes dictionary indexes up to the index +// requested. 
+func (dc *Int32DictConverter) IsValid(idxes ...utils.IndexType) bool { + min, max := utils.GetMinMaxInt32(*(*[]int32)(unsafe.Pointer(&idxes))) + dc.ensure(utils.IndexType(max)) + + return min >= 0 && int(min) < len(dc.dict) && int(max) >= 0 && int(max) < len(dc.dict) +} + +// Fill populates the slice passed in entirely with the value at dictionary index indicated by val +func (dc *Int32DictConverter) Fill(out interface{}, val utils.IndexType) error { + o := out.([]int32) + if err := dc.ensure(val); err != nil { + return err + } + o[0] = dc.dict[val] + for i := 1; i < len(o); i *= 2 { + copy(o[i:], o[:i]) + } + return nil +} + +// FillZero populates the entire slice of out with the zero value for int32 +func (dc *Int32DictConverter) FillZero(out interface{}) { + o := out.([]int32) + o[0] = dc.zeroVal + for i := 1; i < len(o); i *= 2 { + copy(o[i:], o[:i]) + } +} + +// Copy populates the slice provided with the values in the dictionary at the indexes +// in the vals slice. +func (dc *Int32DictConverter) Copy(out interface{}, vals []utils.IndexType) error { + o := out.([]int32) + for idx, val := range vals { + o[idx] = dc.dict[val] + } + return nil +} + +// Int64Encoder is the interface for all encoding types that implement encoding +// int64 values. +type Int64Encoder interface { + TypedEncoder + Put([]int64) + PutSpaced([]int64, []byte, int64) +} + +// Int64Decoder is the interface for all encoding types that implement decoding +// int64 values. +type Int64Decoder interface { + TypedDecoder + Decode([]int64) (int, error) + DecodeSpaced([]int64, int, []byte, int64) (int, error) +} + +// the int64EncoderTraits struct is used to make it easy to create encoders and decoders based on type +type int64EncoderTraits struct{} + +// Encoder returns an encoder for int64 type data, using the specified encoding type and whether or not +// it should be dictionary encoded. 
+func (int64EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder { + + switch e { + case format.Encoding_PLAIN: + return &PlainInt64Encoder{encoder: newEncoderBase(e, descr, mem)} + case format.Encoding_DELTA_BINARY_PACKED: + return DeltaBitPackInt64Encoder{&deltaBitPackEncoder{ + encoder: newEncoderBase(e, descr, mem)}} + default: + panic("unimplemented encoding type") + } +} + +// int64DecoderTraits is a helper struct for providing information regardless of the type +// and used as a generic way to create a Decoder or Dictionary Decoder for int64 values +type int64DecoderTraits struct{} + +// BytesRequired returns the number of bytes required to store n int64 values. +func (int64DecoderTraits) BytesRequired(n int) int { + return arrow.Int64Traits.BytesRequired(n) +} + +// Decoder returns a decoder for int64 typed data of the requested encoding type if available +func (int64DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder { + if useDict { + return &DictInt64Decoder{dictDecoder{decoder: newDecoderBase(format.Encoding_RLE_DICTIONARY, descr), mem: mem}} + } + + switch e { + case parquet.Encodings.Plain: + return &PlainInt64Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} + case parquet.Encodings.DeltaBinaryPacked: + if mem == nil { + mem = memory.DefaultAllocator + } + return &DeltaBitPackInt64Decoder{ + deltaBitPackDecoder: &deltaBitPackDecoder{ + decoder: newDecoderBase(format.Encoding(e), descr), + mem: mem, + }} + default: + panic("unimplemented encoding type") + } +} + +// DictInt64Encoder is an encoder for int64 data using dictionary encoding +type DictInt64Encoder struct { + dictEncoder +} + +// Type returns the underlying physical type that can be encoded with this encoder +func (enc *DictInt64Encoder) Type() parquet.Type { + return parquet.Types.Int64 +} + +// WriteDict populates the byte slice with the dictionary index 
+func (enc *DictInt64Encoder) WriteDict(out []byte) { + enc.memo.CopyValues(arrow.Int64Traits.CastFromBytes(out)) +} + +// Put encodes the values passed in, adding to the index as needed. +func (enc *DictInt64Encoder) Put(in []int64) { + for _, val := range in { + enc.dictEncoder.Put(val) + } +} + +// PutSpaced is the same as Put but for when the data being encoded has slots open for +// null values, using the bitmap provided to skip values as needed. +func (enc *DictInt64Encoder) PutSpaced(in []int64, validBits []byte, validBitsOffset int64) { + utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error { + for i := int64(0); i < length; i++ { + enc.dictEncoder.Put(in[i+pos]) + } + return nil + }) +} + +// DictInt64Decoder is a decoder for decoding dictionary encoded data for int64 columns +type DictInt64Decoder struct { + dictDecoder +} + +// Type returns the underlying physical type that can be decoded with this decoder +func (DictInt64Decoder) Type() parquet.Type { + return parquet.Types.Int64 +} + +// Decode populates the passed in slice with min(len(out), remaining values) values, +// decoding using hte dictionary to get the actual values. Returns the number of values +// actually decoded and any error encountered. +func (d *DictInt64Decoder) Decode(out []int64) (int, error) { + vals := utils.MinInt(len(out), d.nvals) + decoded, err := d.decode(out[:vals]) + if err != nil { + return decoded, err + } + if vals != decoded { + return decoded, xerrors.New("parquet: dict eof exception") + } + d.nvals -= vals + return vals, nil +} + +// Decode spaced is like Decode but will space out the data leaving slots for null values +// based on the provided bitmap. 
+func (d *DictInt64Decoder) DecodeSpaced(out []int64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + vals := utils.MinInt(len(out), d.nvals) + decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) + if err != nil { + return decoded, err + } + if vals != decoded { + return decoded, xerrors.New("parquet: dict spaced eof exception") + } + d.nvals -= vals + return vals, nil +} + +// Int64DictConverter is a helper for dictionary handling which is used for converting +// run length encoded indexes into the actual values that are stored in the dictionary index page. +type Int64DictConverter struct { + valueDecoder Int64Decoder + dict []int64 + zeroVal int64 +} + +// ensure validates that we've decoded dictionary values up to the index +// provided so that we don't need to decode the entire dictionary at start. +func (dc *Int64DictConverter) ensure(idx utils.IndexType) error { + if len(dc.dict) <= int(idx) { + if cap(dc.dict) <= int(idx) { + val := make([]int64, int(idx+1)-len(dc.dict)) + n, err := dc.valueDecoder.Decode(val) + if err != nil { + return err + } + dc.dict = append(dc.dict, val[:n]...) + } else { + cur := len(dc.dict) + n, err := dc.valueDecoder.Decode(dc.dict[cur : idx+1]) + if err != nil { + return err + } + dc.dict = dc.dict[:cur+n] + } + } + return nil +} + +// IsValid verifies that the set of indexes passed in are all valid indexes +// in the dictionary and if necessary decodes dictionary indexes up to the index +// requested. 
+func (dc *Int64DictConverter) IsValid(idxes ...utils.IndexType) bool { + min, max := utils.GetMinMaxInt32(*(*[]int32)(unsafe.Pointer(&idxes))) + dc.ensure(utils.IndexType(max)) + + return min >= 0 && int(min) < len(dc.dict) && int(max) >= 0 && int(max) < len(dc.dict) +} + +// Fill populates the slice passed in entirely with the value at dictionary index indicated by val +func (dc *Int64DictConverter) Fill(out interface{}, val utils.IndexType) error { + o := out.([]int64) + if err := dc.ensure(val); err != nil { + return err + } + o[0] = dc.dict[val] + for i := 1; i < len(o); i *= 2 { + copy(o[i:], o[:i]) + } + return nil +} + +// FillZero populates the entire slice of out with the zero value for int64 +func (dc *Int64DictConverter) FillZero(out interface{}) { + o := out.([]int64) + o[0] = dc.zeroVal + for i := 1; i < len(o); i *= 2 { + copy(o[i:], o[:i]) + } +} + +// Copy populates the slice provided with the values in the dictionary at the indexes +// in the vals slice. +func (dc *Int64DictConverter) Copy(out interface{}, vals []utils.IndexType) error { + o := out.([]int64) + for idx, val := range vals { + o[idx] = dc.dict[val] + } + return nil +} + +// Int96Encoder is the interface for all encoding types that implement encoding +// parquet.Int96 values. +type Int96Encoder interface { + TypedEncoder + Put([]parquet.Int96) + PutSpaced([]parquet.Int96, []byte, int64) +} + +// Int96Decoder is the interface for all encoding types that implement decoding +// parquet.Int96 values. +type Int96Decoder interface { + TypedDecoder + Decode([]parquet.Int96) (int, error) + DecodeSpaced([]parquet.Int96, int, []byte, int64) (int, error) +} + +// the int96EncoderTraits struct is used to make it easy to create encoders and decoders based on type +type int96EncoderTraits struct{} + +// Encoder returns an encoder for int96 type data, using the specified encoding type and whether or not +// it should be dictionary encoded. 
+// dictionary encoding does not exist for this type and Encoder will panic if useDict is true +func (int96EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder { + + switch e { + case format.Encoding_PLAIN: + return &PlainInt96Encoder{encoder: newEncoderBase(e, descr, mem)} + default: + panic("unimplemented encoding type") + } +} + +// int96DecoderTraits is a helper struct for providing information regardless of the type +// and used as a generic way to create a Decoder or Dictionary Decoder for int96 values +type int96DecoderTraits struct{} + +// BytesRequired returns the number of bytes required to store n int96 values. +func (int96DecoderTraits) BytesRequired(n int) int { + return parquet.Int96Traits.BytesRequired(n) +} + +// Decoder returns a decoder for int96 typed data of the requested encoding type if available +func (int96DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder { + if useDict { + panic("dictionary decoding unimplemented for int96") + } + + switch e { + case parquet.Encodings.Plain: + return &PlainInt96Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} + default: + panic("unimplemented encoding type") + } +} + +// Float32Encoder is the interface for all encoding types that implement encoding +// float32 values. +type Float32Encoder interface { + TypedEncoder + Put([]float32) + PutSpaced([]float32, []byte, int64) +} + +// Float32Decoder is the interface for all encoding types that implement decoding +// float32 values. 
+type Float32Decoder interface { + TypedDecoder + Decode([]float32) (int, error) + DecodeSpaced([]float32, int, []byte, int64) (int, error) +} + +// the float32EncoderTraits struct is used to make it easy to create encoders and decoders based on type +type float32EncoderTraits struct{} + +// Encoder returns an encoder for float32 type data, using the specified encoding type and whether or not +// it should be dictionary encoded. +func (float32EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder { + + switch e { + case format.Encoding_PLAIN: + return &PlainFloat32Encoder{encoder: newEncoderBase(e, descr, mem)} + default: + panic("unimplemented encoding type") + } +} + +// float32DecoderTraits is a helper struct for providing information regardless of the type +// and used as a generic way to create a Decoder or Dictionary Decoder for float32 values +type float32DecoderTraits struct{} + +// BytesRequired returns the number of bytes required to store n float32 values. 
+func (float32DecoderTraits) BytesRequired(n int) int { + return arrow.Float32Traits.BytesRequired(n) +} + +// Decoder returns a decoder for float32 typed data of the requested encoding type if available +func (float32DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder { + if useDict { + return &DictFloat32Decoder{dictDecoder{decoder: newDecoderBase(format.Encoding_RLE_DICTIONARY, descr), mem: mem}} + } + + switch e { + case parquet.Encodings.Plain: + return &PlainFloat32Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} + default: + panic("unimplemented encoding type") + } +} + +// DictFloat32Encoder is an encoder for float32 data using dictionary encoding +type DictFloat32Encoder struct { + dictEncoder +} + +// Type returns the underlying physical type that can be encoded with this encoder +func (enc *DictFloat32Encoder) Type() parquet.Type { + return parquet.Types.Float +} + +// WriteDict populates the byte slice with the dictionary index +func (enc *DictFloat32Encoder) WriteDict(out []byte) { + enc.memo.CopyValues(arrow.Float32Traits.CastFromBytes(out)) +} + +// Put encodes the values passed in, adding to the index as needed. +func (enc *DictFloat32Encoder) Put(in []float32) { + for _, val := range in { + enc.dictEncoder.Put(val) + } +} + +// PutSpaced is the same as Put but for when the data being encoded has slots open for +// null values, using the bitmap provided to skip values as needed. 
+func (enc *DictFloat32Encoder) PutSpaced(in []float32, validBits []byte, validBitsOffset int64) { + utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error { + for i := int64(0); i < length; i++ { + enc.dictEncoder.Put(in[i+pos]) + } + return nil + }) +} + +// DictFloat32Decoder is a decoder for decoding dictionary encoded data for float32 columns +type DictFloat32Decoder struct { + dictDecoder +} + +// Type returns the underlying physical type that can be decoded with this decoder +func (DictFloat32Decoder) Type() parquet.Type { + return parquet.Types.Float +} + +// Decode populates the passed in slice with min(len(out), remaining values) values, +// decoding using hte dictionary to get the actual values. Returns the number of values +// actually decoded and any error encountered. +func (d *DictFloat32Decoder) Decode(out []float32) (int, error) { + vals := utils.MinInt(len(out), d.nvals) + decoded, err := d.decode(out[:vals]) + if err != nil { + return decoded, err + } + if vals != decoded { + return decoded, xerrors.New("parquet: dict eof exception") + } + d.nvals -= vals + return vals, nil +} + +// Decode spaced is like Decode but will space out the data leaving slots for null values +// based on the provided bitmap. +func (d *DictFloat32Decoder) DecodeSpaced(out []float32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + vals := utils.MinInt(len(out), d.nvals) + decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) + if err != nil { + return decoded, err + } + if vals != decoded { + return decoded, xerrors.New("parquet: dict spaced eof exception") + } + d.nvals -= vals + return vals, nil +} + +// Float32DictConverter is a helper for dictionary handling which is used for converting +// run length encoded indexes into the actual values that are stored in the dictionary index page. 
+type Float32DictConverter struct { + valueDecoder Float32Decoder + dict []float32 + zeroVal float32 +} + +// ensure validates that we've decoded dictionary values up to the index +// provided so that we don't need to decode the entire dictionary at start. +func (dc *Float32DictConverter) ensure(idx utils.IndexType) error { + if len(dc.dict) <= int(idx) { + if cap(dc.dict) <= int(idx) { + val := make([]float32, int(idx+1)-len(dc.dict)) + n, err := dc.valueDecoder.Decode(val) + if err != nil { + return err + } + dc.dict = append(dc.dict, val[:n]...) + } else { + cur := len(dc.dict) + n, err := dc.valueDecoder.Decode(dc.dict[cur : idx+1]) + if err != nil { + return err + } + dc.dict = dc.dict[:cur+n] + } + } + return nil +} + +// IsValid verifies that the set of indexes passed in are all valid indexes +// in the dictionary and if necessary decodes dictionary indexes up to the index +// requested. +func (dc *Float32DictConverter) IsValid(idxes ...utils.IndexType) bool { + min, max := utils.GetMinMaxInt32(*(*[]int32)(unsafe.Pointer(&idxes))) + dc.ensure(utils.IndexType(max)) + + return min >= 0 && int(min) < len(dc.dict) && int(max) >= 0 && int(max) < len(dc.dict) +} + +// Fill populates the slice passed in entirely with the value at dictionary index indicated by val +func (dc *Float32DictConverter) Fill(out interface{}, val utils.IndexType) error { + o := out.([]float32) + if err := dc.ensure(val); err != nil { + return err + } + o[0] = dc.dict[val] + for i := 1; i < len(o); i *= 2 { + copy(o[i:], o[:i]) + } + return nil +} + +// FillZero populates the entire slice of out with the zero value for float32 +func (dc *Float32DictConverter) FillZero(out interface{}) { + o := out.([]float32) + o[0] = dc.zeroVal + for i := 1; i < len(o); i *= 2 { + copy(o[i:], o[:i]) + } +} + +// Copy populates the slice provided with the values in the dictionary at the indexes +// in the vals slice. 
+func (dc *Float32DictConverter) Copy(out interface{}, vals []utils.IndexType) error { + o := out.([]float32) + for idx, val := range vals { + o[idx] = dc.dict[val] + } + return nil +} + +// Float64Encoder is the interface for all encoding types that implement encoding +// float64 values. +type Float64Encoder interface { + TypedEncoder + Put([]float64) + PutSpaced([]float64, []byte, int64) +} + +// Float64Decoder is the interface for all encoding types that implement decoding +// float64 values. +type Float64Decoder interface { + TypedDecoder + Decode([]float64) (int, error) + DecodeSpaced([]float64, int, []byte, int64) (int, error) +} + +// the float64EncoderTraits struct is used to make it easy to create encoders and decoders based on type +type float64EncoderTraits struct{} + +// Encoder returns an encoder for float64 type data, using the specified encoding type and whether or not +// it should be dictionary encoded. +func (float64EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder { + + switch e { + case format.Encoding_PLAIN: + return &PlainFloat64Encoder{encoder: newEncoderBase(e, descr, mem)} + default: + panic("unimplemented encoding type") + } +} + +// float64DecoderTraits is a helper struct for providing information regardless of the type +// and used as a generic way to create a Decoder or Dictionary Decoder for float64 values +type float64DecoderTraits struct{} + +// BytesRequired returns the number of bytes required to store n float64 values. 
+func (float64DecoderTraits) BytesRequired(n int) int { + return arrow.Float64Traits.BytesRequired(n) +} + +// Decoder returns a decoder for float64 typed data of the requested encoding type if available +func (float64DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder { + if useDict { + return &DictFloat64Decoder{dictDecoder{decoder: newDecoderBase(format.Encoding_RLE_DICTIONARY, descr), mem: mem}} + } + + switch e { + case parquet.Encodings.Plain: + return &PlainFloat64Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} + default: + panic("unimplemented encoding type") + } +} + +// DictFloat64Encoder is an encoder for float64 data using dictionary encoding +type DictFloat64Encoder struct { + dictEncoder +} + +// Type returns the underlying physical type that can be encoded with this encoder +func (enc *DictFloat64Encoder) Type() parquet.Type { + return parquet.Types.Double +} + +// WriteDict populates the byte slice with the dictionary index +func (enc *DictFloat64Encoder) WriteDict(out []byte) { + enc.memo.CopyValues(arrow.Float64Traits.CastFromBytes(out)) +} + +// Put encodes the values passed in, adding to the index as needed. +func (enc *DictFloat64Encoder) Put(in []float64) { + for _, val := range in { + enc.dictEncoder.Put(val) + } +} + +// PutSpaced is the same as Put but for when the data being encoded has slots open for +// null values, using the bitmap provided to skip values as needed. 
+func (enc *DictFloat64Encoder) PutSpaced(in []float64, validBits []byte, validBitsOffset int64) { + utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error { + for i := int64(0); i < length; i++ { + enc.dictEncoder.Put(in[i+pos]) + } + return nil + }) +} + +// DictFloat64Decoder is a decoder for decoding dictionary encoded data for float64 columns +type DictFloat64Decoder struct { + dictDecoder +} + +// Type returns the underlying physical type that can be decoded with this decoder +func (DictFloat64Decoder) Type() parquet.Type { + return parquet.Types.Double +} + +// Decode populates the passed in slice with min(len(out), remaining values) values, +// decoding using hte dictionary to get the actual values. Returns the number of values +// actually decoded and any error encountered. +func (d *DictFloat64Decoder) Decode(out []float64) (int, error) { + vals := utils.MinInt(len(out), d.nvals) + decoded, err := d.decode(out[:vals]) + if err != nil { + return decoded, err + } + if vals != decoded { + return decoded, xerrors.New("parquet: dict eof exception") + } + d.nvals -= vals + return vals, nil +} + +// Decode spaced is like Decode but will space out the data leaving slots for null values +// based on the provided bitmap. +func (d *DictFloat64Decoder) DecodeSpaced(out []float64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + vals := utils.MinInt(len(out), d.nvals) + decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) + if err != nil { + return decoded, err + } + if vals != decoded { + return decoded, xerrors.New("parquet: dict spaced eof exception") + } + d.nvals -= vals + return vals, nil +} + +// Float64DictConverter is a helper for dictionary handling which is used for converting +// run length encoded indexes into the actual values that are stored in the dictionary index page. 
+type Float64DictConverter struct { + valueDecoder Float64Decoder + dict []float64 + zeroVal float64 +} + +// ensure validates that we've decoded dictionary values up to the index +// provided so that we don't need to decode the entire dictionary at start. +func (dc *Float64DictConverter) ensure(idx utils.IndexType) error { + if len(dc.dict) <= int(idx) { + if cap(dc.dict) <= int(idx) { + val := make([]float64, int(idx+1)-len(dc.dict)) + n, err := dc.valueDecoder.Decode(val) + if err != nil { + return err + } + dc.dict = append(dc.dict, val[:n]...) + } else { + cur := len(dc.dict) + n, err := dc.valueDecoder.Decode(dc.dict[cur : idx+1]) + if err != nil { + return err + } + dc.dict = dc.dict[:cur+n] + } + } + return nil +} + +// IsValid verifies that the set of indexes passed in are all valid indexes +// in the dictionary and if necessary decodes dictionary indexes up to the index +// requested. +func (dc *Float64DictConverter) IsValid(idxes ...utils.IndexType) bool { + min, max := utils.GetMinMaxInt32(*(*[]int32)(unsafe.Pointer(&idxes))) + dc.ensure(utils.IndexType(max)) + + return min >= 0 && int(min) < len(dc.dict) && int(max) >= 0 && int(max) < len(dc.dict) +} + +// Fill populates the slice passed in entirely with the value at dictionary index indicated by val +func (dc *Float64DictConverter) Fill(out interface{}, val utils.IndexType) error { + o := out.([]float64) + if err := dc.ensure(val); err != nil { + return err + } + o[0] = dc.dict[val] + for i := 1; i < len(o); i *= 2 { + copy(o[i:], o[:i]) + } + return nil +} + +// FillZero populates the entire slice of out with the zero value for float64 +func (dc *Float64DictConverter) FillZero(out interface{}) { + o := out.([]float64) + o[0] = dc.zeroVal + for i := 1; i < len(o); i *= 2 { + copy(o[i:], o[:i]) + } +} + +// Copy populates the slice provided with the values in the dictionary at the indexes +// in the vals slice. 
+func (dc *Float64DictConverter) Copy(out interface{}, vals []utils.IndexType) error { + o := out.([]float64) + for idx, val := range vals { + o[idx] = dc.dict[val] + } + return nil +} + +// BooleanEncoder is the interface for all encoding types that implement encoding +// bool values. +type BooleanEncoder interface { + TypedEncoder + Put([]bool) + PutSpaced([]bool, []byte, int64) +} + +// BooleanDecoder is the interface for all encoding types that implement decoding +// bool values. +type BooleanDecoder interface { + TypedDecoder + Decode([]bool) (int, error) + DecodeSpaced([]bool, int, []byte, int64) (int, error) +} + +// the boolEncoderTraits struct is used to make it easy to create encoders and decoders based on type +type boolEncoderTraits struct{} + +// Encoder returns an encoder for bool type data, using the specified encoding type and whether or not +// it should be dictionary encoded. +// dictionary encoding does not exist for this type and Encoder will panic if useDict is true +func (boolEncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder { + + switch e { + case format.Encoding_PLAIN: + return &PlainBooleanEncoder{encoder: newEncoderBase(e, descr, mem)} + default: + panic("unimplemented encoding type") + } +} + +// boolDecoderTraits is a helper struct for providing information regardless of the type +// and used as a generic way to create a Decoder or Dictionary Decoder for bool values +type boolDecoderTraits struct{} + +// BytesRequired returns the number of bytes required to store n bool values. 
+func (boolDecoderTraits) BytesRequired(n int) int { + return arrow.BooleanTraits.BytesRequired(n) +} + +// Decoder returns a decoder for bool typed data of the requested encoding type if available +func (boolDecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder { + if useDict { + panic("dictionary decoding unimplemented for bool") + } + + switch e { + case parquet.Encodings.Plain: + return &PlainBooleanDecoder{decoder: newDecoderBase(format.Encoding(e), descr)} + default: + panic("unimplemented encoding type") + } +} + +// ByteArrayEncoder is the interface for all encoding types that implement encoding +// parquet.ByteArray values. +type ByteArrayEncoder interface { + TypedEncoder + Put([]parquet.ByteArray) + PutSpaced([]parquet.ByteArray, []byte, int64) +} + +// ByteArrayDecoder is the interface for all encoding types that implement decoding +// parquet.ByteArray values. +type ByteArrayDecoder interface { + TypedDecoder + Decode([]parquet.ByteArray) (int, error) + DecodeSpaced([]parquet.ByteArray, int, []byte, int64) (int, error) +} + +// the byteArrayEncoderTraits struct is used to make it easy to create encoders and decoders based on type +type byteArrayEncoderTraits struct{} + +// Encoder returns an encoder for byteArray type data, using the specified encoding type and whether or not +// it should be dictionary encoded. 
+func (byteArrayEncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder { + + switch e { + case format.Encoding_PLAIN: + return &PlainByteArrayEncoder{encoder: newEncoderBase(e, descr, mem)} + case format.Encoding_DELTA_LENGTH_BYTE_ARRAY: + return &DeltaLengthByteArrayEncoder{ + encoder: newEncoderBase(e, descr, mem), + lengthEncoder: &DeltaBitPackInt32Encoder{ + &deltaBitPackEncoder{encoder: newEncoderBase(e, descr, mem)}}, + } + case format.Encoding_DELTA_BYTE_ARRAY: + return &DeltaByteArrayEncoder{ + encoder: newEncoderBase(e, descr, mem), + } + default: + panic("unimplemented encoding type") + } +} + +// byteArrayDecoderTraits is a helper struct for providing information regardless of the type +// and used as a generic way to create a Decoder or Dictionary Decoder for byteArray values +type byteArrayDecoderTraits struct{} + +// BytesRequired returns the number of bytes required to store n byteArray values. +func (byteArrayDecoderTraits) BytesRequired(n int) int { + return parquet.ByteArrayTraits.BytesRequired(n) +} + +// Decoder returns a decoder for byteArray typed data of the requested encoding type if available +func (byteArrayDecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder { + if useDict { + return &DictByteArrayDecoder{dictDecoder{decoder: newDecoderBase(format.Encoding_RLE_DICTIONARY, descr), mem: mem}} + } + + switch e { + case parquet.Encodings.Plain: + return &PlainByteArrayDecoder{decoder: newDecoderBase(format.Encoding(e), descr)} + case parquet.Encodings.DeltaLengthByteArray: + if mem == nil { + mem = memory.DefaultAllocator + } + return &DeltaLengthByteArrayDecoder{ + decoder: newDecoderBase(format.Encoding(e), descr), + mem: mem, + } + case parquet.Encodings.DeltaByteArray: + if mem == nil { + mem = memory.DefaultAllocator + } + return &DeltaByteArrayDecoder{ + DeltaLengthByteArrayDecoder: &DeltaLengthByteArrayDecoder{ + 
decoder: newDecoderBase(format.Encoding(e), descr), + mem: mem, + }} + default: + panic("unimplemented encoding type") + } +} + +// DictByteArrayEncoder is an encoder for parquet.ByteArray data using dictionary encoding +type DictByteArrayEncoder struct { + dictEncoder +} + +// Type returns the underlying physical type that can be encoded with this encoder +func (enc *DictByteArrayEncoder) Type() parquet.Type { + return parquet.Types.ByteArray +} + +// DictByteArrayDecoder is a decoder for decoding dictionary encoded data for parquet.ByteArray columns +type DictByteArrayDecoder struct { + dictDecoder +} + +// Type returns the underlying physical type that can be decoded with this decoder +func (DictByteArrayDecoder) Type() parquet.Type { + return parquet.Types.ByteArray +} + +// Decode populates the passed in slice with min(len(out), remaining values) values, +// decoding using hte dictionary to get the actual values. Returns the number of values +// actually decoded and any error encountered. +func (d *DictByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { + vals := utils.MinInt(len(out), d.nvals) + decoded, err := d.decode(out[:vals]) + if err != nil { + return decoded, err + } + if vals != decoded { + return decoded, xerrors.New("parquet: dict eof exception") + } + d.nvals -= vals + return vals, nil +} + +// Decode spaced is like Decode but will space out the data leaving slots for null values +// based on the provided bitmap. 
+func (d *DictByteArrayDecoder) DecodeSpaced(out []parquet.ByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + vals := utils.MinInt(len(out), d.nvals) + decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) + if err != nil { + return decoded, err + } + if vals != decoded { + return decoded, xerrors.New("parquet: dict spaced eof exception") + } + d.nvals -= vals + return vals, nil +} + +// ByteArrayDictConverter is a helper for dictionary handling which is used for converting +// run length encoded indexes into the actual values that are stored in the dictionary index page. +type ByteArrayDictConverter struct { + valueDecoder ByteArrayDecoder + dict []parquet.ByteArray + zeroVal parquet.ByteArray +} + +// ensure validates that we've decoded dictionary values up to the index +// provided so that we don't need to decode the entire dictionary at start. +func (dc *ByteArrayDictConverter) ensure(idx utils.IndexType) error { + if len(dc.dict) <= int(idx) { + if cap(dc.dict) <= int(idx) { + val := make([]parquet.ByteArray, int(idx+1)-len(dc.dict)) + n, err := dc.valueDecoder.Decode(val) + if err != nil { + return err + } + dc.dict = append(dc.dict, val[:n]...) + } else { + cur := len(dc.dict) + n, err := dc.valueDecoder.Decode(dc.dict[cur : idx+1]) + if err != nil { + return err + } + dc.dict = dc.dict[:cur+n] + } + } + return nil +} + +// IsValid verifies that the set of indexes passed in are all valid indexes +// in the dictionary and if necessary decodes dictionary indexes up to the index +// requested. 
+func (dc *ByteArrayDictConverter) IsValid(idxes ...utils.IndexType) bool { + min, max := utils.GetMinMaxInt32(*(*[]int32)(unsafe.Pointer(&idxes))) + dc.ensure(utils.IndexType(max)) + + return min >= 0 && int(min) < len(dc.dict) && int(max) >= 0 && int(max) < len(dc.dict) +} + +// Fill populates the slice passed in entirely with the value at dictionary index indicated by val +func (dc *ByteArrayDictConverter) Fill(out interface{}, val utils.IndexType) error { + o := out.([]parquet.ByteArray) + if err := dc.ensure(val); err != nil { + return err + } + o[0] = dc.dict[val] + for i := 1; i < len(o); i *= 2 { + copy(o[i:], o[:i]) + } + return nil +} + +// FillZero populates the entire slice of out with the zero value for parquet.ByteArray +func (dc *ByteArrayDictConverter) FillZero(out interface{}) { + o := out.([]parquet.ByteArray) + o[0] = dc.zeroVal + for i := 1; i < len(o); i *= 2 { + copy(o[i:], o[:i]) + } +} + +// Copy populates the slice provided with the values in the dictionary at the indexes +// in the vals slice. +func (dc *ByteArrayDictConverter) Copy(out interface{}, vals []utils.IndexType) error { + o := out.([]parquet.ByteArray) + for idx, val := range vals { + o[idx] = dc.dict[val] + } + return nil +} + +// FixedLenByteArrayEncoder is the interface for all encoding types that implement encoding +// parquet.FixedLenByteArray values. +type FixedLenByteArrayEncoder interface { + TypedEncoder + Put([]parquet.FixedLenByteArray) + PutSpaced([]parquet.FixedLenByteArray, []byte, int64) +} + +// FixedLenByteArrayDecoder is the interface for all encoding types that implement decoding +// parquet.FixedLenByteArray values. 
+type FixedLenByteArrayDecoder interface { + TypedDecoder + Decode([]parquet.FixedLenByteArray) (int, error) + DecodeSpaced([]parquet.FixedLenByteArray, int, []byte, int64) (int, error) +} + +// the fixedLenByteArrayEncoderTraits struct is used to make it easy to create encoders and decoders based on type +type fixedLenByteArrayEncoderTraits struct{} + +// Encoder returns an encoder for fixedLenByteArray type data, using the specified encoding type and whether or not +// it should be dictionary encoded. +func (fixedLenByteArrayEncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder { + + switch e { + case format.Encoding_PLAIN: + return &PlainFixedLenByteArrayEncoder{encoder: newEncoderBase(e, descr, mem)} + default: + panic("unimplemented encoding type") + } +} + +// fixedLenByteArrayDecoderTraits is a helper struct for providing information regardless of the type +// and used as a generic way to create a Decoder or Dictionary Decoder for fixedLenByteArray values +type fixedLenByteArrayDecoderTraits struct{} + +// BytesRequired returns the number of bytes required to store n fixedLenByteArray values. 
+func (fixedLenByteArrayDecoderTraits) BytesRequired(n int) int { + return parquet.FixedLenByteArrayTraits.BytesRequired(n) +} + +// Decoder returns a decoder for fixedLenByteArray typed data of the requested encoding type if available +func (fixedLenByteArrayDecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder { + if useDict { + return &DictFixedLenByteArrayDecoder{dictDecoder{decoder: newDecoderBase(format.Encoding_RLE_DICTIONARY, descr), mem: mem}} + } + + switch e { + case parquet.Encodings.Plain: + return &PlainFixedLenByteArrayDecoder{decoder: newDecoderBase(format.Encoding(e), descr)} + default: + panic("unimplemented encoding type") + } +} + +// DictFixedLenByteArrayEncoder is an encoder for parquet.FixedLenByteArray data using dictionary encoding +type DictFixedLenByteArrayEncoder struct { + dictEncoder +} + +// Type returns the underlying physical type that can be encoded with this encoder +func (enc *DictFixedLenByteArrayEncoder) Type() parquet.Type { + return parquet.Types.FixedLenByteArray +} + +// DictFixedLenByteArrayDecoder is a decoder for decoding dictionary encoded data for parquet.FixedLenByteArray columns +type DictFixedLenByteArrayDecoder struct { + dictDecoder +} + +// Type returns the underlying physical type that can be decoded with this decoder +func (DictFixedLenByteArrayDecoder) Type() parquet.Type { + return parquet.Types.FixedLenByteArray +} + +// Decode populates the passed in slice with min(len(out), remaining values) values, +// decoding using hte dictionary to get the actual values. Returns the number of values +// actually decoded and any error encountered. 
+func (d *DictFixedLenByteArrayDecoder) Decode(out []parquet.FixedLenByteArray) (int, error) { + vals := utils.MinInt(len(out), d.nvals) + decoded, err := d.decode(out[:vals]) + if err != nil { + return decoded, err + } + if vals != decoded { + return decoded, xerrors.New("parquet: dict eof exception") + } + d.nvals -= vals + return vals, nil +} + +// Decode spaced is like Decode but will space out the data leaving slots for null values +// based on the provided bitmap. +func (d *DictFixedLenByteArrayDecoder) DecodeSpaced(out []parquet.FixedLenByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + vals := utils.MinInt(len(out), d.nvals) + decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) + if err != nil { + return decoded, err + } + if vals != decoded { + return decoded, xerrors.New("parquet: dict spaced eof exception") + } + d.nvals -= vals + return vals, nil +} + +// FixedLenByteArrayDictConverter is a helper for dictionary handling which is used for converting +// run length encoded indexes into the actual values that are stored in the dictionary index page. +type FixedLenByteArrayDictConverter struct { + valueDecoder FixedLenByteArrayDecoder + dict []parquet.FixedLenByteArray + zeroVal parquet.FixedLenByteArray +} + +// ensure validates that we've decoded dictionary values up to the index +// provided so that we don't need to decode the entire dictionary at start. +func (dc *FixedLenByteArrayDictConverter) ensure(idx utils.IndexType) error { + if len(dc.dict) <= int(idx) { + if cap(dc.dict) <= int(idx) { + val := make([]parquet.FixedLenByteArray, int(idx+1)-len(dc.dict)) + n, err := dc.valueDecoder.Decode(val) + if err != nil { + return err + } + dc.dict = append(dc.dict, val[:n]...) 
+ } else { + cur := len(dc.dict) + n, err := dc.valueDecoder.Decode(dc.dict[cur : idx+1]) + if err != nil { + return err + } + dc.dict = dc.dict[:cur+n] + } + } + return nil +} + +// IsValid verifies that the set of indexes passed in are all valid indexes +// in the dictionary and if necessary decodes dictionary indexes up to the index +// requested. +func (dc *FixedLenByteArrayDictConverter) IsValid(idxes ...utils.IndexType) bool { + min, max := utils.GetMinMaxInt32(*(*[]int32)(unsafe.Pointer(&idxes))) + dc.ensure(utils.IndexType(max)) + + return min >= 0 && int(min) < len(dc.dict) && int(max) >= 0 && int(max) < len(dc.dict) +} + +// Fill populates the slice passed in entirely with the value at dictionary index indicated by val +func (dc *FixedLenByteArrayDictConverter) Fill(out interface{}, val utils.IndexType) error { + o := out.([]parquet.FixedLenByteArray) + if err := dc.ensure(val); err != nil { + return err + } + o[0] = dc.dict[val] + for i := 1; i < len(o); i *= 2 { + copy(o[i:], o[:i]) + } + return nil +} + +// FillZero populates the entire slice of out with the zero value for parquet.FixedLenByteArray +func (dc *FixedLenByteArrayDictConverter) FillZero(out interface{}) { + o := out.([]parquet.FixedLenByteArray) + o[0] = dc.zeroVal + for i := 1; i < len(o); i *= 2 { + copy(o[i:], o[:i]) + } +} + +// Copy populates the slice provided with the values in the dictionary at the indexes +// in the vals slice. +func (dc *FixedLenByteArrayDictConverter) Copy(out interface{}, vals []utils.IndexType) error { + o := out.([]parquet.FixedLenByteArray) + for idx, val := range vals { + o[idx] = dc.dict[val] + } + return nil +} + +// NewDictConverter creates a dict converter of the appropriate type, using the passed in +// decoder as the decoder to decode the dictionary index. 
+func NewDictConverter(dict TypedDecoder) utils.DictionaryConverter { + switch dict.Type() { + case parquet.Types.Int32: + return &Int32DictConverter{valueDecoder: dict.(Int32Decoder), dict: make([]int32, 0, dict.ValuesLeft())} + case parquet.Types.Int64: + return &Int64DictConverter{valueDecoder: dict.(Int64Decoder), dict: make([]int64, 0, dict.ValuesLeft())} + case parquet.Types.Float: + return &Float32DictConverter{valueDecoder: dict.(Float32Decoder), dict: make([]float32, 0, dict.ValuesLeft())} + case parquet.Types.Double: + return &Float64DictConverter{valueDecoder: dict.(Float64Decoder), dict: make([]float64, 0, dict.ValuesLeft())} + case parquet.Types.ByteArray: + return &ByteArrayDictConverter{valueDecoder: dict.(ByteArrayDecoder), dict: make([]parquet.ByteArray, 0, dict.ValuesLeft())} + case parquet.Types.FixedLenByteArray: + return &FixedLenByteArrayDictConverter{valueDecoder: dict.(FixedLenByteArrayDecoder), dict: make([]parquet.FixedLenByteArray, 0, dict.ValuesLeft())} + default: + return nil + } +} + +// getEncodingTraits returns the encoding traits object for the physical type indicated, +// or nil when t is not one of the physical types handled in the switch. +func getEncodingTraits(t parquet.Type) EncoderTraits { + switch t { + case parquet.Types.Int32: + return Int32EncoderTraits + case parquet.Types.Int64: + return Int64EncoderTraits + case parquet.Types.Int96: + return Int96EncoderTraits + case parquet.Types.Float: + return Float32EncoderTraits + case parquet.Types.Double: + return Float64EncoderTraits + case parquet.Types.Boolean: + return BooleanEncoderTraits + case parquet.Types.ByteArray: + return ByteArrayEncoderTraits + case parquet.Types.FixedLenByteArray: + return FixedLenByteArrayEncoderTraits + default: + return nil + } +} + +// getDecodingTraits returns the decoding traits object for the physical type indicated, +// or nil when t is not one of the physical types handled in the switch. +func getDecodingTraits(t parquet.Type) DecoderTraits { + switch t { + case parquet.Types.Int32: + return Int32DecoderTraits + case parquet.Types.Int64: + return Int64DecoderTraits + case parquet.Types.Int96: +
return Int96DecoderTraits + case parquet.Types.Float: + return Float32DecoderTraits + case parquet.Types.Double: + return Float64DecoderTraits + case parquet.Types.Boolean: + return BooleanDecoderTraits + case parquet.Types.ByteArray: + return ByteArrayDecoderTraits + case parquet.Types.FixedLenByteArray: + return FixedLenByteArrayDecoderTraits + default: + return nil + } +} diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl new file mode 100644 index 00000000000..509266b6878 --- /dev/null +++ b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl @@ -0,0 +1,341 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package encoding + +import ( + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/schema" + format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/parquet/internal/utils" +) + +// fully typed encoder interfaces to enable writing against encoder/decoders +// without having to care about what encoding type is actually being used.
+ +var ( +{{range .In}} + {{.Name}}EncoderTraits {{.lower}}EncoderTraits + {{.Name}}DecoderTraits {{.lower}}DecoderTraits +{{- end}} +) + +{{range .In}} +// {{.Name}}Encoder is the interface for all encoding types that implement encoding +// {{.name}} values. +type {{.Name}}Encoder interface { + TypedEncoder + Put([]{{.name}}) + PutSpaced([]{{.name}}, []byte, int64) +} + +// {{.Name}}Decoder is the interface for all encoding types that implement decoding +// {{.name}} values. +type {{.Name}}Decoder interface { + TypedDecoder + Decode([]{{.name}}) (int, error) + DecodeSpaced([]{{.name}}, int, []byte, int64) (int, error) +} + +// the {{.lower}}EncoderTraits struct is used to make it easy to create encoders and decoders based on type +type {{.lower}}EncoderTraits struct{} + +// Encoder returns an encoder for {{.lower}} type data, using the specified encoding type and whether or not +// it should be dictionary encoded. +{{- if or (eq .Name "Boolean") (eq .Name "Int96")}} +// dictionary encoding does not exist for this type; useDict is currently not consulted +{{- end }} +func ({{.lower}}EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder { + {{/* if useDict { +{{- if or (eq .Name "Boolean") (eq .Name "Int96")}} + panic("parquet: no {{.name}} dictionary encoding") +{{- else}} + return &Dict{{.Name}}Encoder{newDictEncoderBase(descr, New{{if and (ne .Name "ByteArray") (ne .Name "FixedLenByteArray")}}{{.Name}}Dictionary(){{else}}BinaryDictionary(mem){{end}}, mem)} +{{- end}} + } */}} + + switch e { + case format.Encoding_PLAIN: + return &Plain{{.Name}}Encoder{encoder: newEncoderBase(e, descr, mem)} +{{- if or (eq .Name "Int32") (eq .Name "Int64")}} + case format.Encoding_DELTA_BINARY_PACKED: + return DeltaBitPack{{.Name}}Encoder{&deltaBitPackEncoder{ + encoder: newEncoderBase(e, descr, mem)}} +{{- end}} +{{- if eq .Name "ByteArray"}} + case format.Encoding_DELTA_LENGTH_BYTE_ARRAY: + return
&DeltaLengthByteArrayEncoder{ + encoder: newEncoderBase(e, descr, mem), + lengthEncoder: &DeltaBitPackInt32Encoder{ + &deltaBitPackEncoder{encoder: newEncoderBase(e, descr, mem)}}, + } + case format.Encoding_DELTA_BYTE_ARRAY: + return &DeltaByteArrayEncoder{ + encoder: newEncoderBase(e, descr, mem), + } +{{- end}} + default: + panic("unimplemented encoding type") + } +} + +// {{.lower}}DecoderTraits is a helper struct for providing information regardless of the type +// and used as a generic way to create a Decoder or Dictionary Decoder for {{.lower}} values +type {{.lower}}DecoderTraits struct{} + +// BytesRequired returns the number of bytes required to store n {{.lower}} values. +func ({{.lower}}DecoderTraits) BytesRequired(n int) int { + return {{.prefix}}.{{.Name}}Traits.BytesRequired(n) +} + +// Decoder returns a decoder for {{.lower}} typed data of the requested encoding type if available +func ({{.lower}}DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder { + if useDict { +{{- if and (ne .Name "Boolean") (ne .Name "Int96")}} + return &Dict{{.Name}}Decoder{dictDecoder{decoder: newDecoderBase(format.Encoding_RLE_DICTIONARY, descr), mem: mem}} +{{- else}} + panic("dictionary decoding unimplemented for {{.lower}}") +{{- end}} + } + + switch e { + case parquet.Encodings.Plain: + return &Plain{{.Name}}Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} +{{- if or (eq .Name "Int32") (eq .Name "Int64")}} + case parquet.Encodings.DeltaBinaryPacked: + if mem == nil { + mem = memory.DefaultAllocator + } + return &DeltaBitPack{{.Name}}Decoder{ + deltaBitPackDecoder: &deltaBitPackDecoder{ + decoder: newDecoderBase(format.Encoding(e), descr), + mem: mem, + }} +{{- end}} +{{- if eq .Name "ByteArray"}} + case parquet.Encodings.DeltaLengthByteArray: + if mem == nil { + mem = memory.DefaultAllocator + } + return &DeltaLengthByteArrayDecoder{ + decoder: newDecoderBase(format.Encoding(e), descr), + mem: mem,
+ } + case parquet.Encodings.DeltaByteArray: + if mem == nil { + mem = memory.DefaultAllocator + } + return &DeltaByteArrayDecoder{ + DeltaLengthByteArrayDecoder: &DeltaLengthByteArrayDecoder{ + decoder: newDecoderBase(format.Encoding(e), descr), + mem: mem, + }} +{{- end}} + default: + panic("unimplemented encoding type") + } +} + +{{if and (ne .Name "Boolean") (ne .Name "Int96")}} +// Dict{{.Name}}Encoder is an encoder for {{.name}} data using dictionary encoding +type Dict{{.Name}}Encoder struct { + dictEncoder +} + +// Type returns the underlying physical type that can be encoded with this encoder +func (enc *Dict{{.Name}}Encoder) Type() parquet.Type { + return parquet.Types.{{if .physical}}{{.physical}}{{else}}{{.Name}}{{end}} +} + +{{if and (ne .Name "ByteArray") (ne .Name "FixedLenByteArray")}} +// WriteDict populates the byte slice with the dictionary index +func (enc *Dict{{.Name}}Encoder) WriteDict(out []byte) { + enc.memo.CopyValues({{.prefix}}.{{.Name}}Traits.CastFromBytes(out)) +} + +// Put encodes the values passed in, adding to the index as needed. +func (enc *Dict{{.Name}}Encoder) Put(in []{{.name}}) { + for _, val := range in { + enc.dictEncoder.Put(val) + } +} + +// PutSpaced is the same as Put but for when the data being encoded has slots open for +// null values, using the bitmap provided to skip values as needed.
+func (enc *Dict{{.Name}}Encoder) PutSpaced(in []{{.name}}, validBits []byte, validBitsOffset int64) { + utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error { + for i := int64(0); i < length; i++ { + enc.dictEncoder.Put(in[i+pos]) + } + return nil + }) +} +{{end}} + +// Dict{{.Name}}Decoder is a decoder for decoding dictionary encoded data for {{.name}} columns +type Dict{{.Name}}Decoder struct { + dictDecoder +} + +// Type returns the underlying physical type that can be decoded with this decoder +func (Dict{{.Name}}Decoder) Type() parquet.Type { + return parquet.Types.{{if .physical}}{{.physical}}{{else}}{{.Name}}{{end}} +} + +// Decode populates the passed in slice with min(len(out), remaining values) values, +// decoding using the dictionary to get the actual values. Returns the number of values +// actually decoded and any error encountered. +func (d *Dict{{.Name}}Decoder) Decode(out []{{.name}}) (int, error) { + vals := utils.MinInt(len(out), d.nvals) + decoded, err := d.decode(out[:vals]) + if err != nil { + return decoded, err + } + if vals != decoded { + return decoded, xerrors.New("parquet: dict eof exception") + } + d.nvals -= vals + return vals, nil +} + +// DecodeSpaced is like Decode but will space out the data leaving slots for null values +// based on the provided bitmap. +func (d *Dict{{.Name}}Decoder) DecodeSpaced(out []{{.name}}, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + vals := utils.MinInt(len(out), d.nvals) + decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) + if err != nil { + return decoded, err + } + if vals != decoded { + return decoded, xerrors.New("parquet: dict spaced eof exception") + } + d.nvals -= vals + return vals, nil +} + +// {{.Name}}DictConverter is a helper for dictionary handling which is used for converting +// run length encoded indexes into the actual values that are stored in the dictionary index page.
+type {{.Name}}DictConverter struct { + valueDecoder {{.Name}}Decoder + dict []{{.name}} + zeroVal {{.name}} +} + +// ensure validates that we've decoded dictionary values up to the index +// provided so that we don't need to decode the entire dictionary at start. +func (dc *{{.Name}}DictConverter) ensure(idx utils.IndexType) error { + if len(dc.dict) <= int(idx) { + if cap(dc.dict) <= int(idx) { + val := make([]{{.name}}, int(idx+1)-len(dc.dict)) + n, err := dc.valueDecoder.Decode(val) + if err != nil { + return err + } + dc.dict = append(dc.dict, val[:n]...) + } else { + cur := len(dc.dict) + n, err := dc.valueDecoder.Decode(dc.dict[cur : idx+1]) + if err != nil { + return err + } + dc.dict = dc.dict[:cur+n] + } + } + return nil +} + +// IsValid verifies that the set of indexes passed in are all valid indexes +// in the dictionary and if necessary decodes dictionary indexes up to the index +// requested. +func (dc *{{.Name}}DictConverter) IsValid(idxes ...utils.IndexType) bool { + min, max := utils.GetMinMaxInt32(*(*[]int32)(unsafe.Pointer(&idxes))) + dc.ensure(utils.IndexType(max)) + + return min >= 0 && int(min) < len(dc.dict) && int(max) >= 0 && int(max) < len(dc.dict) +} + +// Fill populates the slice passed in entirely with the value at dictionary index indicated by val +func (dc *{{.Name}}DictConverter) Fill(out interface{}, val utils.IndexType) error { + o := out.([]{{.name}}) + if err := dc.ensure(val); err != nil { + return err + } + o[0] = dc.dict[val] + for i := 1; i < len(o); i *= 2 { + copy(o[i:], o[:i]) + } + return nil +} + +// FillZero populates the entire slice of out with the zero value for {{.name}} +func (dc *{{.Name}}DictConverter) FillZero(out interface{}) { + o := out.([]{{.name}}) + o[0] = dc.zeroVal + for i := 1; i < len(o); i *= 2 { + copy(o[i:], o[:i]) + } +} + +// Copy populates the slice provided with the values in the dictionary at the indexes +// in the vals slice. 
+func (dc *{{.Name}}DictConverter) Copy(out interface{}, vals []utils.IndexType) error { + o := out.([]{{.name}}) + for idx, val := range vals { + o[idx] = dc.dict[val] + } + return nil +} +{{end}} + +{{end}} + +// NewDictConverter creates a dict converter of the appropriate type, using the passed in +// decoder as the decoder to decode the dictionary index. +func NewDictConverter(dict TypedDecoder) utils.DictionaryConverter { + switch dict.Type() { + {{ range .In }}{{ if and (ne .Name "Boolean") (ne .Name "Int96") -}} + case parquet.Types.{{if .physical }}{{.physical}}{{else}}{{.Name}}{{end}}: + return &{{.Name}}DictConverter{valueDecoder: dict.({{.Name}}Decoder), dict: make([]{{.name}}, 0, dict.ValuesLeft())} + {{ end }}{{ end -}} + default: + return nil + } +} + +// helper function to get encoding traits object for the physical type indicated +func getEncodingTraits(t parquet.Type) EncoderTraits { + switch t { + {{ range .In -}} + case parquet.Types.{{if .physical}}{{.physical}}{{else}}{{.Name}}{{end}}: + return {{.Name}}EncoderTraits + {{ end -}} + default: + return nil + } +} + +// helper function to get decoding traits object for the physical type indicated +func getDecodingTraits(t parquet.Type) DecoderTraits { + switch t { + {{ range .In -}} + case parquet.Types.{{if .physical}}{{.physical}}{{else}}{{.Name}}{{end}}: + return {{.Name}}DecoderTraits + {{ end -}} + default: + return nil + } +} diff --git a/go/parquet/internal/encoding/types.go b/go/parquet/internal/encoding/types.go new file mode 100644 index 00000000000..fa3661e1119 --- /dev/null +++ b/go/parquet/internal/encoding/types.go @@ -0,0 +1,497 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package encoding + +import ( + "io" + "sync" + + "github.com/apache/arrow/go/arrow/bitutil" + "github.com/apache/arrow/go/arrow/memory" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/utils" + "golang.org/x/xerrors" +) + +// TypedDecoder is the general interface for all decoder types which can +// then be type asserted to a specific Type Decoder +type TypedDecoder interface { + // SetData updates the data in the decoder with the passed in byte slice and the + // stated number of values as expected to be decoded. + SetData(buffered int, buf []byte) error + // Encoding returns the encoding type that this decoder decodes data of + Encoding() parquet.Encoding + // ValuesLeft returns the number of remaining values to be decoded + ValuesLeft() int + // Type returns the physical type this can decode. 
+ Type() parquet.Type +} + +// DictDecoder is a special TypedDecoder which implements dictionary decoding +type DictDecoder interface { + TypedDecoder + // SetDict takes in a decoder which can decode the dictionary index to be used + SetDict(TypedDecoder) +} + +// TypedEncoder is the general interface for all encoding types which +// can then be type asserted to a specific Type Encoder +type TypedEncoder interface { + // Bytes returns the current slice of bytes that have been encoded but does not pass ownership + Bytes() []byte + // Reset resets the encoder and dumps all the data to let it be reused. + Reset() + // ReserveForWrite reserves n bytes in the buffer so that the next n bytes written will not + // cause a memory allocation. + ReserveForWrite(n int) + // EstimatedDataEncodedSize returns the estimated number of bytes in the buffer + // so far. + EstimatedDataEncodedSize() int64 + // FlushValues finishes up any unwritten data and returns the buffer of data passing + // ownership to the caller, Release needs to be called on the Buffer to free the memory + FlushValues() Buffer + // Encoding returns the type of encoding that this encoder operates with + Encoding() parquet.Encoding + // Allocator returns the allocator that was used when creating this encoder + Allocator() memory.Allocator + // Type returns the underlying physical type this encodes. + Type() parquet.Type +} + +// DictEncoder is a special kind of TypedEncoder which implements Dictionary +// encoding. +type DictEncoder interface { + TypedEncoder + // WriteIndices populates the byte slice with the final indexes of data and returns + // the number of bytes written + WriteIndices(out []byte) int + // DictEncodedSize returns the current size of the encoded dictionary index. + DictEncodedSize() int + // BitWidth returns the bitwidth needed to encode all of the index values based + // on the number of values in the dictionary index. 
+ BitWidth() int + // WriteDict populates out with the dictionary index values, out should be sized to at least + // as many bytes as DictEncodedSize + WriteDict(out []byte) + // NumEntries returns the number of values currently in the dictionary index. + NumEntries() int +} + +var bufferPool = sync.Pool{ + New: func() interface{} { + return memory.NewResizableBuffer(memory.DefaultAllocator) + }, +} + +// Buffer is an interface used as a general interface for handling buffers +// regardless of the underlying implementation. +type Buffer interface { + Len() int + Buf() []byte + Bytes() []byte + Resize(int) + Release() +} + +// poolBuffer is a buffer that will release the allocated buffer to a pool +// of buffers when release is called in order to allow it to be reused to +// cut down on the number of allocations. +type poolBuffer struct { + buf *memory.Buffer +} + +func (p poolBuffer) Resize(n int) { p.buf.ResizeNoShrink(n) } + +func (p poolBuffer) Len() int { return p.buf.Len() } + +func (p poolBuffer) Bytes() []byte { return p.buf.Bytes() } + +func (p poolBuffer) Buf() []byte { return p.buf.Buf() } + +func (p poolBuffer) Release() { + if p.buf.Mutable() { + memory.Set(p.buf.Buf(), 0) + p.buf.ResizeNoShrink(0) + bufferPool.Put(p.buf) + return + } + + p.buf.Release() +} + +// PooledBufferWriter uses buffers from the buffer pool to back it while +// implementing io.Writer and io.WriterAt interfaces +type PooledBufferWriter struct { + buf *memory.Buffer + pos int + offset int +} + +// NewPooledBufferWriter returns a new buffer with 'initial' bytes reserved +// and pre-allocated to guarantee that writing that many more bytes will not +// require another allocation. 
+func NewPooledBufferWriter(initial int) *PooledBufferWriter { + ret := &PooledBufferWriter{} + ret.Reserve(initial) + return ret +} + +// SetOffset sets an offset in the buffer which will ensure that all references +// to offsets and sizes in the buffer will be offset by this many bytes, allowing +// the writer to reserve space in the buffer. +func (b *PooledBufferWriter) SetOffset(offset int) { + b.pos -= b.offset + b.offset = offset + b.pos += offset +} + +// Reserve pre-allocates nbytes to ensure that the next write of that many bytes +// will not require another allocation. +func (b *PooledBufferWriter) Reserve(nbytes int) { + if b.buf == nil { + b.buf = bufferPool.Get().(*memory.Buffer) + } + + newCap := utils.MaxInt(b.buf.Cap()+b.offset, 256) + for newCap < b.pos+nbytes { + newCap = bitutil.NextPowerOf2(newCap) + } + b.buf.Reserve(newCap) +} + +// Reset will release any current memory and initialize it with the new +// allocated bytes. +func (b *PooledBufferWriter) Reset(initial int) { + if b.buf != nil { + memory.Set(b.buf.Buf(), 0) + b.buf.ResizeNoShrink(0) + bufferPool.Put(b.buf) + b.buf = nil + } + + b.pos = 0 + b.offset = 0 + b.Reserve(initial) +} + +// Finish returns the current buffer, with the responsibility for releasing +// the memory on the caller, resetting this writer to be re-used +func (b *PooledBufferWriter) Finish() Buffer { + if b.buf.Len() < b.pos { + b.buf.ResizeNoShrink(b.pos) + } + buf := poolBuffer{b.buf} + + b.buf = nil + b.Reset(0) + return buf +} + +// WriteAt writes the bytes from p into this buffer starting at offset. +// +// Does not affect the internal position of the writer. 
+func (b *PooledBufferWriter) WriteAt(p []byte, offset int64) (n int, err error) { + if len(p) == 0 { + return 0, nil + } + offset += int64(b.offset) + need := int(offset) + len(p) + + if need >= b.buf.Cap() { + b.Reserve(need - b.pos) + } + n = copy(b.buf.Buf()[offset:], p) + + if need > b.buf.Len() { + b.buf.ResizeNoShrink(need) + } + return +} + +func (b *PooledBufferWriter) Write(buf []byte) (int, error) { + if len(buf) == 0 { + return 0, nil + } + b.Reserve(len(buf)) + return b.UnsafeWrite(buf) +} + +func (b *PooledBufferWriter) UnsafeWriteCopy(ncopies int, pattern []byte) (int, error) { + nbytes := len(pattern) * ncopies + slc := b.buf.Buf()[b.pos : b.pos+nbytes] + copy(slc, pattern) + for j := len(pattern); j < len(slc); j *= 2 { + copy(slc[j:], slc[:j]) + } + b.pos += nbytes + return nbytes, nil +} + +// UnsafeWrite does not check the capacity / length before writing. +func (b *PooledBufferWriter) UnsafeWrite(buf []byte) (n int, err error) { + n = copy(b.buf.Buf()[b.pos:], buf) + b.pos += n + return +} + +func (b *PooledBufferWriter) Tell() int64 { + return int64(b.pos) +} + +// Bytes returns the current bytes slice of slice Len +func (b *PooledBufferWriter) Bytes() []byte { + if b.buf.Len() < b.pos { + b.buf.ResizeNoShrink(b.pos) + } + return b.buf.Bytes()[b.offset:] +} + +// Len provides the current Length of the byte slice +func (b *PooledBufferWriter) Len() int { + if b.buf.Len() < b.pos { + b.buf.ResizeNoShrink(b.pos) + } + return b.buf.Len() - b.offset +} + +// BufferWriter is a utility class for building and writing to a memory.Buffer +// with a given allocator that fulfills the interfaces io.Writer, io.WriterAt +// and io.Seeker, while providing the ability to pre-allocate memory. +type BufferWriter struct { + buffer *memory.Buffer + pos int + mem memory.Allocator + + offset int +} + +// NewBufferWriterFromBuffer wraps the provided buffer to allow it to fulfill these +// interfaces.
+func NewBufferWriterFromBuffer(b *memory.Buffer, mem memory.Allocator) *BufferWriter { + return &BufferWriter{b, 0, mem, 0} +} + +// NewBufferWriter constructs a buffer with initially reserved/allocated memory. +func NewBufferWriter(initial int, mem memory.Allocator) *BufferWriter { + buf := memory.NewResizableBuffer(mem) + buf.Reserve(initial) + return &BufferWriter{buffer: buf, mem: mem} +} + +func (b *BufferWriter) SetOffset(offset int) { + b.offset = offset +} + +// Bytes returns the current bytes slice of slice Len +func (b *BufferWriter) Bytes() []byte { + return b.buffer.Bytes()[b.offset:] +} + +// Len provides the current Length of the byte slice +func (b *BufferWriter) Len() int { + return b.buffer.Len() - b.offset +} + +// Cap returns the current capacity of the underlying buffer +func (b *BufferWriter) Cap() int { + return b.buffer.Cap() - b.offset +} + +// Finish returns the current buffer, with the responsibility for releasing +// the memory on the caller, resetting this writer to be re-used +func (b *BufferWriter) Finish() *memory.Buffer { + buf := b.buffer + b.buffer = nil + b.Reset(0) + return buf +} + +func (b *BufferWriter) Truncate() { + b.pos = 0 + b.offset = 0 + + if b.buffer == nil { + b.Reserve(1024) + } else { + b.buffer.ResizeNoShrink(0) + } +} + +// Reset will release any current memory and initialize it with the new +// allocated bytes. 
+func (b *BufferWriter) Reset(initial int) { + if b.buffer != nil { + b.buffer.Release() + } + + b.pos = 0 + b.offset = 0 + b.Reserve(initial) +} + +// Reserve ensures that there is at least enough capacity to write nbytes +// without another allocation, may allocate more than that in order to +// efficiently reduce allocations +func (b *BufferWriter) Reserve(nbytes int) { + if b.buffer == nil { + b.buffer = memory.NewResizableBuffer(b.mem) + } + newCap := utils.MaxInt(b.buffer.Cap()+b.offset, 256) + for newCap < b.pos+nbytes+b.offset { + newCap = bitutil.NextPowerOf2(newCap) + } + b.buffer.Reserve(newCap) +} + +// WriteAt writes the bytes from p into this buffer starting at offset. +// +// Does not affect the internal position of the writer. +func (b *BufferWriter) WriteAt(p []byte, offset int64) (n int, err error) { + if len(p) == 0 { + return 0, nil + } + offset += int64(b.offset) + need := int(offset) + len(p) + + if need >= b.buffer.Cap() { + b.Reserve(need - b.pos) + } + copy(b.buffer.Buf()[offset:], p) + + if need > b.buffer.Len() { + b.buffer.ResizeNoShrink(need) + } + return len(p), nil +} + +func (b *BufferWriter) Write(buf []byte) (int, error) { + if len(buf) == 0 { + return 0, nil + } + if b.buffer == nil { + b.Reserve(len(buf)) + } + + if b.pos+b.offset+len(buf) >= b.buffer.Cap() { + b.Reserve(len(buf)) + } + return b.UnsafeWrite(buf) +} + +func (b *BufferWriter) UnsafeWriteCopy(ncopies int, pattern []byte) (int, error) { + nbytes := len(pattern) * ncopies + slc := b.buffer.Buf()[b.pos : b.pos+nbytes] + copy(slc, pattern) + for j := len(pattern); j < len(slc); j *= 2 { + copy(slc[j:], slc[:j]) + } + b.pos += nbytes + b.buffer.ResizeNoShrink(b.pos) + return nbytes, nil +} + +// UnsafeWrite does not check the capacity / length before writing. 
+func (b *BufferWriter) UnsafeWrite(buf []byte) (int, error) { + copy(b.buffer.Buf()[b.pos+b.offset:], buf) + b.pos += len(buf) + b.buffer.ResizeNoShrink(b.pos) + return len(buf), nil +} + +// Seek fulfills the io.Seeker interface, returning its new position. +// whence must be io.SeekStart, io.SeekCurrent or io.SeekEnd; any other value moves the position to 0. +func (b *BufferWriter) Seek(offset int64, whence int) (int64, error) { + newPos, offs := 0, int(offset) + offs += b.offset + switch whence { + case io.SeekStart: + newPos = offs + case io.SeekCurrent: + newPos = b.pos + offs + case io.SeekEnd: + newPos = b.buffer.Len() + offs + } + if newPos < 0 { + return 0, xerrors.New("negative result pos") + } + b.pos = newPos + return int64(newPos), nil +} + +func (b *BufferWriter) Tell() int64 { + return int64(b.pos) +} + +// MemoTable is an interface that can be used to swap out implementations of the hash table +// used for handling dictionary encoding. Dictionary encoding is built against this interface +// to make it easy for code generation and changing implementations. +// +// Values should remember the order they are inserted to generate a valid dictionary index +type MemoTable interface { + // Reset drops everything in the table allowing it to be reused + Reset() + // Size returns the current number of unique values stored in the table + // including whether or not a null value has been passed in using GetOrInsertNull + Size() int + // CopyValues populates out with the values currently in the table, out must + // be a slice of the appropriate type for the table type. + CopyValues(out interface{}) + // CopyValuesSubset is like CopyValues but only copies a subset of values starting + // at the indicated index. + CopyValuesSubset(start int, out interface{}) + // Get returns the index in the table of the specified value, and a boolean indicating + // whether or not the value was found in the table. Will panic if val is not the appropriate + // type for the underlying table.
+ Get(val interface{}) (int, bool) + // GetOrInsert is the same as Get, except if the value is not currently in the table it will + // be inserted into the table. + GetOrInsert(val interface{}) (idx int, existed bool, err error) + // GetNull returns the index of the null value and whether or not it was found in the table + GetNull() (int, bool) + // GetOrInsertNull returns the index of the null value, if it didn't already exist in the table, + // it is inserted. + GetOrInsertNull() (idx int, existed bool) +} + +// BinaryMemoTable is an extension of the MemoTable interface adding extra methods +// for handling byte arrays/strings/fixed length byte arrays. +type BinaryMemoTable interface { + MemoTable + // ValuesSize returns the total number of bytes needed to copy all of the values + // from this table. + ValuesSize() int + // CopyOffsets populates out with the start and end offsets of each value in the + // table data. Out should be sized to Size()+1 to accommodate all of the offsets. + CopyOffsets(out []int8) + // CopyOffsetsSubset is like CopyOffsets but only gets a subset of the offsets + // starting at the specified index. + CopyOffsetsSubset(start int, out []int8) + // CopyFixedWidthValues exists to cope with the fact that the table doesn't track + // the fixed width when inserting the null value into the databuffer populating + // a zero length byte slice for the null value (if found). + CopyFixedWidthValues(start int, width int, out []byte) + // VisitValues calls visitFn on each value in the table starting with the index specified + VisitValues(start int, visitFn func([]byte)) + // Retain increases the reference count of the separately stored binary data that is + // kept alongside the table which contains all of the values in the table. This is + // safe to call simultaneously across multiple goroutines. + Retain() + // Release decreases the reference count by 1 of the separately stored binary data + // kept alongside the table containing the values.
When the reference count goes to + // 0, the memory is freed. This is safe to call across multiple goroutines simultaneously. + Release() +} diff --git a/go/parquet/internal/testutils/utils.go b/go/parquet/internal/testutils/utils.go new file mode 100644 index 00000000000..503c60044ab --- /dev/null +++ b/go/parquet/internal/testutils/utils.go @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +package testutils + +import ( + "reflect" + + "github.com/apache/arrow/go/parquet" +) + +var typeToParquetTypeMap = map[reflect.Type]parquet.Type{ + reflect.TypeOf(true): parquet.Types.Boolean, + reflect.TypeOf(int32(0)): parquet.Types.Int32, + reflect.TypeOf(int64(0)): parquet.Types.Int64, + reflect.TypeOf(float32(0)): parquet.Types.Float, + reflect.TypeOf(float64(0)): parquet.Types.Double, + reflect.TypeOf(parquet.ByteArray{}): parquet.Types.ByteArray, + reflect.TypeOf(parquet.Int96{}): parquet.Types.Int96, + reflect.TypeOf(parquet.FixedLenByteArray{}): parquet.Types.FixedLenByteArray, +} + +func TypeToParquetType(typ reflect.Type) parquet.Type { + ret, ok := typeToParquetTypeMap[typ] + if !ok { + panic("invalid type for parquet type") + } + return ret +} diff --git a/go/parquet/internal/utils/Makefile b/go/parquet/internal/utils/Makefile index 1de4308dc55..41cc68df5bc 100644 --- a/go/parquet/internal/utils/Makefile +++ b/go/parquet/internal/utils/Makefile @@ -18,10 +18,10 @@ PERL_FIXUP_ROTATE=perl -i -pe 's/(ro[rl]\s+\w{2,3})$$/\1, 1/' C2GOASM=c2goasm -CC=clang +CC=clang-11 C_FLAGS=-target x86_64-unknown-none -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 \ -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -O3 -fno-builtin -ffast-math -fno-jump-tables -I_lib -ASM_FLAGS_AVX2=-mavx2 -mfma -mllvm -force-vector-width=32 +ASM_FLAGS_AVX2=-mavx2 -mfma ASM_FLAGS_SSE4=-msse4 ASM_FLAGS_BMI2=-mbmi2 ASM_FLAGS_POPCNT=-mpopcnt diff --git a/go/parquet/internal/utils/_lib/bit_packing_avx2.s b/go/parquet/internal/utils/_lib/bit_packing_avx2.s index 222bc3ce413..84a5cca2ea3 100644 --- a/go/parquet/internal/utils/_lib/bit_packing_avx2.s +++ b/go/parquet/internal/utils/_lib/bit_packing_avx2.s @@ -4007,6 +4007,6 @@ unpack32_avx2: # @unpack32_avx2 .Lfunc_end0: .size unpack32_avx2, .Lfunc_end0-unpack32_avx2 # -- End function - .ident "Ubuntu clang version 11.1.0-++20210204121720+1fdec59bffc1-1~exp1~20210203232336.162" + .ident "Debian clang version 
11.1.0-++20210428103820+1fdec59bffc1-1~exp1~20210428204437.162" .section ".note.GNU-stack","",@progbits .addrsig diff --git a/go/parquet/internal/utils/_lib/min_max_avx2.s b/go/parquet/internal/utils/_lib/min_max_avx2.s index dbf9a895ae3..ec24a731d69 100644 --- a/go/parquet/internal/utils/_lib/min_max_avx2.s +++ b/go/parquet/internal/utils/_lib/min_max_avx2.s @@ -15,173 +15,89 @@ int32_max_min_avx2: # @int32_max_min_avx2 # %bb.0: push rbp mov rbp, rsp - and rsp, -32 - sub rsp, 64 + and rsp, -8 test esi, esi jle .LBB0_1 # %bb.2: mov r8d, esi cmp esi, 31 - ja .LBB0_6 + ja .LBB0_4 # %bb.3: - mov eax, -2147483648 - mov r9d, 2147483647 - xor r11d, r11d - jmp .LBB0_4 + mov r10d, -2147483648 + mov eax, 2147483647 + xor r9d, r9d + jmp .LBB0_7 .LBB0_1: - mov r9d, 2147483647 - mov eax, -2147483648 - jmp .LBB0_14 -.LBB0_6: - mov r11d, r8d - and r11d, -32 - lea rax, [r11 - 32] - mov r10, rax - shr r10, 5 - add r10, 1 - mov r9d, r10d - and r9d, 3 - cmp rax, 96 - jae .LBB0_8 -# %bb.7: - vpbroadcastd ymm0, dword ptr [rip + .LCPI0_0] # ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648] - vpbroadcastd ymm1, dword ptr [rip + .LCPI0_1] # ymm1 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647] - xor eax, eax - vmovdqa ymm2, ymm1 - vmovdqa ymm4, ymm1 - vmovdqa ymm6, ymm1 - vmovdqa ymm3, ymm0 - vmovdqa ymm5, ymm0 - vmovdqa ymm7, ymm0 - jmp .LBB0_10 -.LBB0_8: - and r10, -4 - vpbroadcastd ymm0, dword ptr [rip + .LCPI0_0] # ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648] - neg r10 - vpbroadcastd ymm1, dword ptr [rip + .LCPI0_1] # ymm1 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647] + mov eax, 2147483647 + mov esi, -2147483648 + jmp .LBB0_8 +.LBB0_4: + mov r9d, r8d + vpbroadcastd ymm4, dword ptr [rip + .LCPI0_0] # ymm4 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648] + 
and r9d, -32 + vpbroadcastd ymm0, dword ptr [rip + .LCPI0_1] # ymm0 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647] xor eax, eax - vmovdqa ymm2, ymm1 - vmovdqa ymm4, ymm1 - vmovdqa ymm6, ymm1 + vmovdqa ymm1, ymm0 + vmovdqa ymm2, ymm0 vmovdqa ymm3, ymm0 - vmovdqa ymm5, ymm0 - vmovdqa ymm7, ymm0 + vmovdqa ymm5, ymm4 + vmovdqa ymm6, ymm4 + vmovdqa ymm7, ymm4 .p2align 4, 0x90 -.LBB0_9: # =>This Inner Loop Header: Depth=1 +.LBB0_5: # =>This Inner Loop Header: Depth=1 vmovdqu ymm8, ymmword ptr [rdi + 4*rax] vmovdqu ymm9, ymmword ptr [rdi + 4*rax + 32] vmovdqu ymm10, ymmword ptr [rdi + 4*rax + 64] vmovdqu ymm11, ymmword ptr [rdi + 4*rax + 96] - vpminsd ymm6, ymm6, ymm11 - vpminsd ymm4, ymm4, ymm10 - vpminsd ymm1, ymm1, ymm8 - vpminsd ymm2, ymm2, ymm9 + vpminsd ymm0, ymm0, ymm8 + vpminsd ymm1, ymm1, ymm9 + vpminsd ymm2, ymm2, ymm10 + vpminsd ymm3, ymm3, ymm11 + vpmaxsd ymm4, ymm4, ymm8 + vpmaxsd ymm5, ymm5, ymm9 + vpmaxsd ymm6, ymm6, ymm10 vpmaxsd ymm7, ymm7, ymm11 - vpmaxsd ymm5, ymm5, ymm10 - vpmaxsd ymm0, ymm0, ymm8 - vpmaxsd ymm3, ymm3, ymm9 - vmovdqu ymm8, ymmword ptr [rdi + 4*rax + 224] - vmovdqu ymm9, ymmword ptr [rdi + 4*rax + 192] - vmovdqu ymm10, ymmword ptr [rdi + 4*rax + 128] - vmovdqu ymm11, ymmword ptr [rdi + 4*rax + 160] - vmovdqu ymm12, ymmword ptr [rdi + 4*rax + 256] - vmovdqu ymm13, ymmword ptr [rdi + 4*rax + 320] - vmovdqu ymm14, ymmword ptr [rdi + 4*rax + 352] - vpminsd ymm15, ymm8, ymm14 - vpminsd ymm6, ymm6, ymm15 - vmovdqa ymmword ptr [rsp], ymm6 # 32-byte Spill - vpminsd ymm15, ymm9, ymm13 - vpminsd ymm4, ymm4, ymm15 - vpminsd ymm15, ymm10, ymm12 - vpminsd ymm1, ymm1, ymm15 - vmovdqu ymm15, ymmword ptr [rdi + 4*rax + 288] - vpminsd ymm6, ymm11, ymm15 - vpminsd ymm2, ymm2, ymm6 - vpmaxsd ymm6, ymm8, ymm14 - vpmaxsd ymm7, ymm7, ymm6 - vpmaxsd ymm6, ymm9, ymm13 - vpmaxsd ymm5, ymm5, ymm6 - vpmaxsd ymm6, ymm10, ymm12 - vpmaxsd ymm0, ymm0, ymm6 - vpmaxsd ymm6, ymm11, ymm15 - vpmaxsd ymm3, ymm3, ymm6 - vmovdqu 
ymm6, ymmword ptr [rdi + 4*rax + 416] - vpminsd ymm2, ymm2, ymm6 - vpmaxsd ymm3, ymm3, ymm6 - vmovdqu ymm6, ymmword ptr [rdi + 4*rax + 384] - vpminsd ymm1, ymm1, ymm6 - vpmaxsd ymm0, ymm0, ymm6 - vmovdqu ymm6, ymmword ptr [rdi + 4*rax + 448] - vpminsd ymm4, ymm4, ymm6 - vpmaxsd ymm5, ymm5, ymm6 - vmovdqu ymm8, ymmword ptr [rdi + 4*rax + 480] - vpminsd ymm6, ymm8, ymmword ptr [rsp] # 32-byte Folded Reload - vpmaxsd ymm7, ymm7, ymm8 - sub rax, -128 - add r10, 4 - jne .LBB0_9 -.LBB0_10: - test r9, r9 - je .LBB0_13 -# %bb.11: - lea rax, [rdi + 4*rax] - neg r9 - .p2align 4, 0x90 -.LBB0_12: # =>This Inner Loop Header: Depth=1 - vmovdqu ymm8, ymmword ptr [rax] - vmovdqu ymm9, ymmword ptr [rax + 32] - vmovdqu ymm10, ymmword ptr [rax + 64] - vmovdqu ymm11, ymmword ptr [rax + 96] - vpminsd ymm2, ymm2, ymm9 - vpminsd ymm1, ymm1, ymm8 - vpminsd ymm4, ymm4, ymm10 - vpminsd ymm6, ymm6, ymm11 - vpmaxsd ymm3, ymm3, ymm9 - vpmaxsd ymm0, ymm0, ymm8 - vpmaxsd ymm5, ymm5, ymm10 - vpmaxsd ymm7, ymm7, ymm11 - sub rax, -128 - inc r9 - jne .LBB0_12 -.LBB0_13: - vpminsd ymm2, ymm2, ymm6 - vpminsd ymm1, ymm1, ymm4 - vpminsd ymm1, ymm1, ymm2 - vpmaxsd ymm2, ymm3, ymm7 - vpmaxsd ymm0, ymm0, ymm5 - vpmaxsd ymm0, ymm0, ymm2 - vextracti128 xmm2, ymm0, 1 - vpmaxsd xmm0, xmm0, xmm2 - vpshufd xmm2, xmm0, 78 # xmm2 = xmm0[2,3,0,1] - vpmaxsd xmm0, xmm0, xmm2 - vpshufd xmm2, xmm0, 229 # xmm2 = xmm0[1,1,2,3] - vpmaxsd xmm0, xmm0, xmm2 - vmovd eax, xmm0 - vextracti128 xmm0, ymm1, 1 - vpminsd xmm0, xmm1, xmm0 + add rax, 32 + cmp r9, rax + jne .LBB0_5 +# %bb.6: + vpmaxsd ymm4, ymm4, ymm5 + vpmaxsd ymm4, ymm4, ymm6 + vpmaxsd ymm4, ymm4, ymm7 + vextracti128 xmm5, ymm4, 1 + vpmaxsd xmm4, xmm4, xmm5 + vpshufd xmm5, xmm4, 78 # xmm5 = xmm4[2,3,0,1] + vpmaxsd xmm4, xmm4, xmm5 + vpshufd xmm5, xmm4, 229 # xmm5 = xmm4[1,1,2,3] + vpmaxsd xmm4, xmm4, xmm5 + vmovd r10d, xmm4 + vpminsd ymm0, ymm0, ymm1 + vpminsd ymm0, ymm0, ymm2 + vpminsd ymm0, ymm0, ymm3 + vextracti128 xmm1, ymm0, 1 + vpminsd xmm0, xmm0, xmm1 vpshufd 
xmm1, xmm0, 78 # xmm1 = xmm0[2,3,0,1] vpminsd xmm0, xmm0, xmm1 vpshufd xmm1, xmm0, 229 # xmm1 = xmm0[1,1,2,3] vpminsd xmm0, xmm0, xmm1 - vmovd r9d, xmm0 - cmp r11, r8 - je .LBB0_14 -.LBB0_4: - mov esi, eax + vmovd eax, xmm0 + mov esi, r10d + cmp r9, r8 + je .LBB0_8 .p2align 4, 0x90 -.LBB0_5: # =>This Inner Loop Header: Depth=1 - mov eax, dword ptr [rdi + 4*r11] - cmp r9d, eax - cmovg r9d, eax - cmp esi, eax - cmovge eax, esi - add r11, 1 - mov esi, eax - cmp r8, r11 - jne .LBB0_5 -.LBB0_14: - mov dword ptr [rcx], eax - mov dword ptr [rdx], r9d +.LBB0_7: # =>This Inner Loop Header: Depth=1 + mov esi, dword ptr [rdi + 4*r9] + cmp eax, esi + cmovg eax, esi + cmp r10d, esi + cmovge esi, r10d + add r9, 1 + mov r10d, esi + cmp r8, r9 + jne .LBB0_7 +.LBB0_8: + mov dword ptr [rcx], esi + mov dword ptr [rdx], eax mov rsp, rbp pop rbp vzeroupper @@ -196,173 +112,89 @@ uint32_max_min_avx2: # @uint32_max_min_avx2 # %bb.0: push rbp mov rbp, rsp - and rsp, -32 - sub rsp, 64 + and rsp, -8 test esi, esi jle .LBB1_1 # %bb.2: mov r8d, esi cmp esi, 31 - ja .LBB1_6 + ja .LBB1_4 # %bb.3: - xor r11d, r11d - mov r9d, -1 - xor esi, esi - jmp .LBB1_4 + xor r9d, r9d + mov eax, -1 + xor r10d, r10d + jmp .LBB1_7 .LBB1_1: - mov r9d, -1 + mov eax, -1 xor esi, esi - jmp .LBB1_14 -.LBB1_6: - mov r11d, r8d - and r11d, -32 - lea rax, [r11 - 32] - mov r10, rax - shr r10, 5 - add r10, 1 - mov r9d, r10d - and r9d, 3 - cmp rax, 96 - jae .LBB1_8 -# %bb.7: - vpxor xmm0, xmm0, xmm0 - vpcmpeqd ymm1, ymm1, ymm1 + jmp .LBB1_8 +.LBB1_4: + mov r9d, r8d + and r9d, -32 + vpxor xmm4, xmm4, xmm4 + vpcmpeqd ymm0, ymm0, ymm0 xor eax, eax - vpcmpeqd ymm2, ymm2, ymm2 - vpcmpeqd ymm4, ymm4, ymm4 - vpcmpeqd ymm6, ymm6, ymm6 - vpxor xmm3, xmm3, xmm3 - vpxor xmm5, xmm5, xmm5 - vpxor xmm7, xmm7, xmm7 - jmp .LBB1_10 -.LBB1_8: - and r10, -4 - neg r10 - vpxor xmm0, xmm0, xmm0 vpcmpeqd ymm1, ymm1, ymm1 - xor eax, eax vpcmpeqd ymm2, ymm2, ymm2 - vpcmpeqd ymm4, ymm4, ymm4 - vpcmpeqd ymm6, ymm6, ymm6 - vpxor xmm3, xmm3, xmm3 + 
vpcmpeqd ymm3, ymm3, ymm3 vpxor xmm5, xmm5, xmm5 + vpxor xmm6, xmm6, xmm6 vpxor xmm7, xmm7, xmm7 .p2align 4, 0x90 -.LBB1_9: # =>This Inner Loop Header: Depth=1 +.LBB1_5: # =>This Inner Loop Header: Depth=1 vmovdqu ymm8, ymmword ptr [rdi + 4*rax] vmovdqu ymm9, ymmword ptr [rdi + 4*rax + 32] vmovdqu ymm10, ymmword ptr [rdi + 4*rax + 64] vmovdqu ymm11, ymmword ptr [rdi + 4*rax + 96] - vpminud ymm6, ymm6, ymm11 - vpminud ymm4, ymm4, ymm10 - vpminud ymm1, ymm1, ymm8 - vpminud ymm2, ymm2, ymm9 + vpminud ymm0, ymm0, ymm8 + vpminud ymm1, ymm1, ymm9 + vpminud ymm2, ymm2, ymm10 + vpminud ymm3, ymm3, ymm11 + vpmaxud ymm4, ymm4, ymm8 + vpmaxud ymm5, ymm5, ymm9 + vpmaxud ymm6, ymm6, ymm10 vpmaxud ymm7, ymm7, ymm11 - vpmaxud ymm5, ymm5, ymm10 - vpmaxud ymm0, ymm0, ymm8 - vpmaxud ymm3, ymm3, ymm9 - vmovdqu ymm8, ymmword ptr [rdi + 4*rax + 224] - vmovdqu ymm9, ymmword ptr [rdi + 4*rax + 192] - vmovdqu ymm10, ymmword ptr [rdi + 4*rax + 128] - vmovdqu ymm11, ymmword ptr [rdi + 4*rax + 160] - vmovdqu ymm12, ymmword ptr [rdi + 4*rax + 256] - vmovdqu ymm13, ymmword ptr [rdi + 4*rax + 320] - vmovdqu ymm14, ymmword ptr [rdi + 4*rax + 352] - vpminud ymm15, ymm8, ymm14 - vpminud ymm6, ymm6, ymm15 - vmovdqa ymmword ptr [rsp], ymm6 # 32-byte Spill - vpminud ymm15, ymm9, ymm13 - vpminud ymm4, ymm4, ymm15 - vpminud ymm15, ymm10, ymm12 - vpminud ymm1, ymm1, ymm15 - vmovdqu ymm15, ymmword ptr [rdi + 4*rax + 288] - vpminud ymm6, ymm11, ymm15 - vpminud ymm2, ymm2, ymm6 - vpmaxud ymm6, ymm8, ymm14 - vpmaxud ymm7, ymm7, ymm6 - vpmaxud ymm6, ymm9, ymm13 - vpmaxud ymm5, ymm5, ymm6 - vpmaxud ymm6, ymm10, ymm12 - vpmaxud ymm0, ymm0, ymm6 - vpmaxud ymm6, ymm11, ymm15 - vpmaxud ymm3, ymm3, ymm6 - vmovdqu ymm6, ymmword ptr [rdi + 4*rax + 416] - vpminud ymm2, ymm2, ymm6 - vpmaxud ymm3, ymm3, ymm6 - vmovdqu ymm6, ymmword ptr [rdi + 4*rax + 384] - vpminud ymm1, ymm1, ymm6 - vpmaxud ymm0, ymm0, ymm6 - vmovdqu ymm6, ymmword ptr [rdi + 4*rax + 448] - vpminud ymm4, ymm4, ymm6 - vpmaxud ymm5, ymm5, ymm6 - vmovdqu 
ymm8, ymmword ptr [rdi + 4*rax + 480] - vpminud ymm6, ymm8, ymmword ptr [rsp] # 32-byte Folded Reload - vpmaxud ymm7, ymm7, ymm8 - sub rax, -128 - add r10, 4 - jne .LBB1_9 -.LBB1_10: - test r9, r9 - je .LBB1_13 -# %bb.11: - lea rax, [rdi + 4*rax] - neg r9 - .p2align 4, 0x90 -.LBB1_12: # =>This Inner Loop Header: Depth=1 - vmovdqu ymm8, ymmword ptr [rax] - vmovdqu ymm9, ymmword ptr [rax + 32] - vmovdqu ymm10, ymmword ptr [rax + 64] - vmovdqu ymm11, ymmword ptr [rax + 96] - vpminud ymm2, ymm2, ymm9 - vpminud ymm1, ymm1, ymm8 - vpminud ymm4, ymm4, ymm10 - vpminud ymm6, ymm6, ymm11 - vpmaxud ymm3, ymm3, ymm9 - vpmaxud ymm0, ymm0, ymm8 - vpmaxud ymm5, ymm5, ymm10 - vpmaxud ymm7, ymm7, ymm11 - sub rax, -128 - inc r9 - jne .LBB1_12 -.LBB1_13: - vpminud ymm2, ymm2, ymm6 - vpminud ymm1, ymm1, ymm4 - vpminud ymm1, ymm1, ymm2 - vpmaxud ymm2, ymm3, ymm7 - vpmaxud ymm0, ymm0, ymm5 - vpmaxud ymm0, ymm0, ymm2 - vextracti128 xmm2, ymm0, 1 - vpmaxud xmm0, xmm0, xmm2 - vpshufd xmm2, xmm0, 78 # xmm2 = xmm0[2,3,0,1] - vpmaxud xmm0, xmm0, xmm2 - vpshufd xmm2, xmm0, 229 # xmm2 = xmm0[1,1,2,3] - vpmaxud xmm0, xmm0, xmm2 - vmovd esi, xmm0 - vextracti128 xmm0, ymm1, 1 - vpminud xmm0, xmm1, xmm0 + add rax, 32 + cmp r9, rax + jne .LBB1_5 +# %bb.6: + vpmaxud ymm4, ymm4, ymm5 + vpmaxud ymm4, ymm4, ymm6 + vpmaxud ymm4, ymm4, ymm7 + vextracti128 xmm5, ymm4, 1 + vpmaxud xmm4, xmm4, xmm5 + vpshufd xmm5, xmm4, 78 # xmm5 = xmm4[2,3,0,1] + vpmaxud xmm4, xmm4, xmm5 + vpshufd xmm5, xmm4, 229 # xmm5 = xmm4[1,1,2,3] + vpmaxud xmm4, xmm4, xmm5 + vmovd r10d, xmm4 + vpminud ymm0, ymm0, ymm1 + vpminud ymm0, ymm0, ymm2 + vpminud ymm0, ymm0, ymm3 + vextracti128 xmm1, ymm0, 1 + vpminud xmm0, xmm0, xmm1 vpshufd xmm1, xmm0, 78 # xmm1 = xmm0[2,3,0,1] vpminud xmm0, xmm0, xmm1 vpshufd xmm1, xmm0, 229 # xmm1 = xmm0[1,1,2,3] vpminud xmm0, xmm0, xmm1 - vmovd r9d, xmm0 - cmp r11, r8 - je .LBB1_14 -.LBB1_4: - mov eax, esi + vmovd eax, xmm0 + mov esi, r10d + cmp r9, r8 + je .LBB1_8 .p2align 4, 0x90 -.LBB1_5: # =>This 
Inner Loop Header: Depth=1 - mov esi, dword ptr [rdi + 4*r11] - cmp r9d, esi - cmovae r9d, esi +.LBB1_7: # =>This Inner Loop Header: Depth=1 + mov esi, dword ptr [rdi + 4*r9] cmp eax, esi - cmova esi, eax - add r11, 1 - mov eax, esi - cmp r8, r11 - jne .LBB1_5 -.LBB1_14: + cmovae eax, esi + cmp r10d, esi + cmova esi, r10d + add r9, 1 + mov r10d, esi + cmp r8, r9 + jne .LBB1_7 +.LBB1_8: mov dword ptr [rcx], esi - mov dword ptr [rdx], r9d + mov dword ptr [rdx], eax mov rsp, rbp pop rbp vzeroupper @@ -384,387 +216,102 @@ int64_max_min_avx2: # @int64_max_min_avx2 # %bb.0: push rbp mov rbp, rsp - and rsp, -32 - sub rsp, 224 - movabs r9, 9223372036854775807 + and rsp, -8 + movabs rax, 9223372036854775807 test esi, esi jle .LBB2_1 # %bb.2: mov r8d, esi - cmp esi, 31 - ja .LBB2_6 + cmp esi, 15 + ja .LBB2_4 # %bb.3: - lea rsi, [r9 + 1] - xor r11d, r11d - jmp .LBB2_4 + lea r10, [rax + 1] + xor r9d, r9d + jmp .LBB2_7 .LBB2_1: - lea rsi, [r9 + 1] - jmp .LBB2_14 -.LBB2_6: - mov r11d, r8d - and r11d, -32 - lea rax, [r11 - 32] - mov r10, rax - shr r10, 5 - add r10, 1 - mov r9d, r10d - and r9d, 3 - cmp rax, 96 - jae .LBB2_8 -# %bb.7: - vpbroadcastq ymm15, qword ptr [rip + .LCPI2_0] # ymm15 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] - vpbroadcastq ymm11, qword ptr [rip + .LCPI2_1] # ymm11 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807] - xor eax, eax - vmovdqa ymmword ptr [rsp + 32], ymm11 # 32-byte Spill - vmovdqa ymm3, ymm11 - vmovdqa ymm9, ymm11 - vmovdqa ymm5, ymm11 - vmovdqa ymm4, ymm11 - vmovdqa ymm6, ymm11 - vmovdqa ymmword ptr [rsp + 96], ymm11 # 32-byte Spill - vmovdqa ymmword ptr [rsp + 64], ymm15 # 32-byte Spill - vmovdqa ymm2, ymm15 - vmovdqa ymm8, ymm15 - vmovdqa ymm12, ymm15 - vmovdqa ymm13, ymm15 - vmovdqa ymm14, ymm15 - vmovdqa ymmword ptr [rsp], ymm15 # 32-byte Spill - jmp .LBB2_10 -.LBB2_8: - and r10, -4 - vpbroadcastq ymm15, qword ptr [rip + .LCPI2_0] # ymm15 = 
[9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] - neg r10 - vpbroadcastq ymm11, qword ptr [rip + .LCPI2_1] # ymm11 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807] + lea rsi, [rax + 1] + jmp .LBB2_8 +.LBB2_4: + mov r9d, r8d + vpbroadcastq ymm4, qword ptr [rip + .LCPI2_0] # ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] + and r9d, -16 + vpbroadcastq ymm0, qword ptr [rip + .LCPI2_1] # ymm0 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807] xor eax, eax - vmovdqa ymmword ptr [rsp + 32], ymm11 # 32-byte Spill - vmovdqa ymm3, ymm11 - vmovdqa ymm9, ymm11 - vmovdqa ymm5, ymm11 - vmovdqa ymm4, ymm11 - vmovdqa ymm6, ymm11 - vmovdqa ymmword ptr [rsp + 96], ymm11 # 32-byte Spill - vmovdqa ymmword ptr [rsp + 64], ymm15 # 32-byte Spill - vmovdqa ymm2, ymm15 - vmovdqa ymm8, ymm15 - vmovdqa ymm12, ymm15 - vmovdqa ymm13, ymm15 - vmovdqa ymm14, ymm15 - vmovdqa ymmword ptr [rsp], ymm15 # 32-byte Spill - .p2align 4, 0x90 -.LBB2_9: # =>This Inner Loop Header: Depth=1 - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 224] - vmovdqa ymm10, ymm8 - vmovdqa ymm8, ymm2 - vmovdqa ymm2, ymm3 - vmovdqa ymm3, ymm9 - vpcmpgtq ymm9, ymm0, ymm11 - vblendvpd ymm1, ymm0, ymm11, ymm9 - vmovapd ymmword ptr [rsp + 160], ymm1 # 32-byte Spill - vpcmpgtq ymm9, ymm15, ymm0 - vblendvpd ymm0, ymm0, ymm15, ymm9 - vmovapd ymmword ptr [rsp + 128], ymm0 # 32-byte Spill - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 192] - vpcmpgtq ymm9, ymm0, ymm6 - vblendvpd ymm7, ymm0, ymm6, ymm9 - vpcmpgtq ymm9, ymm14, ymm0 - vblendvpd ymm14, ymm0, ymm14, ymm9 - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 160] - vpcmpgtq ymm9, ymm0, ymm4 - vblendvpd ymm6, ymm0, ymm4, ymm9 - vpcmpgtq ymm9, ymm13, ymm0 - vblendvpd ymm13, ymm0, ymm13, ymm9 - vmovdqu ymm9, ymmword ptr [rdi + 8*rax + 128] - vpcmpgtq ymm0, ymm9, ymm5 - vblendvpd ymm1, ymm9, ymm5, ymm0 - vpcmpgtq ymm5, ymm12, ymm9 - vblendvpd ymm12, ymm9, 
ymm12, ymm5 - vmovdqu ymm5, ymmword ptr [rdi + 8*rax + 96] - vpcmpgtq ymm9, ymm5, ymm3 - vblendvpd ymm9, ymm5, ymm3, ymm9 - vpcmpgtq ymm4, ymm10, ymm5 - vblendvpd ymm10, ymm5, ymm10, ymm4 - vmovdqu ymm4, ymmword ptr [rdi + 8*rax + 64] - vpcmpgtq ymm5, ymm4, ymm2 - vblendvpd ymm5, ymm4, ymm2, ymm5 - vpcmpgtq ymm3, ymm8, ymm4 - vblendvpd ymm0, ymm4, ymm8, ymm3 - vmovdqu ymm2, ymmword ptr [rdi + 8*rax] - vmovdqa ymm4, ymmword ptr [rsp + 96] # 32-byte Reload - vpcmpgtq ymm3, ymm2, ymm4 - vblendvpd ymm3, ymm2, ymm4, ymm3 - vmovdqa ymm11, ymmword ptr [rsp] # 32-byte Reload - vpcmpgtq ymm4, ymm11, ymm2 - vblendvpd ymm4, ymm2, ymm11, ymm4 - vmovdqu ymm2, ymmword ptr [rdi + 8*rax + 32] - vmovdqa ymm15, ymmword ptr [rsp + 32] # 32-byte Reload - vpcmpgtq ymm11, ymm2, ymm15 - vblendvpd ymm11, ymm2, ymm15, ymm11 - vmovdqa ymm8, ymmword ptr [rsp + 64] # 32-byte Reload - vpcmpgtq ymm15, ymm8, ymm2 - vblendvpd ymm2, ymm2, ymm8, ymm15 - vmovdqu ymm8, ymmword ptr [rdi + 8*rax + 288] - vpcmpgtq ymm15, ymm8, ymm11 - vblendvpd ymm11, ymm8, ymm11, ymm15 - vmovapd ymmword ptr [rsp + 32], ymm11 # 32-byte Spill - vpcmpgtq ymm11, ymm2, ymm8 - vblendvpd ymm2, ymm8, ymm2, ymm11 - vmovapd ymmword ptr [rsp], ymm2 # 32-byte Spill - vmovdqu ymm11, ymmword ptr [rdi + 8*rax + 256] - vpcmpgtq ymm2, ymm11, ymm3 - vblendvpd ymm8, ymm11, ymm3, ymm2 - vpcmpgtq ymm3, ymm4, ymm11 - vblendvpd ymm3, ymm11, ymm4, ymm3 - vmovdqu ymm11, ymmword ptr [rdi + 8*rax + 320] - vpcmpgtq ymm4, ymm11, ymm5 - vblendvpd ymm4, ymm11, ymm5, ymm4 - vpcmpgtq ymm5, ymm0, ymm11 - vblendvpd ymm5, ymm11, ymm0, ymm5 - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 352] - vpcmpgtq ymm11, ymm0, ymm9 - vblendvpd ymm9, ymm0, ymm9, ymm11 - vpcmpgtq ymm11, ymm10, ymm0 - vblendvpd ymm10, ymm0, ymm10, ymm11 - vmovdqu ymm11, ymmword ptr [rdi + 8*rax + 384] - vpcmpgtq ymm0, ymm11, ymm1 - vblendvpd ymm2, ymm11, ymm1, ymm0 - vpcmpgtq ymm1, ymm12, ymm11 - vblendvpd ymm12, ymm11, ymm12, ymm1 - vmovdqu ymm1, ymmword ptr [rdi + 8*rax + 416] - vpcmpgtq 
ymm11, ymm1, ymm6 - vblendvpd ymm6, ymm1, ymm6, ymm11 - vpcmpgtq ymm11, ymm13, ymm1 - vblendvpd ymm1, ymm1, ymm13, ymm11 - vmovdqu ymm11, ymmword ptr [rdi + 8*rax + 448] - vpcmpgtq ymm13, ymm11, ymm7 - vblendvpd ymm7, ymm11, ymm7, ymm13 - vpcmpgtq ymm13, ymm14, ymm11 - vblendvpd ymm13, ymm11, ymm14, ymm13 - vmovdqu ymm11, ymmword ptr [rdi + 8*rax + 480] - vmovdqa ymm0, ymmword ptr [rsp + 160] # 32-byte Reload - vpcmpgtq ymm14, ymm11, ymm0 - vblendvpd ymm14, ymm11, ymm0, ymm14 - vmovdqa ymm0, ymmword ptr [rsp + 128] # 32-byte Reload - vpcmpgtq ymm15, ymm0, ymm11 - vblendvpd ymm15, ymm11, ymm0, ymm15 - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 736] - vpcmpgtq ymm11, ymm0, ymm14 - vblendvpd ymm11, ymm0, ymm14, ymm11 - vmovapd ymmword ptr [rsp + 160], ymm11 # 32-byte Spill - vpcmpgtq ymm14, ymm15, ymm0 - vblendvpd ymm0, ymm0, ymm15, ymm14 - vmovapd ymmword ptr [rsp + 128], ymm0 # 32-byte Spill - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 704] - vpcmpgtq ymm14, ymm0, ymm7 - vblendvpd ymm7, ymm0, ymm7, ymm14 - vpcmpgtq ymm14, ymm13, ymm0 - vblendvpd ymm14, ymm0, ymm13, ymm14 - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 672] - vpcmpgtq ymm13, ymm0, ymm6 - vblendvpd ymm6, ymm0, ymm6, ymm13 - vpcmpgtq ymm13, ymm1, ymm0 - vblendvpd ymm13, ymm0, ymm1, ymm13 - vmovdqu ymm1, ymmword ptr [rdi + 8*rax + 640] - vpcmpgtq ymm0, ymm1, ymm2 - vblendvpd ymm0, ymm1, ymm2, ymm0 - vpcmpgtq ymm2, ymm12, ymm1 - vblendvpd ymm12, ymm1, ymm12, ymm2 - vmovdqu ymm1, ymmword ptr [rdi + 8*rax + 608] - vpcmpgtq ymm2, ymm1, ymm9 - vblendvpd ymm9, ymm1, ymm9, ymm2 - vpcmpgtq ymm2, ymm10, ymm1 - vblendvpd ymm10, ymm1, ymm10, ymm2 - vmovdqu ymm1, ymmword ptr [rdi + 8*rax + 576] - vpcmpgtq ymm2, ymm1, ymm4 - vblendvpd ymm2, ymm1, ymm4, ymm2 - vpcmpgtq ymm4, ymm5, ymm1 - vblendvpd ymm1, ymm1, ymm5, ymm4 - vmovdqu ymm4, ymmword ptr [rdi + 8*rax + 512] - vpcmpgtq ymm5, ymm4, ymm8 - vblendvpd ymm5, ymm4, ymm8, ymm5 - vpcmpgtq ymm8, ymm3, ymm4 - vblendvpd ymm3, ymm4, ymm3, ymm8 - vmovdqu ymm4, ymmword ptr [rdi + 
8*rax + 544] - vmovdqa ymm11, ymmword ptr [rsp + 32] # 32-byte Reload - vpcmpgtq ymm8, ymm4, ymm11 - vblendvpd ymm8, ymm4, ymm11, ymm8 - vmovdqa ymm15, ymmword ptr [rsp] # 32-byte Reload - vpcmpgtq ymm11, ymm15, ymm4 - vblendvpd ymm4, ymm4, ymm15, ymm11 - vmovdqu ymm11, ymmword ptr [rdi + 8*rax + 800] - vpcmpgtq ymm15, ymm11, ymm8 - vblendvpd ymm8, ymm11, ymm8, ymm15 - vmovapd ymmword ptr [rsp + 32], ymm8 # 32-byte Spill - vpcmpgtq ymm8, ymm4, ymm11 - vblendvpd ymm4, ymm11, ymm4, ymm8 - vmovapd ymmword ptr [rsp + 64], ymm4 # 32-byte Spill - vmovdqu ymm4, ymmword ptr [rdi + 8*rax + 768] - vpcmpgtq ymm11, ymm4, ymm5 - vblendvpd ymm5, ymm4, ymm5, ymm11 - vmovapd ymmword ptr [rsp + 96], ymm5 # 32-byte Spill - vpcmpgtq ymm5, ymm3, ymm4 - vblendvpd ymm3, ymm4, ymm3, ymm5 - vmovapd ymmword ptr [rsp], ymm3 # 32-byte Spill - vmovdqu ymm4, ymmword ptr [rdi + 8*rax + 832] - vpcmpgtq ymm3, ymm4, ymm2 - vblendvpd ymm3, ymm4, ymm2, ymm3 - vpcmpgtq ymm2, ymm1, ymm4 - vblendvpd ymm2, ymm4, ymm1, ymm2 - vmovdqu ymm1, ymmword ptr [rdi + 8*rax + 864] - vpcmpgtq ymm4, ymm1, ymm9 - vblendvpd ymm9, ymm1, ymm9, ymm4 - vpcmpgtq ymm5, ymm10, ymm1 - vblendvpd ymm8, ymm1, ymm10, ymm5 - vmovdqu ymm1, ymmword ptr [rdi + 8*rax + 896] - vpcmpgtq ymm5, ymm1, ymm0 - vblendvpd ymm5, ymm1, ymm0, ymm5 - vpcmpgtq ymm0, ymm12, ymm1 - vblendvpd ymm12, ymm1, ymm12, ymm0 - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 928] - vpcmpgtq ymm1, ymm0, ymm6 - vblendvpd ymm4, ymm0, ymm6, ymm1 - vpcmpgtq ymm1, ymm13, ymm0 - vblendvpd ymm13, ymm0, ymm13, ymm1 - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 960] - vpcmpgtq ymm1, ymm0, ymm7 - vblendvpd ymm6, ymm0, ymm7, ymm1 - vpcmpgtq ymm1, ymm14, ymm0 - vblendvpd ymm14, ymm0, ymm14, ymm1 - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 992] - vmovdqa ymm7, ymmword ptr [rsp + 160] # 32-byte Reload - vpcmpgtq ymm1, ymm0, ymm7 - vblendvpd ymm11, ymm0, ymm7, ymm1 - vmovdqa ymm7, ymmword ptr [rsp + 128] # 32-byte Reload - vpcmpgtq ymm1, ymm7, ymm0 - vblendvpd ymm15, ymm0, ymm7, ymm1 - 
sub rax, -128 - add r10, 4 - jne .LBB2_9 -.LBB2_10: - test r9, r9 - vmovdqa ymm7, ymm5 - vmovdqa ymm5, ymm9 - vmovdqa ymm9, ymmword ptr [rsp + 96] # 32-byte Reload - vmovdqa ymm10, ymm3 - je .LBB2_13 -# %bb.11: - lea rax, [rdi + 8*rax] - neg r9 + vmovdqa ymm3, ymm0 + vmovdqa ymm2, ymm0 + vmovdqa ymm1, ymm0 + vmovdqa ymm7, ymm4 + vmovdqa ymm6, ymm4 + vmovdqa ymm5, ymm4 .p2align 4, 0x90 -.LBB2_12: # =>This Inner Loop Header: Depth=1 - vmovdqu ymm0, ymmword ptr [rax + 32] - vmovdqa ymm3, ymmword ptr [rsp + 32] # 32-byte Reload - vpcmpgtq ymm1, ymm0, ymm3 - vblendvpd ymm3, ymm0, ymm3, ymm1 - vmovapd ymmword ptr [rsp + 32], ymm3 # 32-byte Spill - vmovdqa ymm3, ymmword ptr [rsp + 64] # 32-byte Reload - vpcmpgtq ymm1, ymm3, ymm0 - vblendvpd ymm3, ymm0, ymm3, ymm1 - vmovapd ymmword ptr [rsp + 64], ymm3 # 32-byte Spill - vmovdqu ymm0, ymmword ptr [rax] - vpcmpgtq ymm1, ymm0, ymm9 - vblendvpd ymm9, ymm0, ymm9, ymm1 - vmovdqa ymm3, ymmword ptr [rsp] # 32-byte Reload - vpcmpgtq ymm1, ymm3, ymm0 - vblendvpd ymm3, ymm0, ymm3, ymm1 - vmovapd ymmword ptr [rsp], ymm3 # 32-byte Spill - vmovdqu ymm0, ymmword ptr [rax + 64] - vpcmpgtq ymm1, ymm0, ymm10 - vblendvpd ymm10, ymm0, ymm10, ymm1 - vpcmpgtq ymm1, ymm2, ymm0 - vblendvpd ymm2, ymm0, ymm2, ymm1 - vmovdqu ymm0, ymmword ptr [rax + 96] - vpcmpgtq ymm1, ymm0, ymm5 - vblendvpd ymm5, ymm0, ymm5, ymm1 - vpcmpgtq ymm1, ymm8, ymm0 - vblendvpd ymm8, ymm0, ymm8, ymm1 - vmovdqu ymm0, ymmword ptr [rax + 128] - vpcmpgtq ymm1, ymm0, ymm7 - vblendvpd ymm7, ymm0, ymm7, ymm1 - vpcmpgtq ymm1, ymm12, ymm0 - vblendvpd ymm12, ymm0, ymm12, ymm1 - vmovdqu ymm0, ymmword ptr [rax + 160] - vpcmpgtq ymm1, ymm0, ymm4 - vblendvpd ymm4, ymm0, ymm4, ymm1 - vpcmpgtq ymm1, ymm13, ymm0 - vblendvpd ymm13, ymm0, ymm13, ymm1 - vmovdqu ymm0, ymmword ptr [rax + 192] - vpcmpgtq ymm1, ymm0, ymm6 - vblendvpd ymm6, ymm0, ymm6, ymm1 - vpcmpgtq ymm1, ymm14, ymm0 - vblendvpd ymm14, ymm0, ymm14, ymm1 - vmovdqu ymm0, ymmword ptr [rax + 224] - vpcmpgtq ymm1, ymm0, ymm11 - 
vblendvpd ymm11, ymm0, ymm11, ymm1 - vpcmpgtq ymm1, ymm15, ymm0 - vblendvpd ymm15, ymm0, ymm15, ymm1 - add rax, 256 - inc r9 - jne .LBB2_12 -.LBB2_13: - vmovdqa ymm1, ymmword ptr [rsp + 64] # 32-byte Reload - vpcmpgtq ymm0, ymm1, ymm13 - vblendvpd ymm0, ymm13, ymm1, ymm0 - vpcmpgtq ymm1, ymm8, ymm15 - vblendvpd ymm1, ymm15, ymm8, ymm1 - vmovdqa ymm3, ymmword ptr [rsp] # 32-byte Reload - vpcmpgtq ymm8, ymm3, ymm12 - vblendvpd ymm8, ymm12, ymm3, ymm8 - vmovdqa ymm3, ymm9 - vpcmpgtq ymm9, ymm2, ymm14 - vblendvpd ymm2, ymm14, ymm2, ymm9 - vpcmpgtq ymm9, ymm8, ymm2 - vblendvpd ymm2, ymm2, ymm8, ymm9 - vpcmpgtq ymm8, ymm0, ymm1 - vblendvpd ymm0, ymm1, ymm0, ymm8 - vpcmpgtq ymm1, ymm2, ymm0 - vblendvpd ymm0, ymm0, ymm2, ymm1 +.LBB2_5: # =>This Inner Loop Header: Depth=1 + vmovdqu ymm8, ymmword ptr [rdi + 8*rax] + vpcmpgtq ymm9, ymm8, ymm0 + vblendvpd ymm0, ymm8, ymm0, ymm9 + vmovdqu ymm9, ymmword ptr [rdi + 8*rax + 32] + vpcmpgtq ymm10, ymm9, ymm3 + vblendvpd ymm3, ymm9, ymm3, ymm10 + vmovdqu ymm10, ymmword ptr [rdi + 8*rax + 64] + vpcmpgtq ymm11, ymm10, ymm2 + vblendvpd ymm2, ymm10, ymm2, ymm11 + vmovdqu ymm11, ymmword ptr [rdi + 8*rax + 96] + vpcmpgtq ymm12, ymm11, ymm1 + vblendvpd ymm1, ymm11, ymm1, ymm12 + vpcmpgtq ymm12, ymm4, ymm8 + vblendvpd ymm4, ymm8, ymm4, ymm12 + vpcmpgtq ymm8, ymm7, ymm9 + vblendvpd ymm7, ymm9, ymm7, ymm8 + vpcmpgtq ymm8, ymm6, ymm10 + vblendvpd ymm6, ymm10, ymm6, ymm8 + vpcmpgtq ymm8, ymm5, ymm11 + vblendvpd ymm5, ymm11, ymm5, ymm8 + add rax, 16 + cmp r9, rax + jne .LBB2_5 +# %bb.6: + vpcmpgtq ymm8, ymm4, ymm7 + vblendvpd ymm4, ymm7, ymm4, ymm8 + vpcmpgtq ymm7, ymm4, ymm6 + vblendvpd ymm4, ymm6, ymm4, ymm7 + vpcmpgtq ymm6, ymm4, ymm5 + vblendvpd ymm4, ymm5, ymm4, ymm6 + vextractf128 xmm5, ymm4, 1 + vpcmpgtq xmm6, xmm4, xmm5 + vblendvpd xmm4, xmm5, xmm4, xmm6 + vpermilps xmm5, xmm4, 78 # xmm5 = xmm4[2,3,0,1] + vpcmpgtq xmm6, xmm4, xmm5 + vblendvpd xmm4, xmm5, xmm4, xmm6 + vmovq r10, xmm4 + vpcmpgtq ymm4, ymm3, ymm0 + vblendvpd ymm0, ymm3, 
ymm0, ymm4 + vpcmpgtq ymm3, ymm2, ymm0 + vblendvpd ymm0, ymm2, ymm0, ymm3 + vpcmpgtq ymm2, ymm1, ymm0 + vblendvpd ymm0, ymm1, ymm0, ymm2 vextractf128 xmm1, ymm0, 1 - vpcmpgtq xmm2, xmm0, xmm1 + vpcmpgtq xmm2, xmm1, xmm0 vblendvpd xmm0, xmm1, xmm0, xmm2 vpermilps xmm1, xmm0, 78 # xmm1 = xmm0[2,3,0,1] - vpcmpgtq xmm2, xmm0, xmm1 + vpcmpgtq xmm2, xmm1, xmm0 vblendvpd xmm0, xmm1, xmm0, xmm2 - vmovdqa ymm2, ymmword ptr [rsp + 32] # 32-byte Reload - vpcmpgtq ymm1, ymm4, ymm2 - vblendvpd ymm1, ymm4, ymm2, ymm1 - vpcmpgtq ymm2, ymm11, ymm5 - vblendvpd ymm2, ymm11, ymm5, ymm2 - vpcmpgtq ymm4, ymm7, ymm3 - vblendvpd ymm4, ymm7, ymm3, ymm4 - vpcmpgtq ymm5, ymm6, ymm10 - vblendvpd ymm3, ymm6, ymm10, ymm5 - vpcmpgtq ymm5, ymm3, ymm4 - vblendvpd ymm3, ymm3, ymm4, ymm5 - vpcmpgtq ymm4, ymm2, ymm1 - vblendvpd ymm1, ymm2, ymm1, ymm4 - vpcmpgtq ymm2, ymm1, ymm3 - vblendvpd ymm1, ymm1, ymm3, ymm2 - vextractf128 xmm2, ymm1, 1 - vpcmpgtq xmm3, xmm2, xmm1 - vblendvpd xmm1, xmm2, xmm1, xmm3 - vpermilps xmm2, xmm1, 78 # xmm2 = xmm1[2,3,0,1] - vpcmpgtq xmm3, xmm2, xmm1 - vblendvpd xmm1, xmm2, xmm1, xmm3 - vmovq rsi, xmm0 - vmovq r9, xmm1 - cmp r11, r8 - je .LBB2_14 -.LBB2_4: - mov rax, rsi + vmovq rax, xmm0 + mov rsi, r10 + cmp r9, r8 + je .LBB2_8 .p2align 4, 0x90 -.LBB2_5: # =>This Inner Loop Header: Depth=1 - mov rsi, qword ptr [rdi + 8*r11] - cmp r9, rsi - cmovg r9, rsi +.LBB2_7: # =>This Inner Loop Header: Depth=1 + mov rsi, qword ptr [rdi + 8*r9] cmp rax, rsi - cmovge rsi, rax - add r11, 1 - mov rax, rsi - cmp r8, r11 - jne .LBB2_5 -.LBB2_14: + cmovg rax, rsi + cmp r10, rsi + cmovge rsi, r10 + add r9, 1 + mov r10, rsi + cmp r8, r9 + jne .LBB2_7 +.LBB2_8: mov qword ptr [rcx], rsi - mov qword ptr [rdx], r9 + mov qword ptr [rdx], rax mov rsp, rbp pop rbp vzeroupper @@ -784,576 +331,136 @@ uint64_max_min_avx2: # @uint64_max_min_avx2 # %bb.0: push rbp mov rbp, rsp - and rsp, -32 - sub rsp, 288 + and rsp, -8 test esi, esi jle .LBB3_1 # %bb.2: mov r8d, esi - cmp esi, 31 - ja .LBB3_6 + cmp 
esi, 15 + ja .LBB3_4 # %bb.3: - mov r9, -1 - xor r11d, r11d - xor esi, esi - jmp .LBB3_4 + mov rax, -1 + xor r9d, r9d + xor r10d, r10d + jmp .LBB3_7 .LBB3_1: - mov r9, -1 + mov rax, -1 xor esi, esi - jmp .LBB3_14 -.LBB3_6: - mov r11d, r8d - and r11d, -32 - lea rax, [r11 - 32] - mov r10, rax - shr r10, 5 - add r10, 1 - mov r9d, r10d - and r9d, 3 - cmp rax, 96 - jae .LBB3_8 -# %bb.7: - vpxor xmm4, xmm4, xmm4 - vpcmpeqd ymm0, ymm0, ymm0 - vmovdqa ymmword ptr [rsp + 64], ymm0 # 32-byte Spill - xor eax, eax - vpcmpeqd ymm0, ymm0, ymm0 - vmovdqa ymmword ptr [rsp + 96], ymm0 # 32-byte Spill - vpcmpeqd ymm5, ymm5, ymm5 - vpcmpeqd ymm7, ymm7, ymm7 - vpcmpeqd ymm12, ymm12, ymm12 - vpcmpeqd ymm10, ymm10, ymm10 - vpcmpeqd ymm11, ymm11, ymm11 - vpcmpeqd ymm13, ymm13, ymm13 - vpxor xmm0, xmm0, xmm0 - vmovdqa ymmword ptr [rsp + 32], ymm0 # 32-byte Spill - vpxor xmm0, xmm0, xmm0 - vmovdqa ymmword ptr [rsp], ymm0 # 32-byte Spill - vpxor xmm3, xmm3, xmm3 - vpxor xmm9, xmm9, xmm9 - vpxor xmm8, xmm8, xmm8 - vpxor xmm15, xmm15, xmm15 - vpxor xmm0, xmm0, xmm0 - jmp .LBB3_10 -.LBB3_8: - and r10, -4 - neg r10 - vpxor xmm4, xmm4, xmm4 - vpcmpeqd ymm0, ymm0, ymm0 - vmovdqa ymmword ptr [rsp + 64], ymm0 # 32-byte Spill + jmp .LBB3_8 +.LBB3_4: + mov r9d, r8d + and r9d, -16 + vpxor xmm5, xmm5, xmm5 + vpcmpeqd ymm1, ymm1, ymm1 xor eax, eax - vpbroadcastq ymm14, qword ptr [rip + .LCPI3_0] # ymm14 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] - vpcmpeqd ymm0, ymm0, ymm0 - vmovdqa ymmword ptr [rsp + 96], ymm0 # 32-byte Spill - vpcmpeqd ymm5, ymm5, ymm5 - vpcmpeqd ymm7, ymm7, ymm7 - vpcmpeqd ymm12, ymm12, ymm12 - vpcmpeqd ymm10, ymm10, ymm10 - vpcmpeqd ymm11, ymm11, ymm11 - vpcmpeqd ymm13, ymm13, ymm13 - vpxor xmm0, xmm0, xmm0 - vmovdqa ymmword ptr [rsp + 32], ymm0 # 32-byte Spill - vpxor xmm0, xmm0, xmm0 - vmovdqa ymmword ptr [rsp], ymm0 # 32-byte Spill - vpxor xmm3, xmm3, xmm3 - vpxor xmm9, xmm9, xmm9 + vpbroadcastq ymm0, qword ptr [rip + .LCPI3_0] # ymm0 = 
[9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] + vpcmpeqd ymm4, ymm4, ymm4 + vpcmpeqd ymm3, ymm3, ymm3 + vpcmpeqd ymm2, ymm2, ymm2 vpxor xmm8, xmm8, xmm8 - vpxor xmm15, xmm15, xmm15 - vpxor xmm0, xmm0, xmm0 + vpxor xmm7, xmm7, xmm7 + vpxor xmm6, xmm6, xmm6 .p2align 4, 0x90 -.LBB3_9: # =>This Inner Loop Header: Depth=1 - vmovdqu ymm1, ymmword ptr [rdi + 8*rax + 224] - vpxor ymm2, ymm14, ymm1 - vmovdqa ymm6, ymm3 - vpxor ymm3, ymm13, ymm14 - vpcmpgtq ymm3, ymm2, ymm3 - vblendvpd ymm3, ymm1, ymm13, ymm3 - vmovapd ymmword ptr [rsp + 128], ymm3 # 32-byte Spill - vpxor ymm3, ymm14, ymm0 - vpcmpgtq ymm2, ymm3, ymm2 - vblendvpd ymm0, ymm1, ymm0, ymm2 - vmovapd ymmword ptr [rsp + 224], ymm0 # 32-byte Spill - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 192] - vpxor ymm1, ymm14, ymm0 - vpxor ymm2, ymm11, ymm14 - vpcmpgtq ymm2, ymm1, ymm2 - vblendvpd ymm2, ymm0, ymm11, ymm2 - vmovapd ymmword ptr [rsp + 160], ymm2 # 32-byte Spill - vpxor ymm2, ymm15, ymm14 - vpcmpgtq ymm1, ymm2, ymm1 - vblendvpd ymm0, ymm0, ymm15, ymm1 - vmovapd ymmword ptr [rsp + 192], ymm0 # 32-byte Spill - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 160] - vpxor ymm1, ymm14, ymm0 - vpxor ymm2, ymm10, ymm14 - vpcmpgtq ymm2, ymm1, ymm2 - vmovdqa ymm3, ymm8 - vblendvpd ymm8, ymm0, ymm10, ymm2 - vpxor ymm2, ymm14, ymm3 - vpcmpgtq ymm1, ymm2, ymm1 - vblendvpd ymm13, ymm0, ymm3, ymm1 - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 128] - vpxor ymm2, ymm14, ymm0 - vpxor ymm1, ymm12, ymm14 - vpcmpgtq ymm1, ymm2, ymm1 - vblendvpd ymm1, ymm0, ymm12, ymm1 - vpxor ymm3, ymm9, ymm14 - vpcmpgtq ymm2, ymm3, ymm2 - vblendvpd ymm12, ymm0, ymm9, ymm2 - vmovdqu ymm2, ymmword ptr [rdi + 8*rax + 96] - vpxor ymm0, ymm14, ymm7 - vpxor ymm3, ymm14, ymm2 - vpcmpgtq ymm0, ymm3, ymm0 - vblendvpd ymm0, ymm2, ymm7, ymm0 - vmovdqa ymm15, ymm4 - vpxor ymm4, ymm14, ymm6 - vpcmpgtq ymm3, ymm4, ymm3 - vblendvpd ymm10, ymm2, ymm6, ymm3 - vmovdqu ymm2, ymmword ptr [rdi + 8*rax + 64] - vpxor ymm3, ymm14, ymm5 - vpxor 
ymm4, ymm14, ymm2 - vpcmpgtq ymm3, ymm4, ymm3 - vblendvpd ymm5, ymm2, ymm5, ymm3 - vmovdqa ymm6, ymmword ptr [rsp] # 32-byte Reload - vpxor ymm3, ymm14, ymm6 - vpcmpgtq ymm3, ymm3, ymm4 - vblendvpd ymm9, ymm2, ymm6, ymm3 - vmovdqu ymm2, ymmword ptr [rdi + 8*rax] - vmovdqa ymm7, ymmword ptr [rsp + 64] # 32-byte Reload - vpxor ymm3, ymm14, ymm7 - vpxor ymm4, ymm14, ymm2 - vpcmpgtq ymm3, ymm4, ymm3 - vblendvpd ymm3, ymm2, ymm7, ymm3 - vpxor ymm11, ymm15, ymm14 - vpcmpgtq ymm4, ymm11, ymm4 - vblendvpd ymm4, ymm2, ymm15, ymm4 - vmovdqu ymm2, ymmword ptr [rdi + 8*rax + 32] - vmovdqa ymm15, ymmword ptr [rsp + 96] # 32-byte Reload - vpxor ymm11, ymm15, ymm14 - vpxor ymm7, ymm14, ymm2 - vpcmpgtq ymm11, ymm7, ymm11 - vblendvpd ymm11, ymm2, ymm15, ymm11 - vmovdqa ymm6, ymmword ptr [rsp + 32] # 32-byte Reload - vpxor ymm15, ymm14, ymm6 - vpcmpgtq ymm7, ymm15, ymm7 - vblendvpd ymm2, ymm2, ymm6, ymm7 - vmovdqu ymm6, ymmword ptr [rdi + 8*rax + 288] - vxorpd ymm7, ymm11, ymm14 - vpxor ymm15, ymm14, ymm6 - vpcmpgtq ymm7, ymm15, ymm7 - vblendvpd ymm7, ymm6, ymm11, ymm7 - vmovapd ymmword ptr [rsp + 96], ymm7 # 32-byte Spill - vxorpd ymm7, ymm14, ymm2 - vpcmpgtq ymm7, ymm7, ymm15 - vblendvpd ymm2, ymm6, ymm2, ymm7 - vmovapd ymmword ptr [rsp + 64], ymm2 # 32-byte Spill - vmovdqu ymm6, ymmword ptr [rdi + 8*rax + 256] - vxorpd ymm7, ymm14, ymm3 - vpxor ymm11, ymm14, ymm6 - vpcmpgtq ymm7, ymm11, ymm7 - vblendvpd ymm2, ymm6, ymm3, ymm7 - vmovapd ymmword ptr [rsp], ymm2 # 32-byte Spill - vxorpd ymm7, ymm14, ymm4 - vpcmpgtq ymm7, ymm7, ymm11 - vblendvpd ymm2, ymm6, ymm4, ymm7 - vmovapd ymmword ptr [rsp + 32], ymm2 # 32-byte Spill - vmovdqu ymm6, ymmword ptr [rdi + 8*rax + 320] - vxorpd ymm7, ymm14, ymm5 - vpxor ymm11, ymm14, ymm6 - vpcmpgtq ymm7, ymm11, ymm7 +.LBB3_5: # =>This Inner Loop Header: Depth=1 + vmovdqu ymm9, ymmword ptr [rdi + 8*rax] + vpxor ymm10, ymm1, ymm0 + vpxor ymm11, ymm9, ymm0 + vpcmpgtq ymm10, ymm11, ymm10 + vblendvpd ymm1, ymm9, ymm1, ymm10 + vpxor ymm10, ymm5, ymm0 + 
vpcmpgtq ymm10, ymm10, ymm11 + vblendvpd ymm5, ymm9, ymm5, ymm10 + vmovdqu ymm9, ymmword ptr [rdi + 8*rax + 32] + vpxor ymm10, ymm4, ymm0 + vpxor ymm11, ymm9, ymm0 + vpcmpgtq ymm10, ymm11, ymm10 + vblendvpd ymm4, ymm9, ymm4, ymm10 + vpxor ymm10, ymm8, ymm0 + vpcmpgtq ymm10, ymm10, ymm11 + vmovdqu ymm11, ymmword ptr [rdi + 8*rax + 64] + vblendvpd ymm8, ymm9, ymm8, ymm10 + vpxor ymm9, ymm3, ymm0 + vpxor ymm10, ymm11, ymm0 + vpcmpgtq ymm9, ymm10, ymm9 + vblendvpd ymm3, ymm11, ymm3, ymm9 + vpxor ymm9, ymm7, ymm0 + vpcmpgtq ymm9, ymm9, ymm10 + vblendvpd ymm7, ymm11, ymm7, ymm9 + vmovdqu ymm9, ymmword ptr [rdi + 8*rax + 96] + vpxor ymm10, ymm2, ymm0 + vpxor ymm11, ymm9, ymm0 + vpcmpgtq ymm10, ymm11, ymm10 + vblendvpd ymm2, ymm9, ymm2, ymm10 + vpxor ymm10, ymm6, ymm0 + vpcmpgtq ymm10, ymm10, ymm11 + vblendvpd ymm6, ymm9, ymm6, ymm10 + add rax, 16 + cmp r9, rax + jne .LBB3_5 +# %bb.6: + vpxor ymm9, ymm8, ymm0 + vpxor ymm10, ymm5, ymm0 + vpcmpgtq ymm9, ymm10, ymm9 + vblendvpd ymm5, ymm8, ymm5, ymm9 + vxorpd ymm8, ymm5, ymm0 + vpxor ymm9, ymm7, ymm0 + vpcmpgtq ymm8, ymm8, ymm9 + vblendvpd ymm5, ymm7, ymm5, ymm8 + vxorpd ymm7, ymm5, ymm0 + vpxor ymm8, ymm6, ymm0 + vpcmpgtq ymm7, ymm7, ymm8 vblendvpd ymm5, ymm6, ymm5, ymm7 - vxorpd ymm7, ymm9, ymm14 - vpcmpgtq ymm7, ymm7, ymm11 - vblendvpd ymm7, ymm6, ymm9, ymm7 - vmovdqu ymm6, ymmword ptr [rdi + 8*rax + 352] - vxorpd ymm9, ymm14, ymm0 - vpxor ymm11, ymm14, ymm6 - vpcmpgtq ymm9, ymm11, ymm9 - vblendvpd ymm9, ymm6, ymm0, ymm9 - vxorpd ymm0, ymm10, ymm14 - vpcmpgtq ymm0, ymm0, ymm11 - vblendvpd ymm10, ymm6, ymm10, ymm0 - vmovdqu ymm6, ymmword ptr [rdi + 8*rax + 384] - vxorpd ymm0, ymm14, ymm1 - vpxor ymm11, ymm14, ymm6 - vpcmpgtq ymm0, ymm11, ymm0 - vblendvpd ymm4, ymm6, ymm1, ymm0 - vxorpd ymm1, ymm12, ymm14 - vpcmpgtq ymm1, ymm1, ymm11 - vblendvpd ymm3, ymm6, ymm12, ymm1 - vmovdqu ymm11, ymmword ptr [rdi + 8*rax + 416] - vxorpd ymm6, ymm8, ymm14 - vpxor ymm12, ymm11, ymm14 - vpcmpgtq ymm6, ymm12, ymm6 - vblendvpd ymm6, ymm11, 
ymm8, ymm6 - vxorpd ymm8, ymm13, ymm14 - vpcmpgtq ymm8, ymm8, ymm12 - vblendvpd ymm12, ymm11, ymm13, ymm8 - vmovdqu ymm11, ymmword ptr [rdi + 8*rax + 448] - vmovdqa ymm0, ymmword ptr [rsp + 160] # 32-byte Reload - vpxor ymm8, ymm14, ymm0 - vpxor ymm13, ymm11, ymm14 - vpcmpgtq ymm8, ymm13, ymm8 - vblendvpd ymm8, ymm11, ymm0, ymm8 - vmovdqa ymm0, ymmword ptr [rsp + 192] # 32-byte Reload - vpxor ymm15, ymm14, ymm0 - vpcmpgtq ymm13, ymm15, ymm13 - vblendvpd ymm13, ymm11, ymm0, ymm13 - vmovdqu ymm11, ymmword ptr [rdi + 8*rax + 480] - vmovdqa ymm1, ymmword ptr [rsp + 128] # 32-byte Reload - vpxor ymm15, ymm14, ymm1 - vpxor ymm0, ymm11, ymm14 - vpcmpgtq ymm15, ymm0, ymm15 - vblendvpd ymm1, ymm11, ymm1, ymm15 - vmovdqa ymm2, ymmword ptr [rsp + 224] # 32-byte Reload - vpxor ymm15, ymm14, ymm2 - vpcmpgtq ymm0, ymm15, ymm0 - vblendvpd ymm15, ymm11, ymm2, ymm0 - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 736] - vxorpd ymm11, ymm14, ymm1 - vpxor ymm2, ymm14, ymm0 - vpcmpgtq ymm11, ymm2, ymm11 - vblendvpd ymm1, ymm0, ymm1, ymm11 - vmovapd ymmword ptr [rsp + 128], ymm1 # 32-byte Spill - vxorpd ymm1, ymm15, ymm14 - vpcmpgtq ymm1, ymm1, ymm2 - vblendvpd ymm0, ymm0, ymm15, ymm1 - vmovapd ymmword ptr [rsp + 224], ymm0 # 32-byte Spill - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 704] - vxorpd ymm1, ymm8, ymm14 - vpxor ymm2, ymm14, ymm0 - vpcmpgtq ymm1, ymm2, ymm1 - vblendvpd ymm1, ymm0, ymm8, ymm1 - vmovapd ymmword ptr [rsp + 160], ymm1 # 32-byte Spill - vxorpd ymm1, ymm13, ymm14 - vpcmpgtq ymm1, ymm1, ymm2 - vblendvpd ymm0, ymm0, ymm13, ymm1 - vmovapd ymmword ptr [rsp + 192], ymm0 # 32-byte Spill - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 672] - vxorpd ymm1, ymm14, ymm6 - vpxor ymm2, ymm14, ymm0 - vpcmpgtq ymm1, ymm2, ymm1 - vblendvpd ymm15, ymm0, ymm6, ymm1 - vxorpd ymm1, ymm12, ymm14 - vpcmpgtq ymm1, ymm1, ymm2 - vblendvpd ymm13, ymm0, ymm12, ymm1 - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 640] - vxorpd ymm1, ymm14, ymm4 - vpxor ymm2, ymm14, ymm0 - vpcmpgtq ymm1, ymm2, ymm1 - vblendvpd 
ymm12, ymm0, ymm4, ymm1 - vxorpd ymm1, ymm14, ymm3 - vpcmpgtq ymm1, ymm1, ymm2 - vblendvpd ymm8, ymm0, ymm3, ymm1 - vmovdqu ymm2, ymmword ptr [rdi + 8*rax + 608] - vxorpd ymm1, ymm9, ymm14 - vpxor ymm3, ymm14, ymm2 - vpcmpgtq ymm1, ymm3, ymm1 - vblendvpd ymm1, ymm2, ymm9, ymm1 - vxorpd ymm4, ymm10, ymm14 - vpcmpgtq ymm3, ymm4, ymm3 - vblendvpd ymm10, ymm2, ymm10, ymm3 - vmovdqu ymm2, ymmword ptr [rdi + 8*rax + 576] - vxorpd ymm3, ymm14, ymm5 - vpxor ymm4, ymm14, ymm2 - vpcmpgtq ymm3, ymm4, ymm3 - vblendvpd ymm5, ymm2, ymm5, ymm3 - vxorpd ymm3, ymm14, ymm7 - vpcmpgtq ymm3, ymm3, ymm4 - vblendvpd ymm9, ymm2, ymm7, ymm3 - vmovdqu ymm2, ymmword ptr [rdi + 8*rax + 512] - vmovdqa ymm0, ymmword ptr [rsp] # 32-byte Reload - vpxor ymm3, ymm14, ymm0 - vpxor ymm4, ymm14, ymm2 - vpcmpgtq ymm3, ymm4, ymm3 - vblendvpd ymm3, ymm2, ymm0, ymm3 - vmovdqa ymm0, ymmword ptr [rsp + 32] # 32-byte Reload - vpxor ymm6, ymm14, ymm0 - vpcmpgtq ymm4, ymm6, ymm4 - vblendvpd ymm4, ymm2, ymm0, ymm4 - vmovdqu ymm2, ymmword ptr [rdi + 8*rax + 544] - vmovdqa ymm0, ymmword ptr [rsp + 96] # 32-byte Reload - vpxor ymm6, ymm14, ymm0 - vpxor ymm7, ymm14, ymm2 + vextractf128 xmm6, ymm5, 1 + vxorpd xmm8, xmm6, xmm0 + vxorpd xmm7, xmm5, xmm0 + vpcmpgtq xmm7, xmm7, xmm8 + vblendvpd xmm5, xmm6, xmm5, xmm7 + vpermilps xmm6, xmm5, 78 # xmm6 = xmm5[2,3,0,1] + vxorpd xmm8, xmm5, xmm0 + vxorpd xmm7, xmm6, xmm0 + vpcmpgtq xmm7, xmm8, xmm7 + vblendvpd xmm5, xmm6, xmm5, xmm7 + vpxor ymm6, ymm1, ymm0 + vpxor ymm7, ymm4, ymm0 vpcmpgtq ymm6, ymm7, ymm6 - vblendvpd ymm6, ymm2, ymm0, ymm6 - vmovdqa ymm0, ymmword ptr [rsp + 64] # 32-byte Reload - vpxor ymm11, ymm14, ymm0 - vpcmpgtq ymm7, ymm11, ymm7 - vblendvpd ymm2, ymm2, ymm0, ymm7 - vmovdqu ymm7, ymmword ptr [rdi + 8*rax + 800] - vxorpd ymm11, ymm14, ymm6 - vpxor ymm0, ymm14, ymm7 - vpcmpgtq ymm11, ymm0, ymm11 - vblendvpd ymm6, ymm7, ymm6, ymm11 - vmovapd ymmword ptr [rsp + 96], ymm6 # 32-byte Spill - vxorpd ymm6, ymm14, ymm2 - vpcmpgtq ymm0, ymm6, ymm0 - vblendvpd 
ymm0, ymm7, ymm2, ymm0 - vmovapd ymmword ptr [rsp + 32], ymm0 # 32-byte Spill - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 768] - vxorpd ymm2, ymm14, ymm3 - vpxor ymm7, ymm14, ymm0 - vpcmpgtq ymm2, ymm7, ymm2 - vblendvpd ymm2, ymm0, ymm3, ymm2 - vmovapd ymmword ptr [rsp + 64], ymm2 # 32-byte Spill - vxorpd ymm2, ymm14, ymm4 - vpcmpgtq ymm2, ymm2, ymm7 - vblendvpd ymm4, ymm0, ymm4, ymm2 - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 832] - vxorpd ymm2, ymm14, ymm5 - vpxor ymm3, ymm14, ymm0 - vpcmpgtq ymm2, ymm3, ymm2 - vblendvpd ymm5, ymm0, ymm5, ymm2 - vxorpd ymm2, ymm9, ymm14 - vpcmpgtq ymm2, ymm2, ymm3 - vblendvpd ymm0, ymm0, ymm9, ymm2 - vmovapd ymmword ptr [rsp], ymm0 # 32-byte Spill - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 864] - vxorpd ymm2, ymm14, ymm1 - vpxor ymm3, ymm14, ymm0 - vpcmpgtq ymm2, ymm3, ymm2 - vblendvpd ymm7, ymm0, ymm1, ymm2 - vxorpd ymm1, ymm10, ymm14 - vpcmpgtq ymm1, ymm1, ymm3 - vblendvpd ymm3, ymm0, ymm10, ymm1 - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 896] - vxorpd ymm1, ymm12, ymm14 - vpxor ymm2, ymm14, ymm0 - vpcmpgtq ymm1, ymm2, ymm1 - vblendvpd ymm12, ymm0, ymm12, ymm1 - vxorpd ymm1, ymm8, ymm14 - vpcmpgtq ymm1, ymm1, ymm2 - vblendvpd ymm9, ymm0, ymm8, ymm1 - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 928] - vxorpd ymm1, ymm15, ymm14 - vpxor ymm2, ymm14, ymm0 - vpcmpgtq ymm1, ymm2, ymm1 - vblendvpd ymm10, ymm0, ymm15, ymm1 - vxorpd ymm1, ymm13, ymm14 - vpcmpgtq ymm1, ymm1, ymm2 - vblendvpd ymm8, ymm0, ymm13, ymm1 - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 960] - vmovdqa ymm6, ymmword ptr [rsp + 160] # 32-byte Reload - vpxor ymm1, ymm14, ymm6 - vpxor ymm2, ymm14, ymm0 - vpcmpgtq ymm1, ymm2, ymm1 - vblendvpd ymm11, ymm0, ymm6, ymm1 - vmovdqa ymm6, ymmword ptr [rsp + 192] # 32-byte Reload - vpxor ymm1, ymm14, ymm6 - vpcmpgtq ymm1, ymm1, ymm2 - vblendvpd ymm15, ymm0, ymm6, ymm1 - vmovdqu ymm0, ymmword ptr [rdi + 8*rax + 992] - vmovdqa ymm6, ymmword ptr [rsp + 128] # 32-byte Reload - vpxor ymm1, ymm14, ymm6 - vpxor ymm2, ymm14, ymm0 - vpcmpgtq 
ymm1, ymm2, ymm1 - vblendvpd ymm13, ymm0, ymm6, ymm1 - vmovdqa ymm6, ymmword ptr [rsp + 224] # 32-byte Reload - vpxor ymm1, ymm14, ymm6 - vpcmpgtq ymm1, ymm1, ymm2 - vblendvpd ymm0, ymm0, ymm6, ymm1 - sub rax, -128 - add r10, 4 - jne .LBB3_9 -.LBB3_10: - vmovaps ymmword ptr [rsp + 128], ymm10 # 32-byte Spill - test r9, r9 - vmovdqa ymm10, ymm12 - vmovdqa ymm12, ymm3 - je .LBB3_13 -# %bb.11: - lea rax, [rdi + 8*rax] - neg r9 - vpbroadcastq ymm14, qword ptr [rip + .LCPI3_0] # ymm14 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] - .p2align 4, 0x90 -.LBB3_12: # =>This Inner Loop Header: Depth=1 - vmovdqu ymm1, ymmword ptr [rax + 32] - vmovdqa ymm6, ymm7 - vmovdqa ymm7, ymm5 - vmovdqa ymm5, ymm4 - vmovdqa ymm4, ymmword ptr [rsp + 96] # 32-byte Reload - vpxor ymm2, ymm14, ymm4 - vpxor ymm3, ymm14, ymm1 - vpcmpgtq ymm2, ymm3, ymm2 - vblendvpd ymm4, ymm1, ymm4, ymm2 - vmovapd ymmword ptr [rsp + 96], ymm4 # 32-byte Spill - vmovdqa ymm4, ymmword ptr [rsp + 32] # 32-byte Reload - vpxor ymm2, ymm14, ymm4 - vpcmpgtq ymm2, ymm2, ymm3 - vblendvpd ymm4, ymm1, ymm4, ymm2 - vmovapd ymmword ptr [rsp + 32], ymm4 # 32-byte Spill - vmovdqu ymm1, ymmword ptr [rax] - vmovdqa ymm4, ymmword ptr [rsp + 64] # 32-byte Reload - vpxor ymm2, ymm14, ymm4 - vpxor ymm3, ymm14, ymm1 - vpcmpgtq ymm2, ymm3, ymm2 - vblendvpd ymm4, ymm1, ymm4, ymm2 - vmovapd ymmword ptr [rsp + 64], ymm4 # 32-byte Spill - vmovdqa ymm4, ymm5 - vmovdqa ymm5, ymm7 - vmovdqa ymm7, ymm6 - vpxor ymm2, ymm14, ymm4 - vpcmpgtq ymm2, ymm2, ymm3 - vmovdqu ymm3, ymmword ptr [rax + 64] - vblendvpd ymm4, ymm1, ymm4, ymm2 - vpxor ymm1, ymm14, ymm3 - vpxor ymm2, ymm14, ymm5 - vpcmpgtq ymm2, ymm1, ymm2 - vblendvpd ymm5, ymm3, ymm5, ymm2 - vmovdqa ymm6, ymmword ptr [rsp] # 32-byte Reload - vpxor ymm2, ymm14, ymm6 - vpcmpgtq ymm1, ymm2, ymm1 - vblendvpd ymm6, ymm3, ymm6, ymm1 - vmovapd ymmword ptr [rsp], ymm6 # 32-byte Spill - vmovdqu ymm1, ymmword ptr [rax + 96] - vpxor ymm2, ymm14, ymm1 - vpxor ymm3, 
ymm14, ymm7 - vpcmpgtq ymm3, ymm2, ymm3 - vblendvpd ymm7, ymm1, ymm7, ymm3 - vpxor ymm3, ymm12, ymm14 - vpcmpgtq ymm2, ymm3, ymm2 - vmovdqu ymm3, ymmword ptr [rax + 128] - vblendvpd ymm12, ymm1, ymm12, ymm2 - vpxor ymm1, ymm14, ymm3 - vpxor ymm2, ymm10, ymm14 - vpcmpgtq ymm2, ymm1, ymm2 - vblendvpd ymm10, ymm3, ymm10, ymm2 - vpxor ymm2, ymm9, ymm14 - vpcmpgtq ymm1, ymm2, ymm1 - vblendvpd ymm9, ymm3, ymm9, ymm1 - vmovdqu ymm1, ymmword ptr [rax + 160] - vpxor ymm2, ymm14, ymm1 - vmovdqa ymm6, ymmword ptr [rsp + 128] # 32-byte Reload - vpxor ymm3, ymm14, ymm6 - vpcmpgtq ymm3, ymm2, ymm3 - vblendvpd ymm6, ymm1, ymm6, ymm3 - vmovapd ymmword ptr [rsp + 128], ymm6 # 32-byte Spill - vpxor ymm3, ymm8, ymm14 - vpcmpgtq ymm2, ymm3, ymm2 - vmovdqu ymm3, ymmword ptr [rax + 192] - vblendvpd ymm8, ymm1, ymm8, ymm2 - vpxor ymm1, ymm14, ymm3 - vpxor ymm2, ymm11, ymm14 - vpcmpgtq ymm2, ymm1, ymm2 - vblendvpd ymm11, ymm3, ymm11, ymm2 - vpxor ymm2, ymm15, ymm14 - vpcmpgtq ymm1, ymm2, ymm1 - vblendvpd ymm15, ymm3, ymm15, ymm1 - vmovdqu ymm1, ymmword ptr [rax + 224] - vpxor ymm2, ymm14, ymm1 - vpxor ymm3, ymm13, ymm14 - vpcmpgtq ymm3, ymm2, ymm3 - vblendvpd ymm13, ymm1, ymm13, ymm3 - vpxor ymm3, ymm14, ymm0 - vpcmpgtq ymm2, ymm3, ymm2 - vblendvpd ymm0, ymm1, ymm0, ymm2 - add rax, 256 - inc r9 - jne .LBB3_12 -.LBB3_13: - vpbroadcastq ymm14, qword ptr [rip + .LCPI3_0] # ymm14 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] - vmovdqa ymm3, ymmword ptr [rsp] # 32-byte Reload - vpxor ymm1, ymm14, ymm3 - vpxor ymm2, ymm15, ymm14 - vpcmpgtq ymm1, ymm1, ymm2 - vblendvpd ymm1, ymm15, ymm3, ymm1 - vpxor ymm2, ymm14, ymm4 - vpxor ymm3, ymm9, ymm14 - vpcmpgtq ymm2, ymm2, ymm3 - vblendvpd ymm2, ymm9, ymm4, ymm2 - vpxor ymm3, ymm12, ymm14 - vpxor ymm9, ymm14, ymm0 - vpcmpgtq ymm3, ymm3, ymm9 - vblendvpd ymm0, ymm0, ymm12, ymm3 - vmovdqa ymm4, ymmword ptr [rsp + 32] # 32-byte Reload - vpxor ymm3, ymm14, ymm4 - vpxor ymm9, ymm8, ymm14 - vpcmpgtq ymm3, ymm3, ymm9 - 
vblendvpd ymm3, ymm8, ymm4, ymm3 - vxorpd ymm6, ymm14, ymm3 - vxorpd ymm9, ymm14, ymm0 - vpcmpgtq ymm6, ymm6, ymm9 - vblendvpd ymm0, ymm0, ymm3, ymm6 - vxorpd ymm3, ymm14, ymm2 - vxorpd ymm6, ymm14, ymm1 - vpcmpgtq ymm3, ymm3, ymm6 - vblendvpd ymm1, ymm1, ymm2, ymm3 - vxorpd ymm2, ymm14, ymm1 - vxorpd ymm3, ymm14, ymm0 - vpcmpgtq ymm2, ymm2, ymm3 - vblendvpd ymm0, ymm0, ymm1, ymm2 - vextractf128 xmm1, ymm0, 1 - vxorpd xmm2, xmm14, xmm1 - vxorpd xmm3, xmm14, xmm0 - vpcmpgtq xmm2, xmm3, xmm2 - vblendvpd xmm0, xmm1, xmm0, xmm2 - vpermilps xmm1, xmm0, 78 # xmm1 = xmm0[2,3,0,1] - vxorpd xmm2, xmm14, xmm0 - vxorpd xmm3, xmm14, xmm1 - vpcmpgtq xmm2, xmm2, xmm3 - vblendvpd xmm0, xmm1, xmm0, xmm2 - vpxor ymm1, ymm14, ymm5 - vpxor ymm2, ymm11, ymm14 - vpcmpgtq ymm1, ymm2, ymm1 - vblendvpd ymm1, ymm11, ymm5, ymm1 - vmovdqa ymm4, ymmword ptr [rsp + 64] # 32-byte Reload - vpxor ymm2, ymm14, ymm4 - vpxor ymm3, ymm10, ymm14 - vpcmpgtq ymm2, ymm3, ymm2 - vblendvpd ymm2, ymm10, ymm4, ymm2 - vpxor ymm3, ymm14, ymm7 - vpxor ymm5, ymm13, ymm14 - vpcmpgtq ymm3, ymm5, ymm3 - vblendvpd ymm3, ymm13, ymm7, ymm3 - vmovdqa ymm6, ymmword ptr [rsp + 96] # 32-byte Reload - vpxor ymm4, ymm14, ymm6 - vmovdqa ymm7, ymmword ptr [rsp + 128] # 32-byte Reload - vpxor ymm5, ymm14, ymm7 - vpcmpgtq ymm4, ymm5, ymm4 - vblendvpd ymm4, ymm7, ymm6, ymm4 - vxorpd ymm5, ymm14, ymm4 - vxorpd ymm6, ymm14, ymm3 - vpcmpgtq ymm5, ymm6, ymm5 - vblendvpd ymm3, ymm3, ymm4, ymm5 - vxorpd ymm4, ymm14, ymm2 - vxorpd ymm5, ymm14, ymm1 - vpcmpgtq ymm4, ymm5, ymm4 - vblendvpd ymm1, ymm1, ymm2, ymm4 - vxorpd ymm2, ymm14, ymm1 - vxorpd ymm4, ymm14, ymm3 - vpcmpgtq ymm2, ymm4, ymm2 - vblendvpd ymm1, ymm3, ymm1, ymm2 + vblendvpd ymm1, ymm4, ymm1, ymm6 + vxorpd ymm4, ymm1, ymm0 + vpxor ymm6, ymm3, ymm0 + vpcmpgtq ymm4, ymm6, ymm4 + vblendvpd ymm1, ymm3, ymm1, ymm4 + vmovq r10, xmm5 + vxorpd ymm3, ymm1, ymm0 + vpxor ymm4, ymm2, ymm0 + vpcmpgtq ymm3, ymm4, ymm3 + vblendvpd ymm1, ymm2, ymm1, ymm3 vextractf128 xmm2, ymm1, 1 - vxorpd 
xmm3, xmm14, xmm1 - vxorpd xmm4, xmm14, xmm2 + vxorpd xmm3, xmm1, xmm0 + vxorpd xmm4, xmm2, xmm0 vpcmpgtq xmm3, xmm4, xmm3 vblendvpd xmm1, xmm2, xmm1, xmm3 vpermilps xmm2, xmm1, 78 # xmm2 = xmm1[2,3,0,1] - vxorpd xmm3, xmm14, xmm1 - vxorpd xmm4, xmm14, xmm2 - vpcmpgtq xmm3, xmm4, xmm3 - vblendvpd xmm1, xmm2, xmm1, xmm3 - vmovq rsi, xmm0 - vmovq r9, xmm1 - cmp r11, r8 - je .LBB3_14 -.LBB3_4: - mov rax, rsi + vxorpd xmm3, xmm1, xmm0 + vxorpd xmm0, xmm2, xmm0 + vpcmpgtq xmm0, xmm0, xmm3 + vblendvpd xmm0, xmm2, xmm1, xmm0 + vmovq rax, xmm0 + mov rsi, r10 + cmp r9, r8 + je .LBB3_8 .p2align 4, 0x90 -.LBB3_5: # =>This Inner Loop Header: Depth=1 - mov rsi, qword ptr [rdi + 8*r11] - cmp r9, rsi - cmovae r9, rsi +.LBB3_7: # =>This Inner Loop Header: Depth=1 + mov rsi, qword ptr [rdi + 8*r9] cmp rax, rsi - cmova rsi, rax - add r11, 1 - mov rax, rsi - cmp r8, r11 - jne .LBB3_5 -.LBB3_14: + cmovae rax, rsi + cmp r10, rsi + cmova rsi, r10 + add r9, 1 + mov r10, rsi + cmp r8, r9 + jne .LBB3_7 +.LBB3_8: mov qword ptr [rcx], rsi - mov qword ptr [rdx], r9 + mov qword ptr [rdx], rax mov rsp, rbp pop rbp vzeroupper @@ -1361,6 +468,6 @@ uint64_max_min_avx2: # @uint64_max_min_avx2 .Lfunc_end3: .size uint64_max_min_avx2, .Lfunc_end3-uint64_max_min_avx2 # -- End function - .ident "Ubuntu clang version 11.1.0-++20210204121720+1fdec59bffc1-1~exp1~20210203232336.162" + .ident "Debian clang version 11.1.0-++20210428103820+1fdec59bffc1-1~exp1~20210428204437.162" .section ".note.GNU-stack","",@progbits .addrsig diff --git a/go/parquet/internal/utils/_lib/min_max_sse4.s b/go/parquet/internal/utils/_lib/min_max_sse4.s index 98f30e3ed1d..893a0a73f02 100644 --- a/go/parquet/internal/utils/_lib/min_max_sse4.s +++ b/go/parquet/internal/utils/_lib/min_max_sse4.s @@ -608,6 +608,6 @@ uint64_max_min_sse4: # @uint64_max_min_sse4 .Lfunc_end3: .size uint64_max_min_sse4, .Lfunc_end3-uint64_max_min_sse4 # -- End function - .ident "Ubuntu clang version 
11.1.0-++20210204121720+1fdec59bffc1-1~exp1~20210203232336.162" + .ident "Debian clang version 11.1.0-++20210428103820+1fdec59bffc1-1~exp1~20210428204437.162" .section ".note.GNU-stack","",@progbits .addrsig diff --git a/go/parquet/internal/utils/_lib/unpack_bool_avx2.s b/go/parquet/internal/utils/_lib/unpack_bool_avx2.s index 1bc1be53d4d..6ac34887c00 100644 --- a/go/parquet/internal/utils/_lib/unpack_bool_avx2.s +++ b/go/parquet/internal/utils/_lib/unpack_bool_avx2.s @@ -1,6293 +1,104 @@ .text .intel_syntax noprefix .file "unpack_bool.c" - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5 # -- Begin function bytes_to_bools_avx2 -.LCPI0_0: - .long 24 # 0x18 - .long 25 # 0x19 - .long 26 # 0x1a - .long 27 # 0x1b - .long 28 # 0x1c - .long 29 # 0x1d - .long 30 # 0x1e - .long 31 # 0x1f -.LCPI0_1: - .long 16 # 0x10 - .long 17 # 0x11 - .long 18 # 0x12 - .long 19 # 0x13 - .long 20 # 0x14 - .long 21 # 0x15 - .long 22 # 0x16 - .long 23 # 0x17 -.LCPI0_2: - .long 8 # 0x8 - .long 9 # 0x9 - .long 10 # 0xa - .long 11 # 0xb - .long 12 # 0xc - .long 13 # 0xd - .long 14 # 0xe - .long 15 # 0xf -.LCPI0_3: - .long 0 # 0x0 - .long 1 # 0x1 - .long 2 # 0x2 - .long 3 # 0x3 - .long 4 # 0x4 - .long 5 # 0x5 - .long 6 # 0x6 - .long 7 # 0x7 -.LCPI0_4: - .zero 32,1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3 -.LCPI0_5: - .quad 1 # 0x1 -.LCPI0_6: - .quad 2 # 0x2 -.LCPI0_7: - .quad 3 # 0x3 -.LCPI0_8: - .quad 4 # 0x4 -.LCPI0_9: - .quad 5 # 0x5 -.LCPI0_10: - .quad 6 # 0x6 -.LCPI0_11: - .quad 7 # 0x7 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2 -.LCPI0_12: - .long 32 # 0x20 - .text - .globl bytes_to_bools_avx2 + .globl bytes_to_bools_avx2 # -- Begin function bytes_to_bools_avx2 .p2align 4, 0x90 .type bytes_to_bools_avx2,@function bytes_to_bools_avx2: # @bytes_to_bools_avx2 # %bb.0: push rbp mov rbp, rsp - push r15 - push r14 - push r13 - push r12 - push rbx - and rsp, -32 - sub rsp, 960 + and rsp, -8 test esi, esi - jle .LBB0_1051 + jle .LBB0_5 # %bb.1: - mov r9d, ecx - mov r8, 
rdx - mov r10d, esi - cmp esi, 32 - jae .LBB0_3 -.LBB0_2: - xor r12d, r12d -.LBB0_1055: - lea ecx, [8*r12] - jmp .LBB0_1057 - .p2align 4, 0x90 -.LBB0_1056: # in Loop: Header=BB0_1057 Depth=1 - add r12, 1 - add ecx, 8 - cmp r10, r12 - je .LBB0_1051 -.LBB0_1057: # =>This Inner Loop Header: Depth=1 - mov edx, ecx - mov ecx, ecx - cmp edx, r9d - jge .LBB0_1056 -# %bb.1058: # in Loop: Header=BB0_1057 Depth=1 - movzx edx, byte ptr [rdi + r12] - and dl, 1 - mov byte ptr [r8 + rcx], dl - mov rdx, rcx - or rdx, 1 - cmp edx, r9d - jge .LBB0_1056 -# %bb.1059: # in Loop: Header=BB0_1057 Depth=1 - movzx ebx, byte ptr [rdi + r12] - shr bl - and bl, 1 - mov byte ptr [r8 + rdx], bl - mov rdx, rcx - or rdx, 2 - cmp edx, r9d - jge .LBB0_1056 -# %bb.1060: # in Loop: Header=BB0_1057 Depth=1 - movzx ebx, byte ptr [rdi + r12] - shr bl, 2 - and bl, 1 - mov byte ptr [r8 + rdx], bl - mov rdx, rcx - or rdx, 3 - cmp edx, r9d - jge .LBB0_1056 -# %bb.1061: # in Loop: Header=BB0_1057 Depth=1 - movzx ebx, byte ptr [rdi + r12] - shr bl, 3 - and bl, 1 - mov byte ptr [r8 + rdx], bl - mov rdx, rcx - or rdx, 4 - cmp edx, r9d - jge .LBB0_1056 -# %bb.1062: # in Loop: Header=BB0_1057 Depth=1 - movzx ebx, byte ptr [rdi + r12] - shr bl, 4 - and bl, 1 - mov byte ptr [r8 + rdx], bl - mov rdx, rcx - or rdx, 5 - cmp edx, r9d - jge .LBB0_1056 -# %bb.1063: # in Loop: Header=BB0_1057 Depth=1 - movzx ebx, byte ptr [rdi + r12] - shr bl, 5 - and bl, 1 - mov byte ptr [r8 + rdx], bl - mov rdx, rcx - or rdx, 6 - cmp edx, r9d - jge .LBB0_1056 -# %bb.1064: # in Loop: Header=BB0_1057 Depth=1 - movzx ebx, byte ptr [rdi + r12] - shr bl, 6 - and bl, 1 - mov byte ptr [r8 + rdx], bl - mov rdx, rcx - or rdx, 7 - cmp edx, r9d - jge .LBB0_1056 -# %bb.1065: # in Loop: Header=BB0_1057 Depth=1 - movzx ebx, byte ptr [rdi + r12] - shr bl, 7 - mov byte ptr [r8 + rdx], bl - jmp .LBB0_1056 -.LBB0_3: - mov dword ptr [rsp + 16], r9d # 4-byte Spill - mov qword ptr [rsp + 48], r10 # 8-byte Spill - lea rsi, [r10 - 1] - mov ecx, 8 - mov eax, 
esi - mul ecx - seto r14b - mov rbx, rsi - shr rbx, 32 - lea rcx, [r8 + 6] - mov edx, 8 - mov rax, rsi - mul rdx - seto sil - add rcx, rax - setb dl - lea rcx, [r8 + 7] - add rcx, rax - setb r13b - lea rcx, [r8 + 5] - add rcx, rax - setb r9b - lea rcx, [r8 + 4] - add rcx, rax - setb r15b - lea rcx, [r8 + 3] - add rcx, rax - setb r11b - lea rcx, [r8 + 2] - add rcx, rax - setb r10b - lea rcx, [r8 + 1] - add rcx, rax - setb cl - add rax, r8 - setb al - xor r12d, r12d - test rbx, rbx - jne .LBB0_1052 -# %bb.4: - test r14b, r14b - jne .LBB0_1052 -# %bb.5: - test dl, dl - jne .LBB0_1052 -# %bb.6: - test sil, sil - jne .LBB0_1052 -# %bb.7: - test r13b, r13b - jne .LBB0_1052 -# %bb.8: - test sil, sil - jne .LBB0_1052 -# %bb.9: - test r9b, r9b - jne .LBB0_1052 -# %bb.10: - test sil, sil - jne .LBB0_1052 -# %bb.11: - test r15b, r15b - jne .LBB0_1052 -# %bb.12: - test sil, sil - jne .LBB0_1052 -# %bb.13: - test r11b, r11b - jne .LBB0_1052 -# %bb.14: - test sil, sil - jne .LBB0_1052 -# %bb.15: - test r10b, r10b - jne .LBB0_1052 -# %bb.16: - test sil, sil - mov r10, qword ptr [rsp + 48] # 8-byte Reload - jne .LBB0_1054 -# %bb.17: - test cl, cl - jne .LBB0_1054 -# %bb.18: - test sil, sil - mov r9d, dword ptr [rsp + 16] # 4-byte Reload - jne .LBB0_1055 -# %bb.19: - test al, al - jne .LBB0_1055 -# %bb.20: - test sil, sil - jne .LBB0_1055 -# %bb.21: - lea rax, [r8 + 8*r10] - cmp rax, rdi - jbe .LBB0_24 -# %bb.22: - lea rax, [rdi + r10] - cmp rax, r8 - ja .LBB0_2 -.LBB0_24: - mov r12d, r10d - and r12d, -32 - vmovd xmm0, r9d - vpbroadcastd ymm0, xmm0 - vmovdqa ymm9, ymmword ptr [rip + .LCPI0_0] # ymm9 = [24,25,26,27,28,29,30,31] - vmovdqa ymm8, ymmword ptr [rip + .LCPI0_1] # ymm8 = [16,17,18,19,20,21,22,23] - vmovdqa ymm3, ymmword ptr [rip + .LCPI0_2] # ymm3 = [8,9,10,11,12,13,14,15] - vmovdqa ymm2, ymmword ptr [rip + .LCPI0_3] # ymm2 = [0,1,2,3,4,5,6,7] - xor r11d, r11d - vbroadcastsd ymm1, qword ptr [rip + .LCPI0_5] # ymm1 = [1,1,1,1] - vmovaps ymmword ptr [rsp + 768], ymm1 # 
32-byte Spill - vbroadcastsd ymm1, qword ptr [rip + .LCPI0_6] # ymm1 = [2,2,2,2] - vmovaps ymmword ptr [rsp + 736], ymm1 # 32-byte Spill - vbroadcastsd ymm1, qword ptr [rip + .LCPI0_7] # ymm1 = [3,3,3,3] - vmovaps ymmword ptr [rsp + 704], ymm1 # 32-byte Spill - vbroadcastsd ymm1, qword ptr [rip + .LCPI0_8] # ymm1 = [4,4,4,4] - vmovaps ymmword ptr [rsp + 672], ymm1 # 32-byte Spill - vbroadcastsd ymm1, qword ptr [rip + .LCPI0_9] # ymm1 = [5,5,5,5] - vmovaps ymmword ptr [rsp + 640], ymm1 # 32-byte Spill - vbroadcastsd ymm1, qword ptr [rip + .LCPI0_10] # ymm1 = [6,6,6,6] - vmovaps ymmword ptr [rsp + 608], ymm1 # 32-byte Spill - vbroadcastsd ymm1, qword ptr [rip + .LCPI0_11] # ymm1 = [7,7,7,7] - vmovaps ymmword ptr [rsp + 576], ymm1 # 32-byte Spill - vpbroadcastd ymm1, dword ptr [rip + .LCPI0_12] # ymm1 = [32,32,32,32,32,32,32,32] - vmovdqa ymmword ptr [rsp + 544], ymm1 # 32-byte Spill - jmp .LBB0_26 - .p2align 4, 0x90 -.LBB0_25: # in Loop: Header=BB0_26 Depth=1 - add r11, 32 - vmovdqa ymm1, ymmword ptr [rsp + 544] # 32-byte Reload - vpaddd ymm2, ymm2, ymm1 - vpaddd ymm3, ymm3, ymm1 - vpaddd ymm8, ymm8, ymm1 - vpaddd ymm9, ymm9, ymm1 - cmp r11, r12 - je .LBB0_1050 -.LBB0_26: # =>This Inner Loop Header: Depth=1 - vmovdqa ymmword ptr [rsp + 800], ymm2 # 32-byte Spill - vpslld ymm1, ymm2, 3 - vpcmpgtd xmm2, xmm0, xmm1 - vmovd ecx, xmm2 - # implicit-def: $ymm4 - test cl, 1 - je .LBB0_28 -# %bb.27: # in Loop: Header=BB0_26 Depth=1 - vpbroadcastb ymm4, byte ptr [rdi + r11] -.LBB0_28: # in Loop: Header=BB0_26 Depth=1 - mov r10, r11 - or r10, 1 - vpcmpgtd xmm2, xmm0, xmm1 - vpackssdw xmm2, xmm2, xmm2 - vpacksswb xmm2, xmm2, xmm2 - vpextrb ecx, xmm2, 1 - test cl, 1 - je .LBB0_30 -# %bb.29: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm2, xmm4, byte ptr [rdi + r10], 1 - vpblendd ymm4, ymm4, ymm2, 15 # ymm4 = ymm2[0,1,2,3],ymm4[4,5,6,7] -.LBB0_30: # in Loop: Header=BB0_26 Depth=1 - mov r14, r11 - or r14, 2 - vpcmpgtd xmm2, xmm0, xmm1 - vpackssdw xmm2, xmm2, xmm2 - vpacksswb xmm2, 
xmm2, xmm2 - vpextrb ecx, xmm2, 2 - test cl, 1 - je .LBB0_32 -# %bb.31: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm2, xmm4, byte ptr [rdi + r14], 2 - vpblendd ymm4, ymm4, ymm2, 15 # ymm4 = ymm2[0,1,2,3],ymm4[4,5,6,7] -.LBB0_32: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm13, ymm1, 1 - mov rdx, r11 - or rdx, 3 - vpcmpgtd xmm2, xmm0, xmm1 - vpackssdw xmm2, xmm2, xmm2 - vpacksswb xmm2, xmm2, xmm2 - vpextrb ecx, xmm2, 3 - test cl, 1 - je .LBB0_34 -# %bb.33: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm2, xmm4, byte ptr [rdi + rdx], 3 - vpblendd ymm4, ymm4, ymm2, 15 # ymm4 = ymm2[0,1,2,3],ymm4[4,5,6,7] -.LBB0_34: # in Loop: Header=BB0_26 Depth=1 - mov rcx, r11 - or rcx, 4 - vextracti128 xmm7, ymm0, 1 - vpcmpgtd xmm2, xmm7, xmm13 - vpextrb r9d, xmm2, 0 - test r9b, 1 - mov qword ptr [rsp + 272], rdx # 8-byte Spill - mov qword ptr [rsp + 264], rcx # 8-byte Spill - je .LBB0_36 -# %bb.35: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm2, xmm4, byte ptr [rdi + rcx], 4 - vpblendd ymm4, ymm4, ymm2, 15 # ymm4 = ymm2[0,1,2,3],ymm4[4,5,6,7] -.LBB0_36: # in Loop: Header=BB0_26 Depth=1 - mov r15, r11 - or r15, 5 - vpcmpgtd ymm6, ymm0, ymm1 - vpackssdw ymm2, ymm6, ymm0 - vextracti128 xmm2, ymm2, 1 - vpbroadcastd xmm2, xmm2 - vpacksswb xmm2, xmm2, xmm2 - vpextrb ecx, xmm2, 5 - test cl, 1 - je .LBB0_38 -# %bb.37: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm2, xmm4, byte ptr [rdi + r15], 5 - vpblendd ymm4, ymm4, ymm2, 15 # ymm4 = ymm2[0,1,2,3],ymm4[4,5,6,7] -.LBB0_38: # in Loop: Header=BB0_26 Depth=1 - mov rbx, r11 - or rbx, 6 - vpackssdw ymm2, ymm6, ymm0 - vpermq ymm2, ymm2, 232 # ymm2 = ymm2[0,2,2,3] - vpacksswb xmm2, xmm2, xmm2 - vpextrb ecx, xmm2, 6 - test cl, 1 - je .LBB0_40 -# %bb.39: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm2, xmm4, byte ptr [rdi + rbx], 6 - vpblendd ymm4, ymm4, ymm2, 15 # ymm4 = ymm2[0,1,2,3],ymm4[4,5,6,7] -.LBB0_40: # in Loop: Header=BB0_26 Depth=1 - vpslld ymm2, ymm3, 3 - mov rax, r11 - or rax, 7 - vpackssdw ymm5, ymm6, ymm0 - vpermq ymm5, 
ymm5, 232 # ymm5 = ymm5[0,2,2,3] - vpacksswb xmm5, xmm5, xmm5 - vpextrb ecx, xmm5, 7 - test cl, 1 - je .LBB0_42 -# %bb.41: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm5, xmm4, byte ptr [rdi + rax], 7 - vpblendd ymm4, ymm4, ymm5, 15 # ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7] -.LBB0_42: # in Loop: Header=BB0_26 Depth=1 - mov rsi, r11 - or rsi, 8 - vpcmpgtd xmm5, xmm0, xmm2 - vpextrb ecx, xmm5, 0 - test cl, 1 - je .LBB0_44 -# %bb.43: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm5, xmm4, byte ptr [rdi + rsi], 8 - vpblendd ymm4, ymm4, ymm5, 15 # ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7] -.LBB0_44: # in Loop: Header=BB0_26 Depth=1 - mov rdx, r11 - or rdx, 9 - vpcmpgtd xmm5, xmm0, xmm2 - vpackssdw xmm5, xmm5, xmm5 - vpacksswb xmm5, xmm5, xmm5 - vpextrb ecx, xmm5, 9 - test cl, 1 - mov qword ptr [rsp + 224], rdx # 8-byte Spill - je .LBB0_46 -# %bb.45: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm5, xmm4, byte ptr [rdi + rdx], 9 - vpblendd ymm4, ymm4, ymm5, 15 # ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7] -.LBB0_46: # in Loop: Header=BB0_26 Depth=1 - mov rdx, r11 - or rdx, 10 - vpcmpgtd xmm5, xmm0, xmm2 - vpackssdw xmm5, xmm5, xmm5 - vpacksswb xmm5, xmm5, xmm5 - vpextrb ecx, xmm5, 10 - test cl, 1 - vmovdqa ymmword ptr [rsp + 832], ymm3 # 32-byte Spill - mov qword ptr [rsp + 96], rsi # 8-byte Spill - je .LBB0_48 -# %bb.47: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm5, xmm4, byte ptr [rdi + rdx], 10 - vpblendd ymm4, ymm4, ymm5, 15 # ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7] -.LBB0_48: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm5, ymm2, 1 - mov rsi, r11 - or rsi, 11 - vpcmpgtd xmm3, xmm0, xmm2 - vpackssdw xmm3, xmm3, xmm3 - vpacksswb xmm3, xmm3, xmm3 - vpextrb ecx, xmm3, 11 - test cl, 1 - mov qword ptr [rsp + 152], r10 # 8-byte Spill - mov qword ptr [rsp + 296], r14 # 8-byte Spill - mov qword ptr [rsp + 104], r15 # 8-byte Spill - mov qword ptr [rsp + 288], rbx # 8-byte Spill - mov qword ptr [rsp + 232], rax # 8-byte Spill - je .LBB0_50 -# %bb.49: # in Loop: Header=BB0_26 Depth=1 - vpinsrb 
xmm3, xmm4, byte ptr [rdi + rsi], 11 - vpblendd ymm4, ymm4, ymm3, 15 # ymm4 = ymm3[0,1,2,3],ymm4[4,5,6,7] -.LBB0_50: # in Loop: Header=BB0_26 Depth=1 - mov rcx, r11 - or rcx, 12 - vpcmpgtd xmm3, xmm7, xmm5 - vpextrb r14d, xmm3, 0 - test r14b, 1 - mov qword ptr [rsp + 256], rsi # 8-byte Spill - mov qword ptr [rsp + 248], rcx # 8-byte Spill - je .LBB0_52 -# %bb.51: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm3, xmm4, byte ptr [rdi + rcx], 12 - vpblendd ymm4, ymm4, ymm3, 15 # ymm4 = ymm3[0,1,2,3],ymm4[4,5,6,7] -.LBB0_52: # in Loop: Header=BB0_26 Depth=1 - mov rax, r11 - or rax, 13 - vpcmpgtd ymm7, ymm0, ymm2 - vpackssdw ymm3, ymm7, ymm0 - vextracti128 xmm3, ymm3, 1 - vpbroadcastd xmm3, xmm3 - vpacksswb xmm3, xmm3, xmm3 - vpextrb ecx, xmm3, 13 - test cl, 1 - je .LBB0_54 -# %bb.53: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm3, xmm4, byte ptr [rdi + rax], 13 - vpblendd ymm4, ymm4, ymm3, 15 # ymm4 = ymm3[0,1,2,3],ymm4[4,5,6,7] -.LBB0_54: # in Loop: Header=BB0_26 Depth=1 - mov rbx, r11 - or rbx, 14 - vpackssdw ymm3, ymm7, ymm0 - vpermq ymm3, ymm3, 232 # ymm3 = ymm3[0,2,2,3] - vpacksswb xmm3, xmm3, xmm3 - vpextrb ecx, xmm3, 14 - test cl, 1 - mov qword ptr [rsp + 80], rbx # 8-byte Spill - je .LBB0_56 -# %bb.55: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm3, xmm4, byte ptr [rdi + rbx], 14 - vpblendd ymm4, ymm4, ymm3, 15 # ymm4 = ymm3[0,1,2,3],ymm4[4,5,6,7] -.LBB0_56: # in Loop: Header=BB0_26 Depth=1 - vpslld ymm10, ymm8, 3 - mov rsi, r11 - or rsi, 15 - vpackssdw ymm3, ymm7, ymm0 - vpermq ymm3, ymm3, 232 # ymm3 = ymm3[0,2,2,3] - vpacksswb xmm3, xmm3, xmm3 - vpextrb ecx, xmm3, 15 - test cl, 1 - je .LBB0_58 -# %bb.57: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm3, xmm4, byte ptr [rdi + rsi], 15 - vpblendd ymm4, ymm4, ymm3, 15 # ymm4 = ymm3[0,1,2,3],ymm4[4,5,6,7] -.LBB0_58: # in Loop: Header=BB0_26 Depth=1 - mov r15, r11 - or r15, 16 - vpcmpgtd xmm3, xmm0, xmm10 - vmovd ecx, xmm3 - test cl, 1 - mov qword ptr [rsp + 64], r15 # 8-byte Spill - mov qword ptr [rsp + 72], rsi 
# 8-byte Spill - je .LBB0_60 -# %bb.59: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm3, ymm4, 1 - vpinsrb xmm3, xmm3, byte ptr [rdi + r15], 0 - vinserti128 ymm4, ymm4, xmm3, 1 -.LBB0_60: # in Loop: Header=BB0_26 Depth=1 - mov rsi, r11 - or rsi, 17 - vpcmpgtd xmm3, xmm0, xmm10 - vpackssdw xmm3, xmm3, xmm3 - vpermq ymm3, ymm3, 212 # ymm3 = ymm3[0,1,1,3] - vpacksswb ymm3, ymm3, ymm0 - vextracti128 xmm3, ymm3, 1 - vpextrb ecx, xmm3, 1 - test cl, 1 - je .LBB0_62 -# %bb.61: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm3, ymm4, 1 - vpinsrb xmm3, xmm3, byte ptr [rdi + rsi], 1 - vinserti128 ymm4, ymm4, xmm3, 1 -.LBB0_62: # in Loop: Header=BB0_26 Depth=1 - mov rbx, r11 - or rbx, 18 - vpcmpgtd xmm3, xmm0, xmm10 - vpackssdw xmm3, xmm3, xmm3 - vpermq ymm3, ymm3, 212 # ymm3 = ymm3[0,1,1,3] - vpacksswb ymm3, ymm3, ymm0 - vextracti128 xmm3, ymm3, 1 - vpextrb ecx, xmm3, 2 - test cl, 1 - je .LBB0_64 -# %bb.63: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm3, ymm4, 1 - vpinsrb xmm3, xmm3, byte ptr [rdi + rbx], 2 - vinserti128 ymm4, ymm4, xmm3, 1 -.LBB0_64: # in Loop: Header=BB0_26 Depth=1 - mov r15, r11 - or r15, 19 - vpcmpgtd xmm3, xmm0, xmm10 - vpackssdw xmm3, xmm3, xmm3 - vpermq ymm3, ymm3, 212 # ymm3 = ymm3[0,1,1,3] - vpacksswb ymm3, ymm3, ymm0 - vextracti128 xmm3, ymm3, 1 - vpextrb ecx, xmm3, 3 - test cl, 1 - vmovdqa ymmword ptr [rsp + 864], ymm8 # 32-byte Spill - je .LBB0_66 -# %bb.65: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm3, ymm4, 1 - vpinsrb xmm3, xmm3, byte ptr [rdi + r15], 3 - vinserti128 ymm4, ymm4, xmm3, 1 -.LBB0_66: # in Loop: Header=BB0_26 Depth=1 - mov r13, r11 - or r13, 20 - vpcmpgtd ymm8, ymm0, ymm10 - vpackssdw ymm3, ymm0, ymm8 - vpacksswb ymm3, ymm3, ymm0 - vextracti128 xmm3, ymm3, 1 - vpextrb ecx, xmm3, 4 - test cl, 1 - mov qword ptr [rsp + 56], r13 # 8-byte Spill - je .LBB0_68 -# %bb.67: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm3, ymm4, 1 - vpinsrb xmm3, xmm3, byte ptr [rdi + r13], 4 - vinserti128 ymm4, ymm4, xmm3, 1 
-.LBB0_68: # in Loop: Header=BB0_26 Depth=1 - mov r13, r11 - or r13, 21 - vpackssdw ymm3, ymm0, ymm8 - vpacksswb ymm3, ymm3, ymm0 - vextracti128 xmm3, ymm3, 1 - vpextrb ecx, xmm3, 5 - test cl, 1 - mov qword ptr [rsp + 128], rbx # 8-byte Spill - je .LBB0_70 -# %bb.69: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm3, ymm4, 1 - vpinsrb xmm3, xmm3, byte ptr [rdi + r13], 5 - vinserti128 ymm4, ymm4, xmm3, 1 -.LBB0_70: # in Loop: Header=BB0_26 Depth=1 - mov r10, r11 - or r10, 22 - vpackssdw ymm3, ymm0, ymm8 - vpacksswb ymm3, ymm3, ymm0 - vextracti128 xmm3, ymm3, 1 - vpextrb ecx, xmm3, 6 - test cl, 1 - je .LBB0_72 -# %bb.71: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm3, ymm4, 1 - vpinsrb xmm3, xmm3, byte ptr [rdi + r10], 6 - vinserti128 ymm4, ymm4, xmm3, 1 -.LBB0_72: # in Loop: Header=BB0_26 Depth=1 - vpslld ymm11, ymm9, 3 - mov rbx, r11 - or rbx, 23 - vpackssdw ymm3, ymm0, ymm8 - vpacksswb ymm3, ymm3, ymm0 - vextracti128 xmm3, ymm3, 1 - vpextrb ecx, xmm3, 7 - test cl, 1 - mov qword ptr [rsp + 240], rbx # 8-byte Spill - vmovdqa ymmword ptr [rsp + 896], ymm9 # 32-byte Spill - je .LBB0_74 -# %bb.73: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm3, ymm4, 1 - vpinsrb xmm3, xmm3, byte ptr [rdi + rbx], 7 - vinserti128 ymm4, ymm4, xmm3, 1 -.LBB0_74: # in Loop: Header=BB0_26 Depth=1 - mov rbx, r11 - or rbx, 24 - vpcmpgtd ymm9, ymm0, ymm11 - vpermq ymm12, ymm9, 68 # ymm12 = ymm9[0,1,0,1] - vpacksswb ymm3, ymm0, ymm12 - vextracti128 xmm3, ymm3, 1 - vpextrb ecx, xmm3, 8 - test cl, 1 - mov qword ptr [rsp + 216], rbx # 8-byte Spill - je .LBB0_76 -# %bb.75: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm3, ymm4, 1 - vpinsrb xmm3, xmm3, byte ptr [rdi + rbx], 8 - vinserti128 ymm4, ymm4, xmm3, 1 -.LBB0_76: # in Loop: Header=BB0_26 Depth=1 - mov rbx, r11 - or rbx, 25 - vpcmpgtd xmm3, xmm0, xmm11 - vpackssdw xmm3, xmm3, xmm3 - vpermq ymm3, ymm3, 212 # ymm3 = ymm3[0,1,1,3] - vpacksswb ymm3, ymm0, ymm3 - vextracti128 xmm3, ymm3, 1 - vpextrb ecx, xmm3, 9 - test cl, 1 - mov 
qword ptr [rsp + 208], rbx # 8-byte Spill - je .LBB0_78 -# %bb.77: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm3, ymm4, 1 - vpinsrb xmm3, xmm3, byte ptr [rdi + rbx], 9 - vinserti128 ymm4, ymm4, xmm3, 1 -.LBB0_78: # in Loop: Header=BB0_26 Depth=1 - mov rbx, r11 - or rbx, 26 - vpcmpgtd xmm3, xmm0, xmm11 - vpackssdw xmm3, xmm3, xmm3 - vpermq ymm3, ymm3, 212 # ymm3 = ymm3[0,1,1,3] - vpacksswb ymm3, ymm0, ymm3 - vextracti128 xmm3, ymm3, 1 - vpextrb ecx, xmm3, 10 - test cl, 1 - mov qword ptr [rsp + 200], rbx # 8-byte Spill - je .LBB0_80 -# %bb.79: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm3, ymm4, 1 - vpinsrb xmm3, xmm3, byte ptr [rdi + rbx], 10 - vinserti128 ymm4, ymm4, xmm3, 1 -.LBB0_80: # in Loop: Header=BB0_26 Depth=1 - mov rbx, r11 - or rbx, 27 - vpcmpgtd xmm3, xmm0, xmm11 - vpackssdw xmm3, xmm3, xmm3 - vpermq ymm3, ymm3, 212 # ymm3 = ymm3[0,1,1,3] - vpacksswb ymm3, ymm0, ymm3 - vextracti128 xmm3, ymm3, 1 - vpextrb ecx, xmm3, 11 - test cl, 1 - mov qword ptr [rsp + 192], rbx # 8-byte Spill - mov qword ptr [rsp + 144], rdx # 8-byte Spill - mov qword ptr [rsp + 88], rax # 8-byte Spill - je .LBB0_82 -# %bb.81: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm3, ymm4, 1 - vpinsrb xmm3, xmm3, byte ptr [rdi + rbx], 11 - vinserti128 ymm4, ymm4, xmm3, 1 -.LBB0_82: # in Loop: Header=BB0_26 Depth=1 - mov rdx, r11 - or rdx, 28 - vpackssdw ymm3, ymm0, ymm9 - vpacksswb ymm3, ymm0, ymm3 - vextracti128 xmm3, ymm3, 1 - vpextrb ecx, xmm3, 12 - test cl, 1 - je .LBB0_84 -# %bb.83: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm3, ymm4, 1 - vpinsrb xmm3, xmm3, byte ptr [rdi + rdx], 12 - vinserti128 ymm4, ymm4, xmm3, 1 -.LBB0_84: # in Loop: Header=BB0_26 Depth=1 - mov rbx, r11 - or rbx, 29 - vpackssdw ymm3, ymm0, ymm9 - vpacksswb ymm3, ymm0, ymm3 - vextracti128 xmm3, ymm3, 1 - vpextrb ecx, xmm3, 13 - test cl, 1 - mov qword ptr [rsp + 176], rbx # 8-byte Spill - je .LBB0_86 -# %bb.85: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm3, ymm4, 1 - vpinsrb xmm3, xmm3, 
byte ptr [rdi + rbx], 13 - vinserti128 ymm4, ymm4, xmm3, 1 -.LBB0_86: # in Loop: Header=BB0_26 Depth=1 - mov rbx, r11 - or rbx, 30 - vpackssdw ymm3, ymm0, ymm9 - vpacksswb ymm3, ymm0, ymm3 - vextracti128 xmm3, ymm3, 1 - vpextrb ecx, xmm3, 14 - test cl, 1 - mov qword ptr [rsp + 168], rbx # 8-byte Spill - je .LBB0_88 -# %bb.87: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm3, ymm4, 1 - vpinsrb xmm3, xmm3, byte ptr [rdi + rbx], 14 - vinserti128 ymm4, ymm4, xmm3, 1 -.LBB0_88: # in Loop: Header=BB0_26 Depth=1 - mov rbx, r11 - or rbx, 31 - vpackssdw ymm3, ymm0, ymm9 - vpacksswb ymm3, ymm0, ymm3 - vextracti128 xmm3, ymm3, 1 - vpextrb ecx, xmm3, 15 - test cl, 1 - mov qword ptr [rsp + 160], rbx # 8-byte Spill - je .LBB0_90 -# %bb.89: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm3, ymm4, 1 - vpinsrb xmm3, xmm3, byte ptr [rdi + rbx], 15 - vinserti128 ymm4, ymm4, xmm3, 1 -.LBB0_90: # in Loop: Header=BB0_26 Depth=1 - vpmovzxdq ymm3, xmm1 # ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero - vmovdqa ymmword ptr [rsp + 512], ymm3 # 32-byte Spill - vpand ymm15, ymm4, ymmword ptr [rip + .LCPI0_4] - vpcmpgtd xmm3, xmm0, xmm1 - vmovd ecx, xmm3 - test cl, 1 - je .LBB0_92 -# %bb.91: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm3, ymmword ptr [rsp + 512] # 32-byte Reload - vmovq rcx, xmm3 - vpextrb byte ptr [r8 + rcx], xmm15, 0 -.LBB0_92: # in Loop: Header=BB0_26 Depth=1 - vpcmpgtd xmm3, xmm0, xmm1 - vpackssdw xmm3, xmm3, xmm3 - vpacksswb xmm3, xmm3, xmm3 - vpextrb ecx, xmm3, 1 - test cl, 1 - je .LBB0_94 -# %bb.93: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm3, ymmword ptr [rsp + 512] # 32-byte Reload - vpextrq rcx, xmm3, 1 - vpextrb byte ptr [r8 + rcx], xmm15, 1 -.LBB0_94: # in Loop: Header=BB0_26 Depth=1 - vpcmpgtd xmm3, xmm0, xmm1 - vpackssdw xmm3, xmm3, xmm3 - vpacksswb xmm3, xmm3, xmm3 - vpextrb ecx, xmm3, 2 - test cl, 1 - je .LBB0_96 -# %bb.95: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm3, ymmword ptr [rsp + 512] # 32-byte Reload - vextracti128 xmm3, 
ymm3, 1 - vmovq rcx, xmm3 - vpextrb byte ptr [r8 + rcx], xmm15, 2 -.LBB0_96: # in Loop: Header=BB0_26 Depth=1 - vpcmpgtd xmm1, xmm0, xmm1 - vpackssdw xmm1, xmm1, xmm1 - vpacksswb xmm1, xmm1, xmm1 - vpextrb ecx, xmm1, 3 - test cl, 1 - je .LBB0_98 -# %bb.97: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 512] # 32-byte Reload - vextracti128 xmm1, ymm1, 1 - vpextrq rcx, xmm1, 1 - vpextrb byte ptr [r8 + rcx], xmm15, 3 -.LBB0_98: # in Loop: Header=BB0_26 Depth=1 - vpmovzxdq ymm1, xmm13 # ymm1 = xmm13[0],zero,xmm13[1],zero,xmm13[2],zero,xmm13[3],zero - vmovdqa ymmword ptr [rsp + 480], ymm1 # 32-byte Spill - test r9b, 1 - je .LBB0_100 -# %bb.99: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 480] # 32-byte Reload - vmovq rcx, xmm1 - vpextrb byte ptr [r8 + rcx], xmm15, 4 -.LBB0_100: # in Loop: Header=BB0_26 Depth=1 - vpackssdw ymm1, ymm6, ymm0 - vextracti128 xmm1, ymm1, 1 - vpbroadcastd xmm1, xmm1 - vpacksswb xmm1, xmm1, xmm1 - vpextrb ecx, xmm1, 5 - test cl, 1 - je .LBB0_102 -# %bb.101: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 480] # 32-byte Reload - vpextrq rcx, xmm1, 1 - vpextrb byte ptr [r8 + rcx], xmm15, 5 -.LBB0_102: # in Loop: Header=BB0_26 Depth=1 - vpackssdw ymm1, ymm6, ymm0 - vpermq ymm1, ymm1, 232 # ymm1 = ymm1[0,2,2,3] - vpacksswb xmm1, xmm1, xmm1 - vpextrb ecx, xmm1, 6 - test cl, 1 - je .LBB0_104 -# %bb.103: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 480] # 32-byte Reload - vextracti128 xmm1, ymm1, 1 - vmovq rcx, xmm1 - vpextrb byte ptr [r8 + rcx], xmm15, 6 -.LBB0_104: # in Loop: Header=BB0_26 Depth=1 - vpackssdw ymm1, ymm6, ymm0 - vpermq ymm1, ymm1, 232 # ymm1 = ymm1[0,2,2,3] - vpacksswb xmm1, xmm1, xmm1 - vpextrb ecx, xmm1, 7 - test cl, 1 - je .LBB0_106 -# %bb.105: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 480] # 32-byte Reload - vextracti128 xmm1, ymm1, 1 - vpextrq rcx, xmm1, 1 - vpextrb byte ptr [r8 + rcx], xmm15, 7 -.LBB0_106: # in Loop: 
Header=BB0_26 Depth=1 - vpmovzxdq ymm1, xmm2 # ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero - vmovdqa ymmword ptr [rsp + 448], ymm1 # 32-byte Spill - vpcmpgtd xmm1, xmm0, xmm2 - vpextrb ecx, xmm1, 0 - test cl, 1 - je .LBB0_108 -# %bb.107: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 448] # 32-byte Reload - vmovq rcx, xmm1 - vpextrb byte ptr [r8 + rcx], xmm15, 8 -.LBB0_108: # in Loop: Header=BB0_26 Depth=1 - vpcmpgtd xmm1, xmm0, xmm2 - vpackssdw xmm1, xmm1, xmm1 - vpacksswb xmm1, xmm1, xmm1 - vpextrb ecx, xmm1, 9 - test cl, 1 - je .LBB0_110 -# %bb.109: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 448] # 32-byte Reload - vpextrq rcx, xmm1, 1 - vpextrb byte ptr [r8 + rcx], xmm15, 9 -.LBB0_110: # in Loop: Header=BB0_26 Depth=1 - vpcmpgtd xmm1, xmm0, xmm2 - vpackssdw xmm1, xmm1, xmm1 - vpacksswb xmm1, xmm1, xmm1 - vpextrb ecx, xmm1, 10 - test cl, 1 - je .LBB0_112 -# %bb.111: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 448] # 32-byte Reload - vextracti128 xmm1, ymm1, 1 - vmovq rcx, xmm1 - vpextrb byte ptr [r8 + rcx], xmm15, 10 -.LBB0_112: # in Loop: Header=BB0_26 Depth=1 - vpcmpgtd xmm1, xmm0, xmm2 - vpackssdw xmm1, xmm1, xmm1 - vpacksswb xmm1, xmm1, xmm1 - vpextrb ecx, xmm1, 11 - test cl, 1 - je .LBB0_114 -# %bb.113: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 448] # 32-byte Reload - vextracti128 xmm1, ymm1, 1 - vpextrq rcx, xmm1, 1 - vpextrb byte ptr [r8 + rcx], xmm15, 11 -.LBB0_114: # in Loop: Header=BB0_26 Depth=1 - mov qword ptr [rsp + 136], rsi # 8-byte Spill - vpmovzxdq ymm1, xmm5 # ymm1 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero - vmovdqa ymmword ptr [rsp + 416], ymm1 # 32-byte Spill - test r14b, 1 - je .LBB0_116 -# %bb.115: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 416] # 32-byte Reload - vmovq rcx, xmm1 - vpextrb byte ptr [r8 + rcx], xmm15, 12 -.LBB0_116: # in Loop: Header=BB0_26 Depth=1 - vpackssdw ymm1, ymm7, ymm0 - 
vextracti128 xmm1, ymm1, 1 - vpbroadcastd xmm1, xmm1 - vpacksswb xmm1, xmm1, xmm1 - vpextrb ecx, xmm1, 13 - test cl, 1 - mov r9, qword ptr [rsp + 152] # 8-byte Reload - mov rsi, qword ptr [rsp + 296] # 8-byte Reload - mov r14, qword ptr [rsp + 104] # 8-byte Reload - mov rax, qword ptr [rsp + 288] # 8-byte Reload - je .LBB0_118 -# %bb.117: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 416] # 32-byte Reload - vpextrq rcx, xmm1, 1 - vpextrb byte ptr [r8 + rcx], xmm15, 13 -.LBB0_118: # in Loop: Header=BB0_26 Depth=1 - vpackssdw ymm1, ymm7, ymm0 - vpermq ymm1, ymm1, 232 # ymm1 = ymm1[0,2,2,3] - vpacksswb xmm1, xmm1, xmm1 - vpextrb ecx, xmm1, 14 - test cl, 1 - je .LBB0_120 -# %bb.119: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 416] # 32-byte Reload - vextracti128 xmm1, ymm1, 1 - vmovq rcx, xmm1 - vpextrb byte ptr [r8 + rcx], xmm15, 14 -.LBB0_120: # in Loop: Header=BB0_26 Depth=1 - vpackssdw ymm1, ymm7, ymm0 - vpermq ymm1, ymm1, 232 # ymm1 = ymm1[0,2,2,3] - vpacksswb xmm1, xmm1, xmm1 - vpextrb ecx, xmm1, 15 - test cl, 1 - je .LBB0_122 -# %bb.121: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 416] # 32-byte Reload - vextracti128 xmm1, ymm1, 1 - vpextrq rcx, xmm1, 1 - vpextrb byte ptr [r8 + rcx], xmm15, 15 -.LBB0_122: # in Loop: Header=BB0_26 Depth=1 - vpmovzxdq ymm1, xmm10 # ymm1 = xmm10[0],zero,xmm10[1],zero,xmm10[2],zero,xmm10[3],zero - vmovdqa ymmword ptr [rsp + 384], ymm1 # 32-byte Spill - vpcmpgtd xmm1, xmm0, xmm10 - vmovd ecx, xmm1 - test cl, 1 - je .LBB0_124 -# %bb.123: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 384] # 32-byte Reload - vmovq rcx, xmm1 - vextracti128 xmm1, ymm15, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 0 -.LBB0_124: # in Loop: Header=BB0_26 Depth=1 - vpcmpgtd xmm1, xmm0, xmm10 - vpackssdw xmm1, xmm1, xmm1 - vpermq ymm1, ymm1, 212 # ymm1 = ymm1[0,1,1,3] - vpacksswb ymm1, ymm1, ymm0 - vextracti128 xmm1, ymm1, 1 - vpextrb ecx, xmm1, 1 - test cl, 1 - je .LBB0_126 
-# %bb.125: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 384] # 32-byte Reload - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm15, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 1 -.LBB0_126: # in Loop: Header=BB0_26 Depth=1 - vpcmpgtd xmm1, xmm0, xmm10 - vpackssdw xmm1, xmm1, xmm1 - vpermq ymm1, ymm1, 212 # ymm1 = ymm1[0,1,1,3] - vpacksswb ymm1, ymm1, ymm0 - vextracti128 xmm1, ymm1, 1 - vpextrb ecx, xmm1, 2 - test cl, 1 - je .LBB0_128 -# %bb.127: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 384] # 32-byte Reload - vextracti128 xmm1, ymm1, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm15, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 2 -.LBB0_128: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm10, 1 - vpcmpgtd xmm2, xmm0, xmm10 - vpackssdw xmm2, xmm2, xmm2 - vpermq ymm2, ymm2, 212 # ymm2 = ymm2[0,1,1,3] - vpacksswb ymm2, ymm2, ymm0 - vextracti128 xmm2, ymm2, 1 - vpextrb ecx, xmm2, 3 - test cl, 1 - je .LBB0_130 -# %bb.129: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm2, ymmword ptr [rsp + 384] # 32-byte Reload - vextracti128 xmm2, ymm2, 1 - vpextrq rcx, xmm2, 1 - vextracti128 xmm2, ymm15, 1 - vpextrb byte ptr [r8 + rcx], xmm2, 3 -.LBB0_130: # in Loop: Header=BB0_26 Depth=1 - vpmovzxdq ymm1, xmm1 # ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero - vmovdqa ymmword ptr [rsp + 352], ymm1 # 32-byte Spill - vpackssdw ymm1, ymm0, ymm8 - vpacksswb ymm1, ymm1, ymm0 - vextracti128 xmm1, ymm1, 1 - vpextrb ecx, xmm1, 4 - test cl, 1 - je .LBB0_132 -# %bb.131: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 352] # 32-byte Reload - vmovq rcx, xmm1 - vextracti128 xmm1, ymm15, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 4 -.LBB0_132: # in Loop: Header=BB0_26 Depth=1 - vpackssdw ymm1, ymm0, ymm8 - vpacksswb ymm1, ymm1, ymm0 - vextracti128 xmm1, ymm1, 1 - vpextrb ecx, xmm1, 5 - test cl, 1 - je .LBB0_134 -# %bb.133: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 352] # 32-byte Reload - vpextrq rcx, 
xmm1, 1 - vextracti128 xmm1, ymm15, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 5 -.LBB0_134: # in Loop: Header=BB0_26 Depth=1 - vpackssdw ymm1, ymm0, ymm8 - vpacksswb ymm1, ymm1, ymm0 - vextracti128 xmm1, ymm1, 1 - vpextrb ecx, xmm1, 6 - test cl, 1 - je .LBB0_136 -# %bb.135: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 352] # 32-byte Reload - vextracti128 xmm1, ymm1, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm15, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 6 -.LBB0_136: # in Loop: Header=BB0_26 Depth=1 - vpackssdw ymm1, ymm0, ymm8 - vpacksswb ymm1, ymm1, ymm0 - vextracti128 xmm1, ymm1, 1 - vpextrb ecx, xmm1, 7 - test cl, 1 - je .LBB0_138 -# %bb.137: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 352] # 32-byte Reload - vextracti128 xmm1, ymm1, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm15, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 7 -.LBB0_138: # in Loop: Header=BB0_26 Depth=1 - vpmovzxdq ymm1, xmm11 # ymm1 = xmm11[0],zero,xmm11[1],zero,xmm11[2],zero,xmm11[3],zero - vmovdqa ymmword ptr [rsp + 320], ymm1 # 32-byte Spill - vpacksswb ymm1, ymm0, ymm12 - vextracti128 xmm1, ymm1, 1 - vpextrb ecx, xmm1, 8 - test cl, 1 - je .LBB0_140 -# %bb.139: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 320] # 32-byte Reload - vmovq rcx, xmm1 - vextracti128 xmm1, ymm15, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 8 -.LBB0_140: # in Loop: Header=BB0_26 Depth=1 - vpcmpgtd xmm1, xmm0, xmm11 - vpackssdw xmm1, xmm1, xmm1 - vpermq ymm1, ymm1, 212 # ymm1 = ymm1[0,1,1,3] - vpacksswb ymm1, ymm0, ymm1 - vextracti128 xmm1, ymm1, 1 - vpextrb ecx, xmm1, 9 - test cl, 1 - je .LBB0_142 -# %bb.141: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 320] # 32-byte Reload - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm15, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 9 -.LBB0_142: # in Loop: Header=BB0_26 Depth=1 - vpcmpgtd xmm1, xmm0, xmm11 - vpackssdw xmm1, xmm1, xmm1 - vpermq ymm1, ymm1, 212 # ymm1 = ymm1[0,1,1,3] - vpacksswb ymm1, 
ymm0, ymm1 - vextracti128 xmm1, ymm1, 1 - vpextrb ecx, xmm1, 10 - test cl, 1 - je .LBB0_144 -# %bb.143: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 320] # 32-byte Reload - vextracti128 xmm1, ymm1, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm15, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 10 -.LBB0_144: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm11, 1 - vpcmpgtd xmm4, xmm0, xmm11 - vpackssdw xmm4, xmm4, xmm4 - vpermq ymm4, ymm4, 212 # ymm4 = ymm4[0,1,1,3] - vpacksswb ymm4, ymm0, ymm4 - vextracti128 xmm4, ymm4, 1 - vpextrb ecx, xmm4, 11 - test cl, 1 - je .LBB0_146 -# %bb.145: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm2, ymmword ptr [rsp + 320] # 32-byte Reload - vextracti128 xmm4, ymm2, 1 - vpextrq rcx, xmm4, 1 - vextracti128 xmm4, ymm15, 1 - vpextrb byte ptr [r8 + rcx], xmm4, 11 -.LBB0_146: # in Loop: Header=BB0_26 Depth=1 - vpmovzxdq ymm4, xmm1 # ymm4 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero - vpackssdw ymm1, ymm0, ymm9 - vpacksswb ymm1, ymm0, ymm1 - vextracti128 xmm1, ymm1, 1 - vpextrb ecx, xmm1, 12 - test cl, 1 - je .LBB0_148 -# %bb.147: # in Loop: Header=BB0_26 Depth=1 - vmovq rcx, xmm4 - vextracti128 xmm1, ymm15, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 12 -.LBB0_148: # in Loop: Header=BB0_26 Depth=1 - vpackssdw ymm1, ymm0, ymm9 - vpacksswb ymm1, ymm0, ymm1 - vextracti128 xmm1, ymm1, 1 - vpextrb ecx, xmm1, 13 - test cl, 1 - je .LBB0_150 -# %bb.149: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm4, 1 - vextracti128 xmm1, ymm15, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 13 -.LBB0_150: # in Loop: Header=BB0_26 Depth=1 - vpackssdw ymm1, ymm0, ymm9 - vpacksswb ymm1, ymm0, ymm1 - vextracti128 xmm1, ymm1, 1 - vpextrb ecx, xmm1, 14 - test cl, 1 - je .LBB0_152 -# %bb.151: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm15, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 14 -.LBB0_152: # in Loop: Header=BB0_26 Depth=1 - vpackssdw ymm1, ymm0, ymm9 - vpacksswb ymm1, ymm0, ymm1 - 
vextracti128 xmm1, ymm1, 1 - vpextrb ecx, xmm1, 15 - test cl, 1 - je .LBB0_154 -# %bb.153: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm15, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 15 -.LBB0_154: # in Loop: Header=BB0_26 Depth=1 - vpackssdw ymm1, ymm6, ymm8 - vpermq ymm1, ymm1, 216 # ymm1 = ymm1[0,2,1,3] - vpackssdw ymm5, ymm7, ymm9 - vpermq ymm5, ymm5, 216 # ymm5 = ymm5[0,2,1,3] - vpacksswb ymm1, ymm1, ymm5 - vmovdqa ymm2, ymmword ptr [rsp + 768] # 32-byte Reload - vpor ymm15, ymm2, ymmword ptr [rsp + 512] # 32-byte Folded Reload - vpor ymm5, ymm2, ymmword ptr [rsp + 480] # 32-byte Folded Reload - vpor ymm10, ymm2, ymmword ptr [rsp + 384] # 32-byte Folded Reload - vpor ymm9, ymm2, ymmword ptr [rsp + 352] # 32-byte Folded Reload - vpor ymm12, ymm2, ymmword ptr [rsp + 448] # 32-byte Folded Reload - vpor ymm11, ymm2, ymmword ptr [rsp + 416] # 32-byte Folded Reload - vpor ymm8, ymm2, ymmword ptr [rsp + 320] # 32-byte Folded Reload - vpor ymm7, ymm4, ymm2 - vperm2i128 ymm6, ymm8, ymm7, 49 # ymm6 = ymm8[2,3],ymm7[2,3] - vinserti128 ymm13, ymm8, xmm7, 1 - vshufps ymm6, ymm13, ymm6, 136 # ymm6 = ymm13[0,2],ymm6[0,2],ymm13[4,6],ymm6[4,6] - vperm2i128 ymm13, ymm12, ymm11, 49 # ymm13 = ymm12[2,3],ymm11[2,3] - vinserti128 ymm14, ymm12, xmm11, 1 - vshufps ymm13, ymm14, ymm13, 136 # ymm13 = ymm14[0,2],ymm13[0,2],ymm14[4,6],ymm13[4,6] - vperm2i128 ymm14, ymm10, ymm9, 49 # ymm14 = ymm10[2,3],ymm9[2,3] - vinserti128 ymm2, ymm10, xmm9, 1 - vshufps ymm2, ymm2, ymm14, 136 # ymm2 = ymm2[0,2],ymm14[0,2],ymm2[4,6],ymm14[4,6] - vperm2i128 ymm14, ymm15, ymm5, 49 # ymm14 = ymm15[2,3],ymm5[2,3] - vinserti128 ymm3, ymm15, xmm5, 1 - vshufps ymm3, ymm3, ymm14, 136 # ymm3 = ymm3[0,2],ymm14[0,2],ymm3[4,6],ymm14[4,6] - vpcmpgtd ymm3, ymm0, ymm3 - vpcmpgtd ymm2, ymm0, ymm2 - vpackssdw ymm2, ymm3, ymm2 - vpcmpgtd ymm3, ymm0, ymm13 - vpcmpgtd ymm6, ymm0, ymm6 - vpackssdw ymm3, ymm3, ymm6 - vpermq ymm2, ymm2, 216 # ymm2 = ymm2[0,2,1,3] - 
vpermq ymm3, ymm3, 216 # ymm3 = ymm3[0,2,1,3] - vpacksswb ymm2, ymm2, ymm3 - vpand ymm6, ymm2, ymm1 - vmovd ecx, xmm6 - # implicit-def: $ymm14 - test cl, 1 - je .LBB0_155 -# %bb.660: # in Loop: Header=BB0_26 Depth=1 - vpbroadcastb ymm14, byte ptr [rdi + r11] - vpextrb ecx, xmm6, 1 - test cl, 1 - jne .LBB0_661 -.LBB0_156: # in Loop: Header=BB0_26 Depth=1 - mov rbx, qword ptr [rsp + 224] # 8-byte Reload - vpextrb ecx, xmm6, 2 - test cl, 1 - je .LBB0_157 -.LBB0_662: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rsi], 2 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 3 - test cl, 1 - jne .LBB0_663 -.LBB0_158: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 4 - test cl, 1 - je .LBB0_159 -.LBB0_664: # in Loop: Header=BB0_26 Depth=1 - mov rcx, qword ptr [rsp + 264] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rcx], 4 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 5 - test cl, 1 - jne .LBB0_665 -.LBB0_160: # in Loop: Header=BB0_26 Depth=1 - mov rsi, qword ptr [rsp + 232] # 8-byte Reload - vpextrb ecx, xmm6, 6 - test cl, 1 - je .LBB0_161 -.LBB0_666: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 6 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 7 - test cl, 1 - jne .LBB0_667 -.LBB0_162: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 8 - test cl, 1 - je .LBB0_163 -.LBB0_668: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 96] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 8 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 9 - test cl, 1 - jne .LBB0_669 -.LBB0_164: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 10 - test cl, 1 - je .LBB0_165 -.LBB0_670: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 144] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 10 - 
vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 11 - test cl, 1 - jne .LBB0_671 -.LBB0_166: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 12 - test cl, 1 - je .LBB0_167 -.LBB0_672: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 248] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 12 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 13 - test cl, 1 - jne .LBB0_673 -.LBB0_168: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 14 - test cl, 1 - je .LBB0_169 -.LBB0_674: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 80] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 14 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 15 - test cl, 1 - jne .LBB0_170 - jmp .LBB0_171 - .p2align 4, 0x90 -.LBB0_155: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 1 - test cl, 1 - je .LBB0_156 -.LBB0_661: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r9], 1 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - mov rbx, qword ptr [rsp + 224] # 8-byte Reload - vpextrb ecx, xmm6, 2 - test cl, 1 - jne .LBB0_662 -.LBB0_157: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 3 - test cl, 1 - je .LBB0_158 -.LBB0_663: # in Loop: Header=BB0_26 Depth=1 - mov rcx, qword ptr [rsp + 272] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rcx], 3 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 4 - test cl, 1 - jne .LBB0_664 -.LBB0_159: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 5 - test cl, 1 - je .LBB0_160 -.LBB0_665: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r14], 5 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - mov rsi, qword ptr [rsp + 232] # 8-byte Reload - vpextrb ecx, xmm6, 6 - test cl, 1 - jne .LBB0_666 -.LBB0_161: # in Loop: 
Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 7 - test cl, 1 - je .LBB0_162 -.LBB0_667: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rsi], 7 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 8 - test cl, 1 - jne .LBB0_668 -.LBB0_163: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 9 - test cl, 1 - je .LBB0_164 -.LBB0_669: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rbx], 9 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 10 - test cl, 1 - jne .LBB0_670 -.LBB0_165: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 11 - test cl, 1 - je .LBB0_166 -.LBB0_671: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 256] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 11 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 12 - test cl, 1 - jne .LBB0_672 -.LBB0_167: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 13 - test cl, 1 - je .LBB0_168 -.LBB0_673: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 88] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 13 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 14 - test cl, 1 - jne .LBB0_674 -.LBB0_169: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 15 - test cl, 1 - je .LBB0_171 -.LBB0_170: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 72] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 15 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_171: # in Loop: Header=BB0_26 Depth=1 - mov rcx, qword ptr [rsp + 208] # 8-byte Reload - vextracti128 xmm13, ymm6, 1 - vmovd eax, xmm13 - mov dword ptr [rsp + 44], eax # 4-byte Spill - test al, 1 - je .LBB0_172 -# %bb.675: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - mov rax, qword ptr [rsp + 64] # 8-byte Reload - vpinsrb 
xmm1, xmm1, byte ptr [rdi + rax], 0 - vinserti128 ymm14, ymm14, xmm1, 1 - vpextrb eax, xmm13, 1 - mov dword ptr [rsp + 40], eax # 4-byte Spill - test al, 1 - jne .LBB0_676 -.LBB0_173: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm13, 2 - mov dword ptr [rsp + 36], eax # 4-byte Spill - test al, 1 - je .LBB0_174 -.LBB0_677: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - mov rax, qword ptr [rsp + 128] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rax], 2 - vinserti128 ymm14, ymm14, xmm1, 1 - vpextrb eax, xmm13, 3 - mov dword ptr [rsp + 32], eax # 4-byte Spill - test al, 1 - jne .LBB0_678 -.LBB0_175: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm13, 4 - mov dword ptr [rsp + 28], eax # 4-byte Spill - test al, 1 - je .LBB0_176 -.LBB0_679: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - mov rax, qword ptr [rsp + 56] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rax], 4 - vinserti128 ymm14, ymm14, xmm1, 1 - vpextrb eax, xmm13, 5 - mov dword ptr [rsp + 24], eax # 4-byte Spill - test al, 1 - jne .LBB0_680 -.LBB0_177: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm13, 6 - mov dword ptr [rsp + 20], eax # 4-byte Spill - test al, 1 - je .LBB0_178 -.LBB0_681: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + r10], 6 - vinserti128 ymm14, ymm14, xmm1, 1 - vpextrb eax, xmm13, 7 - mov dword ptr [rsp + 316], eax # 4-byte Spill - test al, 1 - jne .LBB0_682 -.LBB0_179: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 216] # 8-byte Reload - vpextrb ebx, xmm13, 8 - test bl, 1 - je .LBB0_181 -.LBB0_180: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + rax], 8 - vinserti128 ymm14, ymm14, xmm1, 1 -.LBB0_181: # in Loop: Header=BB0_26 Depth=1 - vpextrb r9d, xmm13, 9 - test r9b, 1 - mov qword ptr [rsp + 280], r13 # 8-byte Spill - mov qword ptr [rsp + 112], r10 # 8-byte Spill - mov qword ptr [rsp + 184], rdx # 8-byte 
Spill - je .LBB0_183 -# %bb.182: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + rcx], 9 - vinserti128 ymm14, ymm14, xmm1, 1 -.LBB0_183: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 200] # 8-byte Reload - mov rcx, qword ptr [rsp + 192] # 8-byte Reload - vpextrb r13d, xmm13, 10 - test r13b, 1 - je .LBB0_184 -# %bb.683: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + rax], 10 - vinserti128 ymm14, ymm14, xmm1, 1 - vpextrb eax, xmm13, 11 - test al, 1 - mov qword ptr [rsp + 120], r15 # 8-byte Spill - jne .LBB0_684 -.LBB0_185: # in Loop: Header=BB0_26 Depth=1 - vpextrb r15d, xmm13, 12 - test r15b, 1 - mov qword ptr [rsp + 304], r11 # 8-byte Spill - je .LBB0_186 -.LBB0_685: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - mov rcx, qword ptr [rsp + 184] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rcx], 12 - vinserti128 ymm14, ymm14, xmm1, 1 - vpextrb edx, xmm13, 13 - test dl, 1 - jne .LBB0_686 -.LBB0_187: # in Loop: Header=BB0_26 Depth=1 - vpextrb esi, xmm13, 14 - test sil, 1 - je .LBB0_188 -.LBB0_687: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - mov rcx, qword ptr [rsp + 168] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rcx], 14 - vinserti128 ymm14, ymm14, xmm1, 1 - vpextrb r14d, xmm13, 15 - test r14b, 1 - jne .LBB0_189 - jmp .LBB0_190 - .p2align 4, 0x90 -.LBB0_172: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm13, 1 - mov dword ptr [rsp + 40], eax # 4-byte Spill - test al, 1 - je .LBB0_173 -.LBB0_676: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - mov rax, qword ptr [rsp + 136] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rax], 1 - vinserti128 ymm14, ymm14, xmm1, 1 - vpextrb eax, xmm13, 2 - mov dword ptr [rsp + 36], eax # 4-byte Spill - test al, 1 - jne .LBB0_677 -.LBB0_174: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm13, 3 - mov dword ptr [rsp + 32], eax # 
4-byte Spill - test al, 1 - je .LBB0_175 -.LBB0_678: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + r15], 3 - vinserti128 ymm14, ymm14, xmm1, 1 - vpextrb eax, xmm13, 4 - mov dword ptr [rsp + 28], eax # 4-byte Spill - test al, 1 - jne .LBB0_679 -.LBB0_176: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm13, 5 - mov dword ptr [rsp + 24], eax # 4-byte Spill - test al, 1 - je .LBB0_177 -.LBB0_680: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + r13], 5 - vinserti128 ymm14, ymm14, xmm1, 1 - vpextrb eax, xmm13, 6 - mov dword ptr [rsp + 20], eax # 4-byte Spill - test al, 1 - jne .LBB0_681 -.LBB0_178: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm13, 7 - mov dword ptr [rsp + 316], eax # 4-byte Spill - test al, 1 - je .LBB0_179 -.LBB0_682: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - mov rax, qword ptr [rsp + 240] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rax], 7 - vinserti128 ymm14, ymm14, xmm1, 1 - mov rax, qword ptr [rsp + 216] # 8-byte Reload - vpextrb ebx, xmm13, 8 - test bl, 1 - jne .LBB0_180 - jmp .LBB0_181 - .p2align 4, 0x90 -.LBB0_184: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm13, 11 - test al, 1 - mov qword ptr [rsp + 120], r15 # 8-byte Spill - je .LBB0_185 -.LBB0_684: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + rcx], 11 - vinserti128 ymm14, ymm14, xmm1, 1 - vpextrb r15d, xmm13, 12 - test r15b, 1 - mov qword ptr [rsp + 304], r11 # 8-byte Spill - jne .LBB0_685 -.LBB0_186: # in Loop: Header=BB0_26 Depth=1 - vpextrb edx, xmm13, 13 - test dl, 1 - je .LBB0_187 -.LBB0_686: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - mov rcx, qword ptr [rsp + 176] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rcx], 13 - vinserti128 ymm14, ymm14, xmm1, 1 - vpextrb esi, xmm13, 14 - test sil, 1 - jne .LBB0_687 -.LBB0_188: # in Loop: Header=BB0_26 
Depth=1 - vpextrb r14d, xmm13, 15 - test r14b, 1 - je .LBB0_190 -.LBB0_189: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - mov rcx, qword ptr [rsp + 160] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rcx], 15 - vinserti128 ymm14, ymm14, xmm1, 1 -.LBB0_190: # in Loop: Header=BB0_26 Depth=1 - vpsrlw ymm1, ymm14, 1 - vpand ymm14, ymm1, ymmword ptr [rip + .LCPI0_4] - vmovd r10d, xmm6 - test r10b, 1 - je .LBB0_191 -# %bb.688: # in Loop: Header=BB0_26 Depth=1 - vmovq rcx, xmm15 - vpextrb byte ptr [r8 + rcx], xmm14, 0 - vpextrb ecx, xmm6, 1 - test cl, 1 - jne .LBB0_689 -.LBB0_192: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 2 - test cl, 1 - je .LBB0_193 -.LBB0_690: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm15, 1 - vmovq rcx, xmm1 - vpextrb byte ptr [r8 + rcx], xmm14, 2 - vpextrb ecx, xmm6, 3 - test cl, 1 - jne .LBB0_691 -.LBB0_194: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 4 - test cl, 1 - je .LBB0_195 -.LBB0_692: # in Loop: Header=BB0_26 Depth=1 - vmovq rcx, xmm5 - vpextrb byte ptr [r8 + rcx], xmm14, 4 - vpextrb ecx, xmm6, 5 - test cl, 1 - jne .LBB0_693 -.LBB0_196: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 6 - test cl, 1 - je .LBB0_197 -.LBB0_694: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm5, 1 - vmovq rcx, xmm1 - vpextrb byte ptr [r8 + rcx], xmm14, 6 - vpextrb ecx, xmm6, 7 - test cl, 1 - jne .LBB0_695 -.LBB0_198: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 8 - test cl, 1 - je .LBB0_199 -.LBB0_696: # in Loop: Header=BB0_26 Depth=1 - vmovq rcx, xmm12 - vpextrb byte ptr [r8 + rcx], xmm14, 8 - vpextrb ecx, xmm6, 9 - test cl, 1 - jne .LBB0_697 -.LBB0_200: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 10 - test cl, 1 - je .LBB0_201 -.LBB0_698: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm12, 1 - vmovq rcx, xmm1 - vpextrb byte ptr [r8 + rcx], xmm14, 10 - vpextrb ecx, xmm6, 11 - test cl, 1 - jne .LBB0_699 -.LBB0_202: # in Loop: Header=BB0_26 Depth=1 - vpextrb 
ecx, xmm6, 12 - test cl, 1 - je .LBB0_203 -.LBB0_700: # in Loop: Header=BB0_26 Depth=1 - vmovq rcx, xmm11 - vpextrb byte ptr [r8 + rcx], xmm14, 12 - vpextrb ecx, xmm6, 13 - test cl, 1 - jne .LBB0_701 -.LBB0_204: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 14 - test cl, 1 - je .LBB0_205 -.LBB0_702: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm11, 1 - vmovq rcx, xmm1 - vpextrb byte ptr [r8 + rcx], xmm14, 14 - vpextrb ecx, xmm6, 15 - test cl, 1 - jne .LBB0_703 -.LBB0_206: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 44], 1 # 1-byte Folded Reload - je .LBB0_207 -.LBB0_704: # in Loop: Header=BB0_26 Depth=1 - vmovq rcx, xmm10 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 0 - test byte ptr [rsp + 40], 1 # 1-byte Folded Reload - jne .LBB0_705 -.LBB0_208: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 36], 1 # 1-byte Folded Reload - je .LBB0_209 -.LBB0_706: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm10, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 2 - test byte ptr [rsp + 32], 1 # 1-byte Folded Reload - jne .LBB0_707 -.LBB0_210: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 28], 1 # 1-byte Folded Reload - je .LBB0_211 -.LBB0_708: # in Loop: Header=BB0_26 Depth=1 - vmovq rcx, xmm9 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 4 - test byte ptr [rsp + 24], 1 # 1-byte Folded Reload - jne .LBB0_709 -.LBB0_212: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 20], 1 # 1-byte Folded Reload - je .LBB0_213 -.LBB0_710: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm9, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 6 - test byte ptr [rsp + 316], 1 # 1-byte Folded Reload - jne .LBB0_711 -.LBB0_214: # in Loop: Header=BB0_26 Depth=1 - test bl, 1 - je .LBB0_215 -.LBB0_712: # in Loop: Header=BB0_26 Depth=1 - vmovq rcx, xmm8 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], 
xmm1, 8 - test r9b, 1 - mov r10, qword ptr [rsp + 224] # 8-byte Reload - mov r11, qword ptr [rsp + 144] # 8-byte Reload - jne .LBB0_713 -.LBB0_216: # in Loop: Header=BB0_26 Depth=1 - test r13b, 1 - mov rbx, qword ptr [rsp + 296] # 8-byte Reload - je .LBB0_217 -.LBB0_714: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm8, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 10 - test al, 1 - mov r9, qword ptr [rsp + 288] # 8-byte Reload - mov rax, qword ptr [rsp + 232] # 8-byte Reload - jne .LBB0_715 -.LBB0_218: # in Loop: Header=BB0_26 Depth=1 - test r15b, 1 - je .LBB0_219 -.LBB0_716: # in Loop: Header=BB0_26 Depth=1 - vmovq rcx, xmm7 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 12 - test dl, 1 - mov r13, qword ptr [rsp + 136] # 8-byte Reload - mov r15, qword ptr [rsp + 128] # 8-byte Reload - jne .LBB0_717 -.LBB0_220: # in Loop: Header=BB0_26 Depth=1 - test sil, 1 - mov rdx, qword ptr [rsp + 304] # 8-byte Reload - je .LBB0_221 -.LBB0_718: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm7, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 14 - test r14b, 1 - mov rsi, qword ptr [rsp + 152] # 8-byte Reload - jne .LBB0_222 - jmp .LBB0_223 - .p2align 4, 0x90 -.LBB0_191: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 1 - test cl, 1 - je .LBB0_192 -.LBB0_689: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm15, 1 - vpextrb byte ptr [r8 + rcx], xmm14, 1 - vpextrb ecx, xmm6, 2 - test cl, 1 - jne .LBB0_690 -.LBB0_193: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 3 - test cl, 1 - je .LBB0_194 -.LBB0_691: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm15, 1 - vpextrq rcx, xmm1, 1 - vpextrb byte ptr [r8 + rcx], xmm14, 3 - vpextrb ecx, xmm6, 4 - test cl, 1 - jne .LBB0_692 -.LBB0_195: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 5 - test cl, 1 - je .LBB0_196 -.LBB0_693: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm5, 1 - vpextrb 
byte ptr [r8 + rcx], xmm14, 5 - vpextrb ecx, xmm6, 6 - test cl, 1 - jne .LBB0_694 -.LBB0_197: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 7 - test cl, 1 - je .LBB0_198 -.LBB0_695: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm5, 1 - vpextrq rcx, xmm1, 1 - vpextrb byte ptr [r8 + rcx], xmm14, 7 - vpextrb ecx, xmm6, 8 - test cl, 1 - jne .LBB0_696 -.LBB0_199: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 9 - test cl, 1 - je .LBB0_200 -.LBB0_697: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm12, 1 - vpextrb byte ptr [r8 + rcx], xmm14, 9 - vpextrb ecx, xmm6, 10 - test cl, 1 - jne .LBB0_698 -.LBB0_201: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 11 - test cl, 1 - je .LBB0_202 -.LBB0_699: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm12, 1 - vpextrq rcx, xmm1, 1 - vpextrb byte ptr [r8 + rcx], xmm14, 11 - vpextrb ecx, xmm6, 12 - test cl, 1 - jne .LBB0_700 -.LBB0_203: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 13 - test cl, 1 - je .LBB0_204 -.LBB0_701: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm11, 1 - vpextrb byte ptr [r8 + rcx], xmm14, 13 - vpextrb ecx, xmm6, 14 - test cl, 1 - jne .LBB0_702 -.LBB0_205: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 15 - test cl, 1 - je .LBB0_206 -.LBB0_703: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm11, 1 - vpextrq rcx, xmm1, 1 - vpextrb byte ptr [r8 + rcx], xmm14, 15 - test byte ptr [rsp + 44], 1 # 1-byte Folded Reload - jne .LBB0_704 -.LBB0_207: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 40], 1 # 1-byte Folded Reload - je .LBB0_208 -.LBB0_705: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm10, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 1 - test byte ptr [rsp + 36], 1 # 1-byte Folded Reload - jne .LBB0_706 -.LBB0_209: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 32], 1 # 1-byte Folded Reload - je .LBB0_210 -.LBB0_707: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm10, 1 - vpextrq 
rcx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 3 - test byte ptr [rsp + 28], 1 # 1-byte Folded Reload - jne .LBB0_708 -.LBB0_211: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 24], 1 # 1-byte Folded Reload - je .LBB0_212 -.LBB0_709: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm9, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 5 - test byte ptr [rsp + 20], 1 # 1-byte Folded Reload - jne .LBB0_710 -.LBB0_213: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 316], 1 # 1-byte Folded Reload - je .LBB0_214 -.LBB0_711: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm9, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 7 - test bl, 1 - jne .LBB0_712 -.LBB0_215: # in Loop: Header=BB0_26 Depth=1 - test r9b, 1 - mov r10, qword ptr [rsp + 224] # 8-byte Reload - mov r11, qword ptr [rsp + 144] # 8-byte Reload - je .LBB0_216 -.LBB0_713: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm8, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 9 - test r13b, 1 - mov rbx, qword ptr [rsp + 296] # 8-byte Reload - jne .LBB0_714 -.LBB0_217: # in Loop: Header=BB0_26 Depth=1 - test al, 1 - mov r9, qword ptr [rsp + 288] # 8-byte Reload - mov rax, qword ptr [rsp + 232] # 8-byte Reload - je .LBB0_218 -.LBB0_715: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm8, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 11 - test r15b, 1 - jne .LBB0_716 -.LBB0_219: # in Loop: Header=BB0_26 Depth=1 - test dl, 1 - mov r13, qword ptr [rsp + 136] # 8-byte Reload - mov r15, qword ptr [rsp + 128] # 8-byte Reload - je .LBB0_220 -.LBB0_717: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm7, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 13 - test sil, 1 - mov rdx, qword ptr [rsp + 304] # 8-byte Reload - jne .LBB0_718 -.LBB0_221: # in Loop: Header=BB0_26 Depth=1 - test r14b, 1 - mov rsi, 
qword ptr [rsp + 152] # 8-byte Reload - je .LBB0_223 -.LBB0_222: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm7, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 15 -.LBB0_223: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 736] # 32-byte Reload - vpor ymm15, ymm1, ymmword ptr [rsp + 512] # 32-byte Folded Reload - vpor ymm5, ymm1, ymmword ptr [rsp + 480] # 32-byte Folded Reload - vpor ymm10, ymm1, ymmword ptr [rsp + 384] # 32-byte Folded Reload - vpor ymm9, ymm1, ymmword ptr [rsp + 352] # 32-byte Folded Reload - vpor ymm12, ymm1, ymmword ptr [rsp + 448] # 32-byte Folded Reload - vpor ymm11, ymm1, ymmword ptr [rsp + 416] # 32-byte Folded Reload - vpor ymm8, ymm1, ymmword ptr [rsp + 320] # 32-byte Folded Reload - vpor ymm7, ymm4, ymm1 - vperm2i128 ymm1, ymm8, ymm7, 49 # ymm1 = ymm8[2,3],ymm7[2,3] - vinserti128 ymm2, ymm8, xmm7, 1 - vshufps ymm1, ymm2, ymm1, 136 # ymm1 = ymm2[0,2],ymm1[0,2],ymm2[4,6],ymm1[4,6] - vperm2i128 ymm2, ymm12, ymm11, 49 # ymm2 = ymm12[2,3],ymm11[2,3] - vinserti128 ymm3, ymm12, xmm11, 1 - vshufps ymm2, ymm3, ymm2, 136 # ymm2 = ymm3[0,2],ymm2[0,2],ymm3[4,6],ymm2[4,6] - vperm2i128 ymm3, ymm10, ymm9, 49 # ymm3 = ymm10[2,3],ymm9[2,3] - vinserti128 ymm13, ymm10, xmm9, 1 - vshufps ymm3, ymm13, ymm3, 136 # ymm3 = ymm13[0,2],ymm3[0,2],ymm13[4,6],ymm3[4,6] - vperm2i128 ymm13, ymm15, ymm5, 49 # ymm13 = ymm15[2,3],ymm5[2,3] - vinserti128 ymm14, ymm15, xmm5, 1 - vshufps ymm13, ymm14, ymm13, 136 # ymm13 = ymm14[0,2],ymm13[0,2],ymm14[4,6],ymm13[4,6] - vpcmpgtd ymm13, ymm0, ymm13 - vpcmpgtd ymm3, ymm0, ymm3 - vpackssdw ymm3, ymm13, ymm3 - vpcmpgtd ymm2, ymm0, ymm2 - vpcmpgtd ymm1, ymm0, ymm1 - vpackssdw ymm1, ymm2, ymm1 - vpermq ymm2, ymm3, 216 # ymm2 = ymm3[0,2,1,3] - vpermq ymm1, ymm1, 216 # ymm1 = ymm1[0,2,1,3] - vpacksswb ymm1, ymm2, ymm1 - vpand ymm6, ymm1, ymm6 - vmovd ecx, xmm6 - # implicit-def: $ymm14 - test cl, 1 - je .LBB0_224 -# %bb.719: # in Loop: Header=BB0_26 Depth=1 - 
vpbroadcastb ymm14, byte ptr [rdi + rdx] - vpextrb ecx, xmm6, 1 - test cl, 1 - jne .LBB0_720 -.LBB0_225: # in Loop: Header=BB0_26 Depth=1 - mov rdx, qword ptr [rsp + 104] # 8-byte Reload - vpextrb ecx, xmm6, 2 - test cl, 1 - je .LBB0_227 -.LBB0_226: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rbx], 2 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_227: # in Loop: Header=BB0_26 Depth=1 - mov rsi, qword ptr [rsp + 96] # 8-byte Reload - mov rbx, qword ptr [rsp + 72] # 8-byte Reload - vpextrb ecx, xmm6, 3 - test cl, 1 - je .LBB0_228 -# %bb.721: # in Loop: Header=BB0_26 Depth=1 - mov rcx, qword ptr [rsp + 272] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rcx], 3 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 4 - test cl, 1 - jne .LBB0_722 -.LBB0_229: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 5 - test cl, 1 - je .LBB0_230 -.LBB0_723: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rdx], 5 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 6 - test cl, 1 - jne .LBB0_724 -.LBB0_231: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 7 - test cl, 1 - je .LBB0_232 -.LBB0_725: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 7 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 8 - test cl, 1 - jne .LBB0_726 -.LBB0_233: # in Loop: Header=BB0_26 Depth=1 - mov rdx, qword ptr [rsp + 88] # 8-byte Reload - vpextrb ecx, xmm6, 9 - test cl, 1 - je .LBB0_234 -.LBB0_727: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r10], 9 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 10 - test cl, 1 - jne .LBB0_728 -.LBB0_235: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 11 - test cl, 1 - je .LBB0_236 -.LBB0_729: # in Loop: Header=BB0_26 Depth=1 - mov 
rax, qword ptr [rsp + 256] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 11 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 12 - test cl, 1 - jne .LBB0_730 -.LBB0_237: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 13 - test cl, 1 - je .LBB0_239 -.LBB0_238: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rdx], 13 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_239: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 80] # 8-byte Reload - mov rdx, qword ptr [rsp + 64] # 8-byte Reload - vpextrb ecx, xmm6, 14 - test cl, 1 - je .LBB0_241 -# %bb.240: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 14 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_241: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 15 - test cl, 1 - je .LBB0_243 -# %bb.242: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rbx], 15 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_243: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm6, 1 - vmovd eax, xmm1 - mov dword ptr [rsp + 44], eax # 4-byte Spill - test al, 1 - je .LBB0_245 -# %bb.244: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rdx], 0 - vinserti128 ymm14, ymm14, xmm2, 1 -.LBB0_245: # in Loop: Header=BB0_26 Depth=1 - mov rcx, qword ptr [rsp + 56] # 8-byte Reload - vpextrb eax, xmm1, 1 - mov dword ptr [rsp + 40], eax # 4-byte Spill - test al, 1 - je .LBB0_247 -# %bb.246: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + r13], 1 - vinserti128 ymm14, ymm14, xmm2, 1 -.LBB0_247: # in Loop: Header=BB0_26 Depth=1 - mov rdx, qword ptr [rsp + 280] # 8-byte Reload - mov rsi, qword ptr [rsp + 112] # 8-byte Reload - vpextrb eax, xmm1, 2 - mov dword ptr [rsp + 36], eax # 4-byte Spill - test al, 1 
- je .LBB0_249 -# %bb.248: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + r15], 2 - vinserti128 ymm14, ymm14, xmm2, 1 -.LBB0_249: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 120] # 8-byte Reload - vpextrb ebx, xmm1, 3 - mov dword ptr [rsp + 32], ebx # 4-byte Spill - test bl, 1 - je .LBB0_250 -# %bb.731: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 3 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb eax, xmm1, 4 - mov dword ptr [rsp + 28], eax # 4-byte Spill - test al, 1 - jne .LBB0_732 -.LBB0_251: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm1, 5 - mov dword ptr [rsp + 24], eax # 4-byte Spill - test al, 1 - je .LBB0_252 -.LBB0_733: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rdx], 5 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb eax, xmm1, 6 - mov dword ptr [rsp + 20], eax # 4-byte Spill - test al, 1 - jne .LBB0_734 -.LBB0_253: # in Loop: Header=BB0_26 Depth=1 - vpextrb r9d, xmm1, 7 - test r9b, 1 - je .LBB0_254 -.LBB0_735: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 240] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 7 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb edx, xmm1, 8 - test dl, 1 - jne .LBB0_736 -.LBB0_255: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 9 - test cl, 1 - je .LBB0_256 -.LBB0_737: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 208] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 9 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb esi, xmm1, 10 - test sil, 1 - jne .LBB0_738 -.LBB0_257: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm1, 11 - test al, 1 - je .LBB0_258 -.LBB0_739: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 192] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 11 - 
vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r13d, xmm1, 12 - test r13b, 1 - jne .LBB0_740 -.LBB0_259: # in Loop: Header=BB0_26 Depth=1 - vpextrb r10d, xmm1, 13 - test r10b, 1 - je .LBB0_260 -.LBB0_741: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 176] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 13 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r11d, xmm1, 14 - test r11b, 1 - jne .LBB0_742 -.LBB0_261: # in Loop: Header=BB0_26 Depth=1 - vpextrb r14d, xmm1, 15 - test r14b, 1 - je .LBB0_263 -.LBB0_262: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - mov rbx, qword ptr [rsp + 160] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rbx], 15 - vinserti128 ymm14, ymm14, xmm1, 1 -.LBB0_263: # in Loop: Header=BB0_26 Depth=1 - vpsrlw ymm1, ymm14, 2 - vpand ymm14, ymm1, ymmword ptr [rip + .LCPI0_4] - vmovd r15d, xmm6 - test r15b, 1 - je .LBB0_264 -# %bb.743: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm15 - vpextrb byte ptr [r8 + rbx], xmm14, 0 - vpextrb ebx, xmm6, 1 - test bl, 1 - jne .LBB0_744 -.LBB0_265: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 2 - test bl, 1 - mov r15, qword ptr [rsp + 224] # 8-byte Reload - je .LBB0_266 -.LBB0_745: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm15, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 2 - vpextrb ebx, xmm6, 3 - test bl, 1 - jne .LBB0_746 -.LBB0_267: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 4 - test bl, 1 - je .LBB0_268 -.LBB0_747: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm5 - vpextrb byte ptr [r8 + rbx], xmm14, 4 - vpextrb ebx, xmm6, 5 - test bl, 1 - jne .LBB0_748 -.LBB0_269: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 6 - test bl, 1 - je .LBB0_270 -.LBB0_749: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm5, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 6 - vpextrb ebx, xmm6, 7 - test bl, 1 - jne .LBB0_750 -.LBB0_271: # in Loop: Header=BB0_26 Depth=1 - vpextrb 
ebx, xmm6, 8 - test bl, 1 - je .LBB0_272 -.LBB0_751: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm12 - vpextrb byte ptr [r8 + rbx], xmm14, 8 - vpextrb ebx, xmm6, 9 - test bl, 1 - jne .LBB0_752 -.LBB0_273: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 10 - test bl, 1 - je .LBB0_274 -.LBB0_753: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm12, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 10 - vpextrb ebx, xmm6, 11 - test bl, 1 - jne .LBB0_754 -.LBB0_275: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 12 - test bl, 1 - je .LBB0_276 -.LBB0_755: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm11 - vpextrb byte ptr [r8 + rbx], xmm14, 12 - vpextrb ebx, xmm6, 13 - test bl, 1 - jne .LBB0_756 -.LBB0_277: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 14 - test bl, 1 - je .LBB0_278 -.LBB0_757: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm11, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 14 - vpextrb ebx, xmm6, 15 - test bl, 1 - jne .LBB0_758 -.LBB0_279: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 44], 1 # 1-byte Folded Reload - je .LBB0_280 -.LBB0_759: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm10 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 0 - test byte ptr [rsp + 40], 1 # 1-byte Folded Reload - jne .LBB0_760 -.LBB0_281: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 36], 1 # 1-byte Folded Reload - je .LBB0_282 -.LBB0_761: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm10, 1 - vmovq rbx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 2 - test byte ptr [rsp + 32], 1 # 1-byte Folded Reload - jne .LBB0_762 -.LBB0_283: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 28], 1 # 1-byte Folded Reload - je .LBB0_284 -.LBB0_763: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm9 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 4 - test byte ptr [rsp + 24], 1 # 1-byte Folded Reload - jne .LBB0_764 
-.LBB0_285: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 20], 1 # 1-byte Folded Reload - je .LBB0_286 -.LBB0_765: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm9, 1 - vmovq rbx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 6 - test r9b, 1 - jne .LBB0_766 -.LBB0_287: # in Loop: Header=BB0_26 Depth=1 - test dl, 1 - mov rbx, qword ptr [rsp + 296] # 8-byte Reload - je .LBB0_288 -.LBB0_767: # in Loop: Header=BB0_26 Depth=1 - vmovq rdx, xmm8 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rdx], xmm1, 8 - test cl, 1 - jne .LBB0_768 -.LBB0_289: # in Loop: Header=BB0_26 Depth=1 - test sil, 1 - mov rdx, qword ptr [rsp + 304] # 8-byte Reload - je .LBB0_290 -.LBB0_769: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm8, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 10 - test al, 1 - mov rsi, qword ptr [rsp + 152] # 8-byte Reload - jne .LBB0_770 -.LBB0_291: # in Loop: Header=BB0_26 Depth=1 - test r13b, 1 - je .LBB0_292 -.LBB0_771: # in Loop: Header=BB0_26 Depth=1 - vmovq rcx, xmm7 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 12 - test r10b, 1 - mov r13, qword ptr [rsp + 280] # 8-byte Reload - jne .LBB0_772 -.LBB0_293: # in Loop: Header=BB0_26 Depth=1 - test r11b, 1 - je .LBB0_294 -.LBB0_773: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm7, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 14 - test r14b, 1 - mov rax, qword ptr [rsp + 288] # 8-byte Reload - mov r9, qword ptr [rsp + 232] # 8-byte Reload - jne .LBB0_295 - jmp .LBB0_296 - .p2align 4, 0x90 -.LBB0_224: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 1 - test cl, 1 - je .LBB0_225 -.LBB0_720: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rsi], 1 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - mov rdx, qword ptr [rsp + 104] # 8-byte Reload - vpextrb ecx, xmm6, 2 - test cl, 1 - jne .LBB0_226 
- jmp .LBB0_227 - .p2align 4, 0x90 -.LBB0_228: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 4 - test cl, 1 - je .LBB0_229 -.LBB0_722: # in Loop: Header=BB0_26 Depth=1 - mov rcx, qword ptr [rsp + 264] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rcx], 4 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 5 - test cl, 1 - jne .LBB0_723 -.LBB0_230: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 6 - test cl, 1 - je .LBB0_231 -.LBB0_724: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r9], 6 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 7 - test cl, 1 - jne .LBB0_725 -.LBB0_232: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 8 - test cl, 1 - je .LBB0_233 -.LBB0_726: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rsi], 8 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - mov rdx, qword ptr [rsp + 88] # 8-byte Reload - vpextrb ecx, xmm6, 9 - test cl, 1 - jne .LBB0_727 -.LBB0_234: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 10 - test cl, 1 - je .LBB0_235 -.LBB0_728: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r11], 10 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 11 - test cl, 1 - jne .LBB0_729 -.LBB0_236: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 12 - test cl, 1 - je .LBB0_237 -.LBB0_730: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 248] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 12 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 13 - test cl, 1 - jne .LBB0_238 - jmp .LBB0_239 - .p2align 4, 0x90 -.LBB0_250: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm1, 4 - mov dword ptr [rsp + 28], eax # 4-byte Spill - test al, 1 - je .LBB0_251 -.LBB0_732: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, 
ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rcx], 4 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb eax, xmm1, 5 - mov dword ptr [rsp + 24], eax # 4-byte Spill - test al, 1 - jne .LBB0_733 -.LBB0_252: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm1, 6 - mov dword ptr [rsp + 20], eax # 4-byte Spill - test al, 1 - je .LBB0_253 -.LBB0_734: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rsi], 6 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r9d, xmm1, 7 - test r9b, 1 - jne .LBB0_735 -.LBB0_254: # in Loop: Header=BB0_26 Depth=1 - vpextrb edx, xmm1, 8 - test dl, 1 - je .LBB0_255 -.LBB0_736: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 216] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 8 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 9 - test cl, 1 - jne .LBB0_737 -.LBB0_256: # in Loop: Header=BB0_26 Depth=1 - vpextrb esi, xmm1, 10 - test sil, 1 - je .LBB0_257 -.LBB0_738: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 200] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 10 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb eax, xmm1, 11 - test al, 1 - jne .LBB0_739 -.LBB0_258: # in Loop: Header=BB0_26 Depth=1 - vpextrb r13d, xmm1, 12 - test r13b, 1 - je .LBB0_259 -.LBB0_740: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 184] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 12 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r10d, xmm1, 13 - test r10b, 1 - jne .LBB0_741 -.LBB0_260: # in Loop: Header=BB0_26 Depth=1 - vpextrb r11d, xmm1, 14 - test r11b, 1 - je .LBB0_261 -.LBB0_742: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 168] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 14 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r14d, xmm1, 15 - test r14b, 1 - jne .LBB0_262 - jmp .LBB0_263 - 
.p2align 4, 0x90 -.LBB0_264: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 1 - test bl, 1 - je .LBB0_265 -.LBB0_744: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm15, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 1 - vpextrb ebx, xmm6, 2 - test bl, 1 - mov r15, qword ptr [rsp + 224] # 8-byte Reload - jne .LBB0_745 -.LBB0_266: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 3 - test bl, 1 - je .LBB0_267 -.LBB0_746: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm15, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 3 - vpextrb ebx, xmm6, 4 - test bl, 1 - jne .LBB0_747 -.LBB0_268: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 5 - test bl, 1 - je .LBB0_269 -.LBB0_748: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm5, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 5 - vpextrb ebx, xmm6, 6 - test bl, 1 - jne .LBB0_749 -.LBB0_270: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 7 - test bl, 1 - je .LBB0_271 -.LBB0_750: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm5, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 7 - vpextrb ebx, xmm6, 8 - test bl, 1 - jne .LBB0_751 -.LBB0_272: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 9 - test bl, 1 - je .LBB0_273 -.LBB0_752: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm12, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 9 - vpextrb ebx, xmm6, 10 - test bl, 1 - jne .LBB0_753 -.LBB0_274: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 11 - test bl, 1 - je .LBB0_275 -.LBB0_754: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm12, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 11 - vpextrb ebx, xmm6, 12 - test bl, 1 - jne .LBB0_755 -.LBB0_276: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 13 - test bl, 1 - je .LBB0_277 -.LBB0_756: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm11, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 13 - vpextrb ebx, xmm6, 14 - test bl, 1 - jne .LBB0_757 -.LBB0_278: # in Loop: Header=BB0_26 
Depth=1 - vpextrb ebx, xmm6, 15 - test bl, 1 - je .LBB0_279 -.LBB0_758: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm11, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 15 - test byte ptr [rsp + 44], 1 # 1-byte Folded Reload - jne .LBB0_759 -.LBB0_280: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 40], 1 # 1-byte Folded Reload - je .LBB0_281 -.LBB0_760: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm10, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 1 - test byte ptr [rsp + 36], 1 # 1-byte Folded Reload - jne .LBB0_761 -.LBB0_282: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 32], 1 # 1-byte Folded Reload - je .LBB0_283 -.LBB0_762: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm10, 1 - vpextrq rbx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 3 - test byte ptr [rsp + 28], 1 # 1-byte Folded Reload - jne .LBB0_763 -.LBB0_284: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 24], 1 # 1-byte Folded Reload - je .LBB0_285 -.LBB0_764: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm9, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 5 - test byte ptr [rsp + 20], 1 # 1-byte Folded Reload - jne .LBB0_765 -.LBB0_286: # in Loop: Header=BB0_26 Depth=1 - test r9b, 1 - je .LBB0_287 -.LBB0_766: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm9, 1 - vpextrq rbx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 7 - test dl, 1 - mov rbx, qword ptr [rsp + 296] # 8-byte Reload - jne .LBB0_767 -.LBB0_288: # in Loop: Header=BB0_26 Depth=1 - test cl, 1 - je .LBB0_289 -.LBB0_768: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm8, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 9 - test sil, 1 - mov rdx, qword ptr [rsp + 304] # 8-byte Reload - jne .LBB0_769 -.LBB0_290: # in Loop: Header=BB0_26 Depth=1 - test al, 1 - mov rsi, qword ptr [rsp + 152] # 8-byte Reload - je .LBB0_291 -.LBB0_770: # 
in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm8, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 11 - test r13b, 1 - jne .LBB0_771 -.LBB0_292: # in Loop: Header=BB0_26 Depth=1 - test r10b, 1 - mov r13, qword ptr [rsp + 280] # 8-byte Reload - je .LBB0_293 -.LBB0_772: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm7, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 13 - test r11b, 1 - jne .LBB0_773 -.LBB0_294: # in Loop: Header=BB0_26 Depth=1 - test r14b, 1 - mov rax, qword ptr [rsp + 288] # 8-byte Reload - mov r9, qword ptr [rsp + 232] # 8-byte Reload - je .LBB0_296 -.LBB0_295: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm7, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 15 -.LBB0_296: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 704] # 32-byte Reload - vpor ymm15, ymm1, ymmword ptr [rsp + 512] # 32-byte Folded Reload - vpor ymm5, ymm1, ymmword ptr [rsp + 480] # 32-byte Folded Reload - vpor ymm10, ymm1, ymmword ptr [rsp + 384] # 32-byte Folded Reload - vpor ymm9, ymm1, ymmword ptr [rsp + 352] # 32-byte Folded Reload - vpor ymm12, ymm1, ymmword ptr [rsp + 448] # 32-byte Folded Reload - vpor ymm11, ymm1, ymmword ptr [rsp + 416] # 32-byte Folded Reload - vpor ymm8, ymm1, ymmword ptr [rsp + 320] # 32-byte Folded Reload - vpor ymm7, ymm4, ymm1 - vperm2i128 ymm1, ymm8, ymm7, 49 # ymm1 = ymm8[2,3],ymm7[2,3] - vinserti128 ymm2, ymm8, xmm7, 1 - vshufps ymm1, ymm2, ymm1, 136 # ymm1 = ymm2[0,2],ymm1[0,2],ymm2[4,6],ymm1[4,6] - vperm2i128 ymm2, ymm12, ymm11, 49 # ymm2 = ymm12[2,3],ymm11[2,3] - vinserti128 ymm3, ymm12, xmm11, 1 - vshufps ymm2, ymm3, ymm2, 136 # ymm2 = ymm3[0,2],ymm2[0,2],ymm3[4,6],ymm2[4,6] - vperm2i128 ymm3, ymm10, ymm9, 49 # ymm3 = ymm10[2,3],ymm9[2,3] - vinserti128 ymm13, ymm10, xmm9, 1 - vshufps ymm3, ymm13, ymm3, 136 # ymm3 = ymm13[0,2],ymm3[0,2],ymm13[4,6],ymm3[4,6] - vperm2i128 ymm13, ymm15, ymm5, 49 # 
ymm13 = ymm15[2,3],ymm5[2,3] - vinserti128 ymm14, ymm15, xmm5, 1 - vshufps ymm13, ymm14, ymm13, 136 # ymm13 = ymm14[0,2],ymm13[0,2],ymm14[4,6],ymm13[4,6] - vpcmpgtd ymm13, ymm0, ymm13 - vpcmpgtd ymm3, ymm0, ymm3 - vpackssdw ymm3, ymm13, ymm3 - vpcmpgtd ymm2, ymm0, ymm2 - vpcmpgtd ymm1, ymm0, ymm1 - vpackssdw ymm1, ymm2, ymm1 - vpermq ymm2, ymm3, 216 # ymm2 = ymm3[0,2,1,3] - vpermq ymm1, ymm1, 216 # ymm1 = ymm1[0,2,1,3] - vpacksswb ymm1, ymm2, ymm1 - vpand ymm6, ymm1, ymm6 - vmovd ecx, xmm6 - # implicit-def: $ymm14 - test cl, 1 - je .LBB0_297 -# %bb.774: # in Loop: Header=BB0_26 Depth=1 - vpbroadcastb ymm14, byte ptr [rdi + rdx] - vpextrb ecx, xmm6, 1 - test cl, 1 - jne .LBB0_775 -.LBB0_298: # in Loop: Header=BB0_26 Depth=1 - mov rdx, qword ptr [rsp + 104] # 8-byte Reload - vpextrb ecx, xmm6, 2 - test cl, 1 - je .LBB0_300 -.LBB0_299: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rbx], 2 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_300: # in Loop: Header=BB0_26 Depth=1 - mov rsi, qword ptr [rsp + 96] # 8-byte Reload - mov r10, qword ptr [rsp + 72] # 8-byte Reload - vpextrb ecx, xmm6, 3 - test cl, 1 - je .LBB0_301 -# %bb.776: # in Loop: Header=BB0_26 Depth=1 - mov rcx, qword ptr [rsp + 272] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rcx], 3 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 4 - test cl, 1 - jne .LBB0_777 -.LBB0_302: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 5 - test cl, 1 - je .LBB0_303 -.LBB0_778: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rdx], 5 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 6 - test cl, 1 - jne .LBB0_779 -.LBB0_304: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 7 - test cl, 1 - je .LBB0_305 -.LBB0_780: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r9], 7 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = 
ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 8 - test cl, 1 - jne .LBB0_781 -.LBB0_306: # in Loop: Header=BB0_26 Depth=1 - mov rdx, qword ptr [rsp + 88] # 8-byte Reload - vpextrb ecx, xmm6, 9 - test cl, 1 - je .LBB0_308 -.LBB0_307: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r15], 9 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_308: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 144] # 8-byte Reload - mov rsi, qword ptr [rsp + 136] # 8-byte Reload - mov rbx, qword ptr [rsp + 128] # 8-byte Reload - mov r9, qword ptr [rsp + 120] # 8-byte Reload - vpextrb ecx, xmm6, 10 - test cl, 1 - je .LBB0_309 -# %bb.782: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 10 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 11 - test cl, 1 - jne .LBB0_783 -.LBB0_310: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 12 - test cl, 1 - je .LBB0_311 -.LBB0_784: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 248] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 12 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 13 - test cl, 1 - jne .LBB0_312 - jmp .LBB0_313 - .p2align 4, 0x90 -.LBB0_297: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 1 - test cl, 1 - je .LBB0_298 -.LBB0_775: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rsi], 1 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - mov rdx, qword ptr [rsp + 104] # 8-byte Reload - vpextrb ecx, xmm6, 2 - test cl, 1 - jne .LBB0_299 - jmp .LBB0_300 - .p2align 4, 0x90 -.LBB0_301: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 4 - test cl, 1 - je .LBB0_302 -.LBB0_777: # in Loop: Header=BB0_26 Depth=1 - mov rcx, qword ptr [rsp + 264] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rcx], 4 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = 
ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 5 - test cl, 1 - jne .LBB0_778 -.LBB0_303: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 6 - test cl, 1 - je .LBB0_304 -.LBB0_779: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 6 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 7 - test cl, 1 - jne .LBB0_780 -.LBB0_305: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 8 - test cl, 1 - je .LBB0_306 -.LBB0_781: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rsi], 8 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - mov rdx, qword ptr [rsp + 88] # 8-byte Reload - vpextrb ecx, xmm6, 9 - test cl, 1 - jne .LBB0_307 - jmp .LBB0_308 - .p2align 4, 0x90 -.LBB0_309: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 11 - test cl, 1 - je .LBB0_310 -.LBB0_783: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 256] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 11 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 12 - test cl, 1 - jne .LBB0_784 -.LBB0_311: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 13 - test cl, 1 - je .LBB0_313 -.LBB0_312: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rdx], 13 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_313: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 80] # 8-byte Reload - mov rdx, qword ptr [rsp + 64] # 8-byte Reload - vpextrb ecx, xmm6, 14 - test cl, 1 - je .LBB0_315 -# %bb.314: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 14 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_315: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 15 - test cl, 1 - je .LBB0_317 -# %bb.316: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r10], 15 - vpblendd ymm14, ymm14, ymm1, 
15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_317: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm6, 1 - vmovd eax, xmm1 - mov dword ptr [rsp + 44], eax # 4-byte Spill - test al, 1 - je .LBB0_319 -# %bb.318: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rdx], 0 - vinserti128 ymm14, ymm14, xmm2, 1 -.LBB0_319: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 56] # 8-byte Reload - vpextrb ecx, xmm1, 1 - mov dword ptr [rsp + 40], ecx # 4-byte Spill - test cl, 1 - je .LBB0_320 -# %bb.785: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rsi], 1 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 2 - mov dword ptr [rsp + 36], ecx # 4-byte Spill - test cl, 1 - jne .LBB0_786 -.LBB0_321: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 3 - mov dword ptr [rsp + 32], ecx # 4-byte Spill - test cl, 1 - je .LBB0_322 -.LBB0_787: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + r9], 3 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 4 - mov dword ptr [rsp + 28], ecx # 4-byte Spill - test cl, 1 - jne .LBB0_788 -.LBB0_323: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm1, 5 - mov dword ptr [rsp + 24], eax # 4-byte Spill - test al, 1 - je .LBB0_325 -.LBB0_324: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + r13], 5 - vinserti128 ymm14, ymm14, xmm2, 1 -.LBB0_325: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 112] # 8-byte Reload - vpextrb ecx, xmm1, 6 - mov dword ptr [rsp + 20], ecx # 4-byte Spill - test cl, 1 - je .LBB0_326 -# %bb.789: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 6 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r9d, xmm1, 7 - test r9b, 1 - jne .LBB0_790 -.LBB0_327: # in Loop: Header=BB0_26 Depth=1 - vpextrb edx, xmm1, 8 - test dl, 1 - je 
.LBB0_328 -.LBB0_791: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 216] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 8 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 9 - test cl, 1 - jne .LBB0_792 -.LBB0_329: # in Loop: Header=BB0_26 Depth=1 - vpextrb esi, xmm1, 10 - test sil, 1 - je .LBB0_330 -.LBB0_793: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 200] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 10 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb eax, xmm1, 11 - test al, 1 - jne .LBB0_794 -.LBB0_331: # in Loop: Header=BB0_26 Depth=1 - vpextrb r13d, xmm1, 12 - test r13b, 1 - je .LBB0_332 -.LBB0_795: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 184] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 12 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r10d, xmm1, 13 - test r10b, 1 - jne .LBB0_796 -.LBB0_333: # in Loop: Header=BB0_26 Depth=1 - vpextrb r11d, xmm1, 14 - test r11b, 1 - je .LBB0_334 -.LBB0_797: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 168] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 14 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r14d, xmm1, 15 - test r14b, 1 - jne .LBB0_335 - jmp .LBB0_336 - .p2align 4, 0x90 -.LBB0_320: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 2 - mov dword ptr [rsp + 36], ecx # 4-byte Spill - test cl, 1 - je .LBB0_321 -.LBB0_786: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 2 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 3 - mov dword ptr [rsp + 32], ecx # 4-byte Spill - test cl, 1 - jne .LBB0_787 -.LBB0_322: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 4 - mov dword ptr [rsp + 28], ecx # 4-byte Spill - test cl, 1 - je .LBB0_323 -.LBB0_788: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, 
ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 4 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb eax, xmm1, 5 - mov dword ptr [rsp + 24], eax # 4-byte Spill - test al, 1 - jne .LBB0_324 - jmp .LBB0_325 - .p2align 4, 0x90 -.LBB0_326: # in Loop: Header=BB0_26 Depth=1 - vpextrb r9d, xmm1, 7 - test r9b, 1 - je .LBB0_327 -.LBB0_790: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 240] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 7 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb edx, xmm1, 8 - test dl, 1 - jne .LBB0_791 -.LBB0_328: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 9 - test cl, 1 - je .LBB0_329 -.LBB0_792: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 208] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 9 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb esi, xmm1, 10 - test sil, 1 - jne .LBB0_793 -.LBB0_330: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm1, 11 - test al, 1 - je .LBB0_331 -.LBB0_794: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 192] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 11 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r13d, xmm1, 12 - test r13b, 1 - jne .LBB0_795 -.LBB0_332: # in Loop: Header=BB0_26 Depth=1 - vpextrb r10d, xmm1, 13 - test r10b, 1 - je .LBB0_333 -.LBB0_796: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 176] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 13 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r11d, xmm1, 14 - test r11b, 1 - jne .LBB0_797 -.LBB0_334: # in Loop: Header=BB0_26 Depth=1 - vpextrb r14d, xmm1, 15 - test r14b, 1 - je .LBB0_336 -.LBB0_335: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - mov rbx, qword ptr [rsp + 160] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rbx], 15 - vinserti128 ymm14, ymm14, xmm1, 1 -.LBB0_336: # in Loop: 
Header=BB0_26 Depth=1 - vpsrlw ymm1, ymm14, 3 - vpand ymm14, ymm1, ymmword ptr [rip + .LCPI0_4] - vmovd r15d, xmm6 - test r15b, 1 - je .LBB0_337 -# %bb.798: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm15 - vpextrb byte ptr [r8 + rbx], xmm14, 0 - vpextrb ebx, xmm6, 1 - test bl, 1 - jne .LBB0_799 -.LBB0_338: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 2 - test bl, 1 - mov r15, qword ptr [rsp + 224] # 8-byte Reload - je .LBB0_339 -.LBB0_800: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm15, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 2 - vpextrb ebx, xmm6, 3 - test bl, 1 - jne .LBB0_801 -.LBB0_340: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 4 - test bl, 1 - je .LBB0_341 -.LBB0_802: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm5 - vpextrb byte ptr [r8 + rbx], xmm14, 4 - vpextrb ebx, xmm6, 5 - test bl, 1 - jne .LBB0_803 -.LBB0_342: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 6 - test bl, 1 - je .LBB0_343 -.LBB0_804: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm5, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 6 - vpextrb ebx, xmm6, 7 - test bl, 1 - jne .LBB0_805 -.LBB0_344: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 8 - test bl, 1 - je .LBB0_345 -.LBB0_806: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm12 - vpextrb byte ptr [r8 + rbx], xmm14, 8 - vpextrb ebx, xmm6, 9 - test bl, 1 - jne .LBB0_807 -.LBB0_346: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 10 - test bl, 1 - je .LBB0_347 -.LBB0_808: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm12, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 10 - vpextrb ebx, xmm6, 11 - test bl, 1 - jne .LBB0_809 -.LBB0_348: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 12 - test bl, 1 - je .LBB0_349 -.LBB0_810: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm11 - vpextrb byte ptr [r8 + rbx], xmm14, 12 - vpextrb ebx, xmm6, 13 - test bl, 1 - jne .LBB0_811 -.LBB0_350: # in Loop: Header=BB0_26 Depth=1 - 
vpextrb ebx, xmm6, 14 - test bl, 1 - je .LBB0_351 -.LBB0_812: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm11, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 14 - vpextrb ebx, xmm6, 15 - test bl, 1 - jne .LBB0_813 -.LBB0_352: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 44], 1 # 1-byte Folded Reload - je .LBB0_353 -.LBB0_814: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm10 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 0 - test byte ptr [rsp + 40], 1 # 1-byte Folded Reload - jne .LBB0_815 -.LBB0_354: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 36], 1 # 1-byte Folded Reload - je .LBB0_355 -.LBB0_816: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm10, 1 - vmovq rbx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 2 - test byte ptr [rsp + 32], 1 # 1-byte Folded Reload - jne .LBB0_817 -.LBB0_356: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 28], 1 # 1-byte Folded Reload - je .LBB0_357 -.LBB0_818: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm9 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 4 - test byte ptr [rsp + 24], 1 # 1-byte Folded Reload - jne .LBB0_819 -.LBB0_358: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 20], 1 # 1-byte Folded Reload - je .LBB0_359 -.LBB0_820: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm9, 1 - vmovq rbx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 6 - test r9b, 1 - jne .LBB0_821 -.LBB0_360: # in Loop: Header=BB0_26 Depth=1 - test dl, 1 - mov rbx, qword ptr [rsp + 296] # 8-byte Reload - je .LBB0_361 -.LBB0_822: # in Loop: Header=BB0_26 Depth=1 - vmovq rdx, xmm8 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rdx], xmm1, 8 - test cl, 1 - jne .LBB0_823 -.LBB0_362: # in Loop: Header=BB0_26 Depth=1 - test sil, 1 - mov rdx, qword ptr [rsp + 304] # 8-byte Reload - je .LBB0_363 -.LBB0_824: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm8, 1 - 
vmovq rcx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 10 - test al, 1 - mov rsi, qword ptr [rsp + 152] # 8-byte Reload - jne .LBB0_825 -.LBB0_364: # in Loop: Header=BB0_26 Depth=1 - test r13b, 1 - je .LBB0_365 -.LBB0_826: # in Loop: Header=BB0_26 Depth=1 - vmovq rcx, xmm7 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 12 - test r10b, 1 - mov r13, qword ptr [rsp + 280] # 8-byte Reload - jne .LBB0_827 -.LBB0_366: # in Loop: Header=BB0_26 Depth=1 - test r11b, 1 - je .LBB0_367 -.LBB0_828: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm7, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 14 - test r14b, 1 - mov rax, qword ptr [rsp + 288] # 8-byte Reload - mov r9, qword ptr [rsp + 232] # 8-byte Reload - jne .LBB0_368 - jmp .LBB0_369 - .p2align 4, 0x90 -.LBB0_337: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 1 - test bl, 1 - je .LBB0_338 -.LBB0_799: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm15, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 1 - vpextrb ebx, xmm6, 2 - test bl, 1 - mov r15, qword ptr [rsp + 224] # 8-byte Reload - jne .LBB0_800 -.LBB0_339: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 3 - test bl, 1 - je .LBB0_340 -.LBB0_801: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm15, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 3 - vpextrb ebx, xmm6, 4 - test bl, 1 - jne .LBB0_802 -.LBB0_341: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 5 - test bl, 1 - je .LBB0_342 -.LBB0_803: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm5, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 5 - vpextrb ebx, xmm6, 6 - test bl, 1 - jne .LBB0_804 -.LBB0_343: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 7 - test bl, 1 - je .LBB0_344 -.LBB0_805: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm5, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 7 - vpextrb ebx, xmm6, 8 - test bl, 1 - jne .LBB0_806 -.LBB0_345: # 
in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 9 - test bl, 1 - je .LBB0_346 -.LBB0_807: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm12, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 9 - vpextrb ebx, xmm6, 10 - test bl, 1 - jne .LBB0_808 -.LBB0_347: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 11 - test bl, 1 - je .LBB0_348 -.LBB0_809: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm12, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 11 - vpextrb ebx, xmm6, 12 - test bl, 1 - jne .LBB0_810 -.LBB0_349: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 13 - test bl, 1 - je .LBB0_350 -.LBB0_811: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm11, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 13 - vpextrb ebx, xmm6, 14 - test bl, 1 - jne .LBB0_812 -.LBB0_351: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 15 - test bl, 1 - je .LBB0_352 -.LBB0_813: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm11, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 15 - test byte ptr [rsp + 44], 1 # 1-byte Folded Reload - jne .LBB0_814 -.LBB0_353: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 40], 1 # 1-byte Folded Reload - je .LBB0_354 -.LBB0_815: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm10, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 1 - test byte ptr [rsp + 36], 1 # 1-byte Folded Reload - jne .LBB0_816 -.LBB0_355: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 32], 1 # 1-byte Folded Reload - je .LBB0_356 -.LBB0_817: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm10, 1 - vpextrq rbx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 3 - test byte ptr [rsp + 28], 1 # 1-byte Folded Reload - jne .LBB0_818 -.LBB0_357: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 24], 1 # 1-byte Folded Reload - je .LBB0_358 -.LBB0_819: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm9, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr 
[r8 + rbx], xmm1, 5 - test byte ptr [rsp + 20], 1 # 1-byte Folded Reload - jne .LBB0_820 -.LBB0_359: # in Loop: Header=BB0_26 Depth=1 - test r9b, 1 - je .LBB0_360 -.LBB0_821: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm9, 1 - vpextrq rbx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 7 - test dl, 1 - mov rbx, qword ptr [rsp + 296] # 8-byte Reload - jne .LBB0_822 -.LBB0_361: # in Loop: Header=BB0_26 Depth=1 - test cl, 1 - je .LBB0_362 -.LBB0_823: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm8, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 9 - test sil, 1 - mov rdx, qword ptr [rsp + 304] # 8-byte Reload - jne .LBB0_824 -.LBB0_363: # in Loop: Header=BB0_26 Depth=1 - test al, 1 - mov rsi, qword ptr [rsp + 152] # 8-byte Reload - je .LBB0_364 -.LBB0_825: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm8, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 11 - test r13b, 1 - jne .LBB0_826 -.LBB0_365: # in Loop: Header=BB0_26 Depth=1 - test r10b, 1 - mov r13, qword ptr [rsp + 280] # 8-byte Reload - je .LBB0_366 -.LBB0_827: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm7, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 13 - test r11b, 1 - jne .LBB0_828 -.LBB0_367: # in Loop: Header=BB0_26 Depth=1 - test r14b, 1 - mov rax, qword ptr [rsp + 288] # 8-byte Reload - mov r9, qword ptr [rsp + 232] # 8-byte Reload - je .LBB0_369 -.LBB0_368: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm7, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 15 -.LBB0_369: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 672] # 32-byte Reload - vpor ymm15, ymm1, ymmword ptr [rsp + 512] # 32-byte Folded Reload - vpor ymm5, ymm1, ymmword ptr [rsp + 480] # 32-byte Folded Reload - vpor ymm10, ymm1, ymmword ptr [rsp + 384] # 32-byte Folded Reload - vpor ymm9, ymm1, ymmword ptr [rsp + 352] # 
32-byte Folded Reload - vpor ymm12, ymm1, ymmword ptr [rsp + 448] # 32-byte Folded Reload - vpor ymm11, ymm1, ymmword ptr [rsp + 416] # 32-byte Folded Reload - vpor ymm8, ymm1, ymmword ptr [rsp + 320] # 32-byte Folded Reload - vpor ymm7, ymm4, ymm1 - vperm2i128 ymm1, ymm8, ymm7, 49 # ymm1 = ymm8[2,3],ymm7[2,3] - vinserti128 ymm2, ymm8, xmm7, 1 - vshufps ymm1, ymm2, ymm1, 136 # ymm1 = ymm2[0,2],ymm1[0,2],ymm2[4,6],ymm1[4,6] - vperm2i128 ymm2, ymm12, ymm11, 49 # ymm2 = ymm12[2,3],ymm11[2,3] - vinserti128 ymm3, ymm12, xmm11, 1 - vshufps ymm2, ymm3, ymm2, 136 # ymm2 = ymm3[0,2],ymm2[0,2],ymm3[4,6],ymm2[4,6] - vperm2i128 ymm3, ymm10, ymm9, 49 # ymm3 = ymm10[2,3],ymm9[2,3] - vinserti128 ymm13, ymm10, xmm9, 1 - vshufps ymm3, ymm13, ymm3, 136 # ymm3 = ymm13[0,2],ymm3[0,2],ymm13[4,6],ymm3[4,6] - vperm2i128 ymm13, ymm15, ymm5, 49 # ymm13 = ymm15[2,3],ymm5[2,3] - vinserti128 ymm14, ymm15, xmm5, 1 - vshufps ymm13, ymm14, ymm13, 136 # ymm13 = ymm14[0,2],ymm13[0,2],ymm14[4,6],ymm13[4,6] - vpcmpgtd ymm13, ymm0, ymm13 - vpcmpgtd ymm3, ymm0, ymm3 - vpackssdw ymm3, ymm13, ymm3 - vpcmpgtd ymm2, ymm0, ymm2 - vpcmpgtd ymm1, ymm0, ymm1 - vpackssdw ymm1, ymm2, ymm1 - vpermq ymm2, ymm3, 216 # ymm2 = ymm3[0,2,1,3] - vpermq ymm1, ymm1, 216 # ymm1 = ymm1[0,2,1,3] - vpacksswb ymm1, ymm2, ymm1 - vpand ymm6, ymm1, ymm6 - vmovd ecx, xmm6 - # implicit-def: $ymm14 - test cl, 1 - je .LBB0_370 -# %bb.829: # in Loop: Header=BB0_26 Depth=1 - vpbroadcastb ymm14, byte ptr [rdi + rdx] - vpextrb ecx, xmm6, 1 - test cl, 1 - jne .LBB0_830 -.LBB0_371: # in Loop: Header=BB0_26 Depth=1 - mov rdx, qword ptr [rsp + 104] # 8-byte Reload - vpextrb ecx, xmm6, 2 - test cl, 1 - je .LBB0_373 -.LBB0_372: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rbx], 2 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_373: # in Loop: Header=BB0_26 Depth=1 - mov rsi, qword ptr [rsp + 96] # 8-byte Reload - mov r10, qword ptr [rsp + 72] # 8-byte Reload - vpextrb ecx, xmm6, 3 - 
test cl, 1 - je .LBB0_374 -# %bb.831: # in Loop: Header=BB0_26 Depth=1 - mov rcx, qword ptr [rsp + 272] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rcx], 3 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 4 - test cl, 1 - jne .LBB0_832 -.LBB0_375: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 5 - test cl, 1 - je .LBB0_376 -.LBB0_833: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rdx], 5 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 6 - test cl, 1 - jne .LBB0_834 -.LBB0_377: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 7 - test cl, 1 - je .LBB0_378 -.LBB0_835: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r9], 7 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 8 - test cl, 1 - jne .LBB0_836 -.LBB0_379: # in Loop: Header=BB0_26 Depth=1 - mov rdx, qword ptr [rsp + 88] # 8-byte Reload - vpextrb ecx, xmm6, 9 - test cl, 1 - je .LBB0_381 -.LBB0_380: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r15], 9 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_381: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 144] # 8-byte Reload - mov rsi, qword ptr [rsp + 136] # 8-byte Reload - mov rbx, qword ptr [rsp + 128] # 8-byte Reload - mov r9, qword ptr [rsp + 120] # 8-byte Reload - vpextrb ecx, xmm6, 10 - test cl, 1 - je .LBB0_382 -# %bb.837: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 10 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 11 - test cl, 1 - jne .LBB0_838 -.LBB0_383: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 12 - test cl, 1 - je .LBB0_384 -.LBB0_839: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 248] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 12 - vpblendd ymm14, 
ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 13 - test cl, 1 - jne .LBB0_385 - jmp .LBB0_386 - .p2align 4, 0x90 -.LBB0_370: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 1 - test cl, 1 - je .LBB0_371 -.LBB0_830: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rsi], 1 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - mov rdx, qword ptr [rsp + 104] # 8-byte Reload - vpextrb ecx, xmm6, 2 - test cl, 1 - jne .LBB0_372 - jmp .LBB0_373 - .p2align 4, 0x90 -.LBB0_374: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 4 - test cl, 1 - je .LBB0_375 -.LBB0_832: # in Loop: Header=BB0_26 Depth=1 - mov rcx, qword ptr [rsp + 264] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rcx], 4 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 5 - test cl, 1 - jne .LBB0_833 -.LBB0_376: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 6 - test cl, 1 - je .LBB0_377 -.LBB0_834: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 6 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 7 - test cl, 1 - jne .LBB0_835 -.LBB0_378: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 8 - test cl, 1 - je .LBB0_379 -.LBB0_836: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rsi], 8 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - mov rdx, qword ptr [rsp + 88] # 8-byte Reload - vpextrb ecx, xmm6, 9 - test cl, 1 - jne .LBB0_380 - jmp .LBB0_381 - .p2align 4, 0x90 -.LBB0_382: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 11 - test cl, 1 - je .LBB0_383 -.LBB0_838: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 256] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 11 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 12 - test cl, 1 - jne .LBB0_839 -.LBB0_384: # in Loop: 
Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 13 - test cl, 1 - je .LBB0_386 -.LBB0_385: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rdx], 13 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_386: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 80] # 8-byte Reload - mov rdx, qword ptr [rsp + 64] # 8-byte Reload - vpextrb ecx, xmm6, 14 - test cl, 1 - je .LBB0_388 -# %bb.387: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 14 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_388: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 15 - test cl, 1 - je .LBB0_390 -# %bb.389: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r10], 15 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_390: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm6, 1 - vmovd eax, xmm1 - mov dword ptr [rsp + 44], eax # 4-byte Spill - test al, 1 - je .LBB0_392 -# %bb.391: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rdx], 0 - vinserti128 ymm14, ymm14, xmm2, 1 -.LBB0_392: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 56] # 8-byte Reload - vpextrb ecx, xmm1, 1 - mov dword ptr [rsp + 40], ecx # 4-byte Spill - test cl, 1 - je .LBB0_393 -# %bb.840: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rsi], 1 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 2 - mov dword ptr [rsp + 36], ecx # 4-byte Spill - test cl, 1 - jne .LBB0_841 -.LBB0_394: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 3 - mov dword ptr [rsp + 32], ecx # 4-byte Spill - test cl, 1 - je .LBB0_395 -.LBB0_842: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + r9], 3 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 4 - mov dword ptr [rsp + 28], ecx # 4-byte 
Spill - test cl, 1 - jne .LBB0_843 -.LBB0_396: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm1, 5 - mov dword ptr [rsp + 24], eax # 4-byte Spill - test al, 1 - je .LBB0_398 -.LBB0_397: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + r13], 5 - vinserti128 ymm14, ymm14, xmm2, 1 -.LBB0_398: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 112] # 8-byte Reload - vpextrb ecx, xmm1, 6 - mov dword ptr [rsp + 20], ecx # 4-byte Spill - test cl, 1 - je .LBB0_399 -# %bb.844: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 6 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r9d, xmm1, 7 - test r9b, 1 - jne .LBB0_845 -.LBB0_400: # in Loop: Header=BB0_26 Depth=1 - vpextrb edx, xmm1, 8 - test dl, 1 - je .LBB0_401 -.LBB0_846: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 216] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 8 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 9 - test cl, 1 - jne .LBB0_847 -.LBB0_402: # in Loop: Header=BB0_26 Depth=1 - vpextrb esi, xmm1, 10 - test sil, 1 - je .LBB0_403 -.LBB0_848: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 200] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 10 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb eax, xmm1, 11 - test al, 1 - jne .LBB0_849 -.LBB0_404: # in Loop: Header=BB0_26 Depth=1 - vpextrb r13d, xmm1, 12 - test r13b, 1 - je .LBB0_405 -.LBB0_850: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 184] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 12 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r10d, xmm1, 13 - test r10b, 1 - jne .LBB0_851 -.LBB0_406: # in Loop: Header=BB0_26 Depth=1 - vpextrb r11d, xmm1, 14 - test r11b, 1 - je .LBB0_407 -.LBB0_852: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, 
qword ptr [rsp + 168] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 14 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r14d, xmm1, 15 - test r14b, 1 - jne .LBB0_408 - jmp .LBB0_409 - .p2align 4, 0x90 -.LBB0_393: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 2 - mov dword ptr [rsp + 36], ecx # 4-byte Spill - test cl, 1 - je .LBB0_394 -.LBB0_841: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 2 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 3 - mov dword ptr [rsp + 32], ecx # 4-byte Spill - test cl, 1 - jne .LBB0_842 -.LBB0_395: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 4 - mov dword ptr [rsp + 28], ecx # 4-byte Spill - test cl, 1 - je .LBB0_396 -.LBB0_843: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 4 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb eax, xmm1, 5 - mov dword ptr [rsp + 24], eax # 4-byte Spill - test al, 1 - jne .LBB0_397 - jmp .LBB0_398 - .p2align 4, 0x90 -.LBB0_399: # in Loop: Header=BB0_26 Depth=1 - vpextrb r9d, xmm1, 7 - test r9b, 1 - je .LBB0_400 -.LBB0_845: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 240] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 7 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb edx, xmm1, 8 - test dl, 1 - jne .LBB0_846 -.LBB0_401: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 9 - test cl, 1 - je .LBB0_402 -.LBB0_847: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 208] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 9 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb esi, xmm1, 10 - test sil, 1 - jne .LBB0_848 -.LBB0_403: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm1, 11 - test al, 1 - je .LBB0_404 -.LBB0_849: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 192] # 8-byte Reload - vpinsrb xmm2, xmm2, 
byte ptr [rdi + rbx], 11 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r13d, xmm1, 12 - test r13b, 1 - jne .LBB0_850 -.LBB0_405: # in Loop: Header=BB0_26 Depth=1 - vpextrb r10d, xmm1, 13 - test r10b, 1 - je .LBB0_406 -.LBB0_851: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 176] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 13 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r11d, xmm1, 14 - test r11b, 1 - jne .LBB0_852 -.LBB0_407: # in Loop: Header=BB0_26 Depth=1 - vpextrb r14d, xmm1, 15 - test r14b, 1 - je .LBB0_409 -.LBB0_408: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - mov rbx, qword ptr [rsp + 160] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rbx], 15 - vinserti128 ymm14, ymm14, xmm1, 1 -.LBB0_409: # in Loop: Header=BB0_26 Depth=1 - vpsrlw ymm1, ymm14, 4 - vpand ymm14, ymm1, ymmword ptr [rip + .LCPI0_4] - vmovd r15d, xmm6 - test r15b, 1 - je .LBB0_410 -# %bb.853: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm15 - vpextrb byte ptr [r8 + rbx], xmm14, 0 - vpextrb ebx, xmm6, 1 - test bl, 1 - jne .LBB0_854 -.LBB0_411: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 2 - test bl, 1 - mov r15, qword ptr [rsp + 224] # 8-byte Reload - je .LBB0_412 -.LBB0_855: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm15, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 2 - vpextrb ebx, xmm6, 3 - test bl, 1 - jne .LBB0_856 -.LBB0_413: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 4 - test bl, 1 - je .LBB0_414 -.LBB0_857: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm5 - vpextrb byte ptr [r8 + rbx], xmm14, 4 - vpextrb ebx, xmm6, 5 - test bl, 1 - jne .LBB0_858 -.LBB0_415: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 6 - test bl, 1 - je .LBB0_416 -.LBB0_859: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm5, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 6 - vpextrb ebx, xmm6, 7 - test bl, 1 - jne .LBB0_860 -.LBB0_417: # in Loop: 
Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 8 - test bl, 1 - je .LBB0_418 -.LBB0_861: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm12 - vpextrb byte ptr [r8 + rbx], xmm14, 8 - vpextrb ebx, xmm6, 9 - test bl, 1 - jne .LBB0_862 -.LBB0_419: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 10 - test bl, 1 - je .LBB0_420 -.LBB0_863: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm12, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 10 - vpextrb ebx, xmm6, 11 - test bl, 1 - jne .LBB0_864 -.LBB0_421: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 12 - test bl, 1 - je .LBB0_422 -.LBB0_865: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm11 - vpextrb byte ptr [r8 + rbx], xmm14, 12 - vpextrb ebx, xmm6, 13 - test bl, 1 - jne .LBB0_866 -.LBB0_423: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 14 - test bl, 1 - je .LBB0_424 -.LBB0_867: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm11, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 14 - vpextrb ebx, xmm6, 15 - test bl, 1 - jne .LBB0_868 -.LBB0_425: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 44], 1 # 1-byte Folded Reload - je .LBB0_426 -.LBB0_869: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm10 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 0 - test byte ptr [rsp + 40], 1 # 1-byte Folded Reload - jne .LBB0_870 -.LBB0_427: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 36], 1 # 1-byte Folded Reload - je .LBB0_428 -.LBB0_871: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm10, 1 - vmovq rbx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 2 - test byte ptr [rsp + 32], 1 # 1-byte Folded Reload - jne .LBB0_872 -.LBB0_429: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 28], 1 # 1-byte Folded Reload - je .LBB0_430 -.LBB0_873: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm9 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 4 - test byte ptr [rsp + 24], 1 # 1-byte 
Folded Reload - jne .LBB0_874 -.LBB0_431: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 20], 1 # 1-byte Folded Reload - je .LBB0_432 -.LBB0_875: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm9, 1 - vmovq rbx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 6 - test r9b, 1 - jne .LBB0_876 -.LBB0_433: # in Loop: Header=BB0_26 Depth=1 - test dl, 1 - mov rbx, qword ptr [rsp + 296] # 8-byte Reload - je .LBB0_434 -.LBB0_877: # in Loop: Header=BB0_26 Depth=1 - vmovq rdx, xmm8 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rdx], xmm1, 8 - test cl, 1 - jne .LBB0_878 -.LBB0_435: # in Loop: Header=BB0_26 Depth=1 - test sil, 1 - mov rdx, qword ptr [rsp + 304] # 8-byte Reload - je .LBB0_436 -.LBB0_879: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm8, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 10 - test al, 1 - mov rsi, qword ptr [rsp + 152] # 8-byte Reload - jne .LBB0_880 -.LBB0_437: # in Loop: Header=BB0_26 Depth=1 - test r13b, 1 - je .LBB0_438 -.LBB0_881: # in Loop: Header=BB0_26 Depth=1 - vmovq rcx, xmm7 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 12 - test r10b, 1 - mov r13, qword ptr [rsp + 280] # 8-byte Reload - jne .LBB0_882 -.LBB0_439: # in Loop: Header=BB0_26 Depth=1 - test r11b, 1 - je .LBB0_440 -.LBB0_883: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm7, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 14 - test r14b, 1 - mov rax, qword ptr [rsp + 288] # 8-byte Reload - mov r9, qword ptr [rsp + 232] # 8-byte Reload - jne .LBB0_441 - jmp .LBB0_442 - .p2align 4, 0x90 -.LBB0_410: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 1 - test bl, 1 - je .LBB0_411 -.LBB0_854: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm15, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 1 - vpextrb ebx, xmm6, 2 - test bl, 1 - mov r15, qword ptr [rsp + 224] # 8-byte Reload - jne .LBB0_855 -.LBB0_412: # in Loop: 
Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 3 - test bl, 1 - je .LBB0_413 -.LBB0_856: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm15, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 3 - vpextrb ebx, xmm6, 4 - test bl, 1 - jne .LBB0_857 -.LBB0_414: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 5 - test bl, 1 - je .LBB0_415 -.LBB0_858: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm5, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 5 - vpextrb ebx, xmm6, 6 - test bl, 1 - jne .LBB0_859 -.LBB0_416: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 7 - test bl, 1 - je .LBB0_417 -.LBB0_860: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm5, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 7 - vpextrb ebx, xmm6, 8 - test bl, 1 - jne .LBB0_861 -.LBB0_418: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 9 - test bl, 1 - je .LBB0_419 -.LBB0_862: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm12, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 9 - vpextrb ebx, xmm6, 10 - test bl, 1 - jne .LBB0_863 -.LBB0_420: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 11 - test bl, 1 - je .LBB0_421 -.LBB0_864: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm12, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 11 - vpextrb ebx, xmm6, 12 - test bl, 1 - jne .LBB0_865 -.LBB0_422: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 13 - test bl, 1 - je .LBB0_423 -.LBB0_866: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm11, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 13 - vpextrb ebx, xmm6, 14 - test bl, 1 - jne .LBB0_867 -.LBB0_424: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 15 - test bl, 1 - je .LBB0_425 -.LBB0_868: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm11, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 15 - test byte ptr [rsp + 44], 1 # 1-byte Folded Reload - jne .LBB0_869 -.LBB0_426: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 40], 1 # 1-byte 
Folded Reload - je .LBB0_427 -.LBB0_870: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm10, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 1 - test byte ptr [rsp + 36], 1 # 1-byte Folded Reload - jne .LBB0_871 -.LBB0_428: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 32], 1 # 1-byte Folded Reload - je .LBB0_429 -.LBB0_872: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm10, 1 - vpextrq rbx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 3 - test byte ptr [rsp + 28], 1 # 1-byte Folded Reload - jne .LBB0_873 -.LBB0_430: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 24], 1 # 1-byte Folded Reload - je .LBB0_431 -.LBB0_874: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm9, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 5 - test byte ptr [rsp + 20], 1 # 1-byte Folded Reload - jne .LBB0_875 -.LBB0_432: # in Loop: Header=BB0_26 Depth=1 - test r9b, 1 - je .LBB0_433 -.LBB0_876: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm9, 1 - vpextrq rbx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 7 - test dl, 1 - mov rbx, qword ptr [rsp + 296] # 8-byte Reload - jne .LBB0_877 -.LBB0_434: # in Loop: Header=BB0_26 Depth=1 - test cl, 1 - je .LBB0_435 -.LBB0_878: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm8, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 9 - test sil, 1 - mov rdx, qword ptr [rsp + 304] # 8-byte Reload - jne .LBB0_879 -.LBB0_436: # in Loop: Header=BB0_26 Depth=1 - test al, 1 - mov rsi, qword ptr [rsp + 152] # 8-byte Reload - je .LBB0_437 -.LBB0_880: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm8, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 11 - test r13b, 1 - jne .LBB0_881 -.LBB0_438: # in Loop: Header=BB0_26 Depth=1 - test r10b, 1 - mov r13, qword ptr [rsp + 280] # 8-byte Reload - je .LBB0_439 -.LBB0_882: # in Loop: Header=BB0_26 Depth=1 
- vpextrq rcx, xmm7, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 13 - test r11b, 1 - jne .LBB0_883 -.LBB0_440: # in Loop: Header=BB0_26 Depth=1 - test r14b, 1 - mov rax, qword ptr [rsp + 288] # 8-byte Reload - mov r9, qword ptr [rsp + 232] # 8-byte Reload - je .LBB0_442 -.LBB0_441: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm7, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 15 -.LBB0_442: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 640] # 32-byte Reload - vpor ymm15, ymm1, ymmword ptr [rsp + 512] # 32-byte Folded Reload - vpor ymm5, ymm1, ymmword ptr [rsp + 480] # 32-byte Folded Reload - vpor ymm10, ymm1, ymmword ptr [rsp + 384] # 32-byte Folded Reload - vpor ymm9, ymm1, ymmword ptr [rsp + 352] # 32-byte Folded Reload - vpor ymm12, ymm1, ymmword ptr [rsp + 448] # 32-byte Folded Reload - vpor ymm11, ymm1, ymmword ptr [rsp + 416] # 32-byte Folded Reload - vpor ymm8, ymm1, ymmword ptr [rsp + 320] # 32-byte Folded Reload - vpor ymm7, ymm4, ymm1 - vperm2i128 ymm1, ymm8, ymm7, 49 # ymm1 = ymm8[2,3],ymm7[2,3] - vinserti128 ymm2, ymm8, xmm7, 1 - vshufps ymm1, ymm2, ymm1, 136 # ymm1 = ymm2[0,2],ymm1[0,2],ymm2[4,6],ymm1[4,6] - vperm2i128 ymm2, ymm12, ymm11, 49 # ymm2 = ymm12[2,3],ymm11[2,3] - vinserti128 ymm3, ymm12, xmm11, 1 - vshufps ymm2, ymm3, ymm2, 136 # ymm2 = ymm3[0,2],ymm2[0,2],ymm3[4,6],ymm2[4,6] - vperm2i128 ymm3, ymm10, ymm9, 49 # ymm3 = ymm10[2,3],ymm9[2,3] - vinserti128 ymm13, ymm10, xmm9, 1 - vshufps ymm3, ymm13, ymm3, 136 # ymm3 = ymm13[0,2],ymm3[0,2],ymm13[4,6],ymm3[4,6] - vperm2i128 ymm13, ymm15, ymm5, 49 # ymm13 = ymm15[2,3],ymm5[2,3] - vinserti128 ymm14, ymm15, xmm5, 1 - vshufps ymm13, ymm14, ymm13, 136 # ymm13 = ymm14[0,2],ymm13[0,2],ymm14[4,6],ymm13[4,6] - vpcmpgtd ymm13, ymm0, ymm13 - vpcmpgtd ymm3, ymm0, ymm3 - vpackssdw ymm3, ymm13, ymm3 - vpcmpgtd ymm2, ymm0, ymm2 - vpcmpgtd ymm1, ymm0, ymm1 - vpackssdw ymm1, ymm2, ymm1 - vpermq ymm2, ymm3, 216 
# ymm2 = ymm3[0,2,1,3] - vpermq ymm1, ymm1, 216 # ymm1 = ymm1[0,2,1,3] - vpacksswb ymm1, ymm2, ymm1 - vpand ymm6, ymm1, ymm6 - vmovd ecx, xmm6 - # implicit-def: $ymm14 - test cl, 1 - je .LBB0_443 -# %bb.884: # in Loop: Header=BB0_26 Depth=1 - vpbroadcastb ymm14, byte ptr [rdi + rdx] - vpextrb ecx, xmm6, 1 - test cl, 1 - jne .LBB0_885 -.LBB0_444: # in Loop: Header=BB0_26 Depth=1 - mov rdx, qword ptr [rsp + 104] # 8-byte Reload - vpextrb ecx, xmm6, 2 - test cl, 1 - je .LBB0_446 -.LBB0_445: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rbx], 2 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_446: # in Loop: Header=BB0_26 Depth=1 - mov rsi, qword ptr [rsp + 96] # 8-byte Reload - mov r10, qword ptr [rsp + 72] # 8-byte Reload - vpextrb ecx, xmm6, 3 - test cl, 1 - je .LBB0_447 -# %bb.886: # in Loop: Header=BB0_26 Depth=1 - mov rcx, qword ptr [rsp + 272] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rcx], 3 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 4 - test cl, 1 - jne .LBB0_887 -.LBB0_448: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 5 - test cl, 1 - je .LBB0_449 -.LBB0_888: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rdx], 5 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 6 - test cl, 1 - jne .LBB0_889 -.LBB0_450: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 7 - test cl, 1 - je .LBB0_451 -.LBB0_890: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r9], 7 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 8 - test cl, 1 - jne .LBB0_891 -.LBB0_452: # in Loop: Header=BB0_26 Depth=1 - mov rdx, qword ptr [rsp + 88] # 8-byte Reload - vpextrb ecx, xmm6, 9 - test cl, 1 - je .LBB0_454 -.LBB0_453: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r15], 9 - vpblendd ymm14, ymm14, ymm1, 15 
# ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_454: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 144] # 8-byte Reload - mov rsi, qword ptr [rsp + 136] # 8-byte Reload - mov rbx, qword ptr [rsp + 128] # 8-byte Reload - mov r9, qword ptr [rsp + 120] # 8-byte Reload - vpextrb ecx, xmm6, 10 - test cl, 1 - je .LBB0_455 -# %bb.892: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 10 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 11 - test cl, 1 - jne .LBB0_893 -.LBB0_456: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 12 - test cl, 1 - je .LBB0_457 -.LBB0_894: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 248] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 12 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 13 - test cl, 1 - jne .LBB0_458 - jmp .LBB0_459 - .p2align 4, 0x90 -.LBB0_443: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 1 - test cl, 1 - je .LBB0_444 -.LBB0_885: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rsi], 1 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - mov rdx, qword ptr [rsp + 104] # 8-byte Reload - vpextrb ecx, xmm6, 2 - test cl, 1 - jne .LBB0_445 - jmp .LBB0_446 - .p2align 4, 0x90 -.LBB0_447: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 4 - test cl, 1 - je .LBB0_448 -.LBB0_887: # in Loop: Header=BB0_26 Depth=1 - mov rcx, qword ptr [rsp + 264] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rcx], 4 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 5 - test cl, 1 - jne .LBB0_888 -.LBB0_449: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 6 - test cl, 1 - je .LBB0_450 -.LBB0_889: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 6 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 7 - test 
cl, 1 - jne .LBB0_890 -.LBB0_451: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 8 - test cl, 1 - je .LBB0_452 -.LBB0_891: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rsi], 8 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - mov rdx, qword ptr [rsp + 88] # 8-byte Reload - vpextrb ecx, xmm6, 9 - test cl, 1 - jne .LBB0_453 - jmp .LBB0_454 - .p2align 4, 0x90 -.LBB0_455: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 11 - test cl, 1 - je .LBB0_456 -.LBB0_893: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 256] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 11 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 12 - test cl, 1 - jne .LBB0_894 -.LBB0_457: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 13 - test cl, 1 - je .LBB0_459 -.LBB0_458: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rdx], 13 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_459: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 80] # 8-byte Reload - mov rdx, qword ptr [rsp + 64] # 8-byte Reload - vpextrb ecx, xmm6, 14 - test cl, 1 - je .LBB0_461 -# %bb.460: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 14 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_461: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 15 - test cl, 1 - je .LBB0_463 -# %bb.462: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r10], 15 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_463: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm6, 1 - vmovd eax, xmm1 - mov dword ptr [rsp + 44], eax # 4-byte Spill - test al, 1 - je .LBB0_465 -# %bb.464: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rdx], 0 - vinserti128 ymm14, ymm14, xmm2, 1 
-.LBB0_465: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 56] # 8-byte Reload - vpextrb ecx, xmm1, 1 - mov dword ptr [rsp + 40], ecx # 4-byte Spill - test cl, 1 - je .LBB0_466 -# %bb.895: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rsi], 1 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 2 - mov dword ptr [rsp + 36], ecx # 4-byte Spill - test cl, 1 - jne .LBB0_896 -.LBB0_467: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 3 - mov dword ptr [rsp + 32], ecx # 4-byte Spill - test cl, 1 - je .LBB0_468 -.LBB0_897: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + r9], 3 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 4 - mov dword ptr [rsp + 28], ecx # 4-byte Spill - test cl, 1 - jne .LBB0_898 -.LBB0_469: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm1, 5 - mov dword ptr [rsp + 24], eax # 4-byte Spill - test al, 1 - je .LBB0_471 -.LBB0_470: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + r13], 5 - vinserti128 ymm14, ymm14, xmm2, 1 -.LBB0_471: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 112] # 8-byte Reload - vpextrb ecx, xmm1, 6 - mov dword ptr [rsp + 20], ecx # 4-byte Spill - test cl, 1 - je .LBB0_472 -# %bb.899: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 6 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r9d, xmm1, 7 - test r9b, 1 - jne .LBB0_900 -.LBB0_473: # in Loop: Header=BB0_26 Depth=1 - vpextrb edx, xmm1, 8 - test dl, 1 - je .LBB0_474 -.LBB0_901: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 216] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 8 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 9 - test cl, 1 - jne .LBB0_902 -.LBB0_475: # in Loop: Header=BB0_26 Depth=1 - vpextrb esi, xmm1, 10 - test sil, 1 - je .LBB0_476 
-.LBB0_903: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 200] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 10 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb eax, xmm1, 11 - test al, 1 - jne .LBB0_904 -.LBB0_477: # in Loop: Header=BB0_26 Depth=1 - vpextrb r13d, xmm1, 12 - test r13b, 1 - je .LBB0_478 -.LBB0_905: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 184] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 12 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r10d, xmm1, 13 - test r10b, 1 - jne .LBB0_906 -.LBB0_479: # in Loop: Header=BB0_26 Depth=1 - vpextrb r11d, xmm1, 14 - test r11b, 1 - je .LBB0_480 -.LBB0_907: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 168] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 14 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r14d, xmm1, 15 - test r14b, 1 - jne .LBB0_481 - jmp .LBB0_482 - .p2align 4, 0x90 -.LBB0_466: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 2 - mov dword ptr [rsp + 36], ecx # 4-byte Spill - test cl, 1 - je .LBB0_467 -.LBB0_896: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 2 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 3 - mov dword ptr [rsp + 32], ecx # 4-byte Spill - test cl, 1 - jne .LBB0_897 -.LBB0_468: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 4 - mov dword ptr [rsp + 28], ecx # 4-byte Spill - test cl, 1 - je .LBB0_469 -.LBB0_898: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 4 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb eax, xmm1, 5 - mov dword ptr [rsp + 24], eax # 4-byte Spill - test al, 1 - jne .LBB0_470 - jmp .LBB0_471 - .p2align 4, 0x90 -.LBB0_472: # in Loop: Header=BB0_26 Depth=1 - vpextrb r9d, xmm1, 7 - test r9b, 1 - je .LBB0_473 -.LBB0_900: # in Loop: Header=BB0_26 Depth=1 - 
vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 240] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 7 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb edx, xmm1, 8 - test dl, 1 - jne .LBB0_901 -.LBB0_474: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 9 - test cl, 1 - je .LBB0_475 -.LBB0_902: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 208] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 9 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb esi, xmm1, 10 - test sil, 1 - jne .LBB0_903 -.LBB0_476: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm1, 11 - test al, 1 - je .LBB0_477 -.LBB0_904: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 192] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 11 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r13d, xmm1, 12 - test r13b, 1 - jne .LBB0_905 -.LBB0_478: # in Loop: Header=BB0_26 Depth=1 - vpextrb r10d, xmm1, 13 - test r10b, 1 - je .LBB0_479 -.LBB0_906: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 176] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 13 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r11d, xmm1, 14 - test r11b, 1 - jne .LBB0_907 -.LBB0_480: # in Loop: Header=BB0_26 Depth=1 - vpextrb r14d, xmm1, 15 - test r14b, 1 - je .LBB0_482 -.LBB0_481: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - mov rbx, qword ptr [rsp + 160] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rbx], 15 - vinserti128 ymm14, ymm14, xmm1, 1 -.LBB0_482: # in Loop: Header=BB0_26 Depth=1 - vpsrlw ymm1, ymm14, 5 - vpand ymm14, ymm1, ymmword ptr [rip + .LCPI0_4] - vmovd r15d, xmm6 - test r15b, 1 - je .LBB0_483 -# %bb.908: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm15 - vpextrb byte ptr [r8 + rbx], xmm14, 0 - vpextrb ebx, xmm6, 1 - test bl, 1 - jne .LBB0_909 -.LBB0_484: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 2 - test bl, 
1 - mov r15, qword ptr [rsp + 224] # 8-byte Reload - je .LBB0_485 -.LBB0_910: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm15, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 2 - vpextrb ebx, xmm6, 3 - test bl, 1 - jne .LBB0_911 -.LBB0_486: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 4 - test bl, 1 - je .LBB0_487 -.LBB0_912: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm5 - vpextrb byte ptr [r8 + rbx], xmm14, 4 - vpextrb ebx, xmm6, 5 - test bl, 1 - jne .LBB0_913 -.LBB0_488: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 6 - test bl, 1 - je .LBB0_489 -.LBB0_914: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm5, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 6 - vpextrb ebx, xmm6, 7 - test bl, 1 - jne .LBB0_915 -.LBB0_490: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 8 - test bl, 1 - je .LBB0_491 -.LBB0_916: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm12 - vpextrb byte ptr [r8 + rbx], xmm14, 8 - vpextrb ebx, xmm6, 9 - test bl, 1 - jne .LBB0_917 -.LBB0_492: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 10 - test bl, 1 - je .LBB0_493 -.LBB0_918: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm12, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 10 - vpextrb ebx, xmm6, 11 - test bl, 1 - jne .LBB0_919 -.LBB0_494: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 12 - test bl, 1 - je .LBB0_495 -.LBB0_920: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm11 - vpextrb byte ptr [r8 + rbx], xmm14, 12 - vpextrb ebx, xmm6, 13 - test bl, 1 - jne .LBB0_921 -.LBB0_496: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 14 - test bl, 1 - je .LBB0_497 -.LBB0_922: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm11, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 14 - vpextrb ebx, xmm6, 15 - test bl, 1 - jne .LBB0_923 -.LBB0_498: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 44], 1 # 1-byte Folded Reload - je .LBB0_499 -.LBB0_924: # in Loop: 
Header=BB0_26 Depth=1 - vmovq rbx, xmm10 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 0 - test byte ptr [rsp + 40], 1 # 1-byte Folded Reload - jne .LBB0_925 -.LBB0_500: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 36], 1 # 1-byte Folded Reload - je .LBB0_501 -.LBB0_926: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm10, 1 - vmovq rbx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 2 - test byte ptr [rsp + 32], 1 # 1-byte Folded Reload - jne .LBB0_927 -.LBB0_502: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 28], 1 # 1-byte Folded Reload - je .LBB0_503 -.LBB0_928: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm9 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 4 - test byte ptr [rsp + 24], 1 # 1-byte Folded Reload - jne .LBB0_929 -.LBB0_504: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 20], 1 # 1-byte Folded Reload - je .LBB0_505 -.LBB0_930: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm9, 1 - vmovq rbx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 6 - test r9b, 1 - jne .LBB0_931 -.LBB0_506: # in Loop: Header=BB0_26 Depth=1 - test dl, 1 - mov rbx, qword ptr [rsp + 296] # 8-byte Reload - je .LBB0_507 -.LBB0_932: # in Loop: Header=BB0_26 Depth=1 - vmovq rdx, xmm8 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rdx], xmm1, 8 - test cl, 1 - jne .LBB0_933 -.LBB0_508: # in Loop: Header=BB0_26 Depth=1 - test sil, 1 - mov rdx, qword ptr [rsp + 304] # 8-byte Reload - je .LBB0_509 -.LBB0_934: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm8, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 10 - test al, 1 - mov rsi, qword ptr [rsp + 152] # 8-byte Reload - jne .LBB0_935 -.LBB0_510: # in Loop: Header=BB0_26 Depth=1 - test r13b, 1 - je .LBB0_511 -.LBB0_936: # in Loop: Header=BB0_26 Depth=1 - vmovq rcx, xmm7 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 12 - 
test r10b, 1 - mov r13, qword ptr [rsp + 280] # 8-byte Reload - jne .LBB0_937 -.LBB0_512: # in Loop: Header=BB0_26 Depth=1 - test r11b, 1 - je .LBB0_513 -.LBB0_938: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm7, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 14 - test r14b, 1 - mov rax, qword ptr [rsp + 288] # 8-byte Reload - mov r9, qword ptr [rsp + 232] # 8-byte Reload - jne .LBB0_514 - jmp .LBB0_515 - .p2align 4, 0x90 -.LBB0_483: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 1 - test bl, 1 - je .LBB0_484 -.LBB0_909: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm15, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 1 - vpextrb ebx, xmm6, 2 - test bl, 1 - mov r15, qword ptr [rsp + 224] # 8-byte Reload - jne .LBB0_910 -.LBB0_485: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 3 - test bl, 1 - je .LBB0_486 -.LBB0_911: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm15, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 3 - vpextrb ebx, xmm6, 4 - test bl, 1 - jne .LBB0_912 -.LBB0_487: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 5 - test bl, 1 - je .LBB0_488 -.LBB0_913: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm5, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 5 - vpextrb ebx, xmm6, 6 - test bl, 1 - jne .LBB0_914 -.LBB0_489: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 7 - test bl, 1 - je .LBB0_490 -.LBB0_915: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm5, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 7 - vpextrb ebx, xmm6, 8 - test bl, 1 - jne .LBB0_916 -.LBB0_491: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 9 - test bl, 1 - je .LBB0_492 -.LBB0_917: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm12, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 9 - vpextrb ebx, xmm6, 10 - test bl, 1 - jne .LBB0_918 -.LBB0_493: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 11 - test bl, 1 - je .LBB0_494 -.LBB0_919: # in Loop: 
Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm12, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 11 - vpextrb ebx, xmm6, 12 - test bl, 1 - jne .LBB0_920 -.LBB0_495: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 13 - test bl, 1 - je .LBB0_496 -.LBB0_921: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm11, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 13 - vpextrb ebx, xmm6, 14 - test bl, 1 - jne .LBB0_922 -.LBB0_497: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 15 - test bl, 1 - je .LBB0_498 -.LBB0_923: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm11, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 15 - test byte ptr [rsp + 44], 1 # 1-byte Folded Reload - jne .LBB0_924 -.LBB0_499: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 40], 1 # 1-byte Folded Reload - je .LBB0_500 -.LBB0_925: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm10, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 1 - test byte ptr [rsp + 36], 1 # 1-byte Folded Reload - jne .LBB0_926 -.LBB0_501: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 32], 1 # 1-byte Folded Reload - je .LBB0_502 -.LBB0_927: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm10, 1 - vpextrq rbx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 3 - test byte ptr [rsp + 28], 1 # 1-byte Folded Reload - jne .LBB0_928 -.LBB0_503: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 24], 1 # 1-byte Folded Reload - je .LBB0_504 -.LBB0_929: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm9, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 5 - test byte ptr [rsp + 20], 1 # 1-byte Folded Reload - jne .LBB0_930 -.LBB0_505: # in Loop: Header=BB0_26 Depth=1 - test r9b, 1 - je .LBB0_506 -.LBB0_931: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm9, 1 - vpextrq rbx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 7 - test dl, 1 - mov rbx, qword ptr 
[rsp + 296] # 8-byte Reload - jne .LBB0_932 -.LBB0_507: # in Loop: Header=BB0_26 Depth=1 - test cl, 1 - je .LBB0_508 -.LBB0_933: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm8, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 9 - test sil, 1 - mov rdx, qword ptr [rsp + 304] # 8-byte Reload - jne .LBB0_934 -.LBB0_509: # in Loop: Header=BB0_26 Depth=1 - test al, 1 - mov rsi, qword ptr [rsp + 152] # 8-byte Reload - je .LBB0_510 -.LBB0_935: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm8, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 11 - test r13b, 1 - jne .LBB0_936 -.LBB0_511: # in Loop: Header=BB0_26 Depth=1 - test r10b, 1 - mov r13, qword ptr [rsp + 280] # 8-byte Reload - je .LBB0_512 -.LBB0_937: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm7, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 13 - test r11b, 1 - jne .LBB0_938 -.LBB0_513: # in Loop: Header=BB0_26 Depth=1 - test r14b, 1 - mov rax, qword ptr [rsp + 288] # 8-byte Reload - mov r9, qword ptr [rsp + 232] # 8-byte Reload - je .LBB0_515 -.LBB0_514: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm7, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 15 -.LBB0_515: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 608] # 32-byte Reload - vpor ymm15, ymm1, ymmword ptr [rsp + 512] # 32-byte Folded Reload - vpor ymm5, ymm1, ymmword ptr [rsp + 480] # 32-byte Folded Reload - vpor ymm10, ymm1, ymmword ptr [rsp + 384] # 32-byte Folded Reload - vpor ymm9, ymm1, ymmword ptr [rsp + 352] # 32-byte Folded Reload - vpor ymm12, ymm1, ymmword ptr [rsp + 448] # 32-byte Folded Reload - vpor ymm11, ymm1, ymmword ptr [rsp + 416] # 32-byte Folded Reload - vpor ymm8, ymm1, ymmword ptr [rsp + 320] # 32-byte Folded Reload - vpor ymm7, ymm4, ymm1 - vperm2i128 ymm1, ymm8, ymm7, 49 # ymm1 = ymm8[2,3],ymm7[2,3] - vinserti128 ymm2, ymm8, xmm7, 1 - vshufps ymm1, 
ymm2, ymm1, 136 # ymm1 = ymm2[0,2],ymm1[0,2],ymm2[4,6],ymm1[4,6] - vperm2i128 ymm2, ymm12, ymm11, 49 # ymm2 = ymm12[2,3],ymm11[2,3] - vinserti128 ymm3, ymm12, xmm11, 1 - vshufps ymm2, ymm3, ymm2, 136 # ymm2 = ymm3[0,2],ymm2[0,2],ymm3[4,6],ymm2[4,6] - vperm2i128 ymm3, ymm10, ymm9, 49 # ymm3 = ymm10[2,3],ymm9[2,3] - vinserti128 ymm13, ymm10, xmm9, 1 - vshufps ymm3, ymm13, ymm3, 136 # ymm3 = ymm13[0,2],ymm3[0,2],ymm13[4,6],ymm3[4,6] - vperm2i128 ymm13, ymm15, ymm5, 49 # ymm13 = ymm15[2,3],ymm5[2,3] - vinserti128 ymm14, ymm15, xmm5, 1 - vshufps ymm13, ymm14, ymm13, 136 # ymm13 = ymm14[0,2],ymm13[0,2],ymm14[4,6],ymm13[4,6] - vpcmpgtd ymm13, ymm0, ymm13 - vpcmpgtd ymm3, ymm0, ymm3 - vpackssdw ymm3, ymm13, ymm3 - vpcmpgtd ymm2, ymm0, ymm2 - vpcmpgtd ymm1, ymm0, ymm1 - vpackssdw ymm1, ymm2, ymm1 - vpermq ymm2, ymm3, 216 # ymm2 = ymm3[0,2,1,3] - vpermq ymm1, ymm1, 216 # ymm1 = ymm1[0,2,1,3] - vpacksswb ymm1, ymm2, ymm1 - vpand ymm6, ymm1, ymm6 - vmovd ecx, xmm6 - # implicit-def: $ymm14 - test cl, 1 - je .LBB0_516 -# %bb.939: # in Loop: Header=BB0_26 Depth=1 - vpbroadcastb ymm14, byte ptr [rdi + rdx] - vpextrb ecx, xmm6, 1 - test cl, 1 - jne .LBB0_940 -.LBB0_517: # in Loop: Header=BB0_26 Depth=1 - mov rdx, qword ptr [rsp + 104] # 8-byte Reload - vpextrb ecx, xmm6, 2 - test cl, 1 - je .LBB0_519 -.LBB0_518: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rbx], 2 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_519: # in Loop: Header=BB0_26 Depth=1 - mov rsi, qword ptr [rsp + 96] # 8-byte Reload - mov r10, qword ptr [rsp + 72] # 8-byte Reload - vpextrb ecx, xmm6, 3 - test cl, 1 - je .LBB0_520 -# %bb.941: # in Loop: Header=BB0_26 Depth=1 - mov rcx, qword ptr [rsp + 272] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rcx], 3 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 4 - test cl, 1 - jne .LBB0_942 -.LBB0_521: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 5 - 
test cl, 1 - je .LBB0_522 -.LBB0_943: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rdx], 5 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 6 - test cl, 1 - jne .LBB0_944 -.LBB0_523: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 7 - test cl, 1 - je .LBB0_524 -.LBB0_945: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r9], 7 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 8 - test cl, 1 - jne .LBB0_946 -.LBB0_525: # in Loop: Header=BB0_26 Depth=1 - mov rdx, qword ptr [rsp + 88] # 8-byte Reload - vpextrb ecx, xmm6, 9 - test cl, 1 - je .LBB0_527 -.LBB0_526: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r15], 9 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_527: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 144] # 8-byte Reload - mov rsi, qword ptr [rsp + 136] # 8-byte Reload - mov rbx, qword ptr [rsp + 128] # 8-byte Reload - mov r9, qword ptr [rsp + 120] # 8-byte Reload - vpextrb ecx, xmm6, 10 - test cl, 1 - je .LBB0_528 -# %bb.947: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 10 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 11 - test cl, 1 - jne .LBB0_948 -.LBB0_529: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 12 - test cl, 1 - je .LBB0_530 -.LBB0_949: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 248] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 12 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 13 - test cl, 1 - jne .LBB0_531 - jmp .LBB0_532 - .p2align 4, 0x90 -.LBB0_516: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 1 - test cl, 1 - je .LBB0_517 -.LBB0_940: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rsi], 1 - vpblendd ymm14, ymm14, ymm1, 15 # 
ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - mov rdx, qword ptr [rsp + 104] # 8-byte Reload - vpextrb ecx, xmm6, 2 - test cl, 1 - jne .LBB0_518 - jmp .LBB0_519 - .p2align 4, 0x90 -.LBB0_520: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 4 - test cl, 1 - je .LBB0_521 -.LBB0_942: # in Loop: Header=BB0_26 Depth=1 - mov rcx, qword ptr [rsp + 264] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rcx], 4 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 5 - test cl, 1 - jne .LBB0_943 -.LBB0_522: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 6 - test cl, 1 - je .LBB0_523 -.LBB0_944: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 6 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 7 - test cl, 1 - jne .LBB0_945 -.LBB0_524: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 8 - test cl, 1 - je .LBB0_525 -.LBB0_946: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rsi], 8 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - mov rdx, qword ptr [rsp + 88] # 8-byte Reload - vpextrb ecx, xmm6, 9 - test cl, 1 - jne .LBB0_526 - jmp .LBB0_527 - .p2align 4, 0x90 -.LBB0_528: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 11 - test cl, 1 - je .LBB0_529 -.LBB0_948: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 256] # 8-byte Reload - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 11 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] - vpextrb ecx, xmm6, 12 - test cl, 1 - jne .LBB0_949 -.LBB0_530: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 13 - test cl, 1 - je .LBB0_532 -.LBB0_531: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rdx], 13 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_532: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 80] # 8-byte Reload - mov rdx, qword ptr [rsp + 64] # 
8-byte Reload - vpextrb ecx, xmm6, 14 - test cl, 1 - je .LBB0_534 -# %bb.533: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + rax], 14 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_534: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 15 - test cl, 1 - je .LBB0_536 -# %bb.535: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm1, xmm14, byte ptr [rdi + r10], 15 - vpblendd ymm14, ymm14, ymm1, 15 # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7] -.LBB0_536: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm6, 1 - vmovd eax, xmm1 - mov dword ptr [rsp + 44], eax # 4-byte Spill - test al, 1 - je .LBB0_538 -# %bb.537: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rdx], 0 - vinserti128 ymm14, ymm14, xmm2, 1 -.LBB0_538: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 56] # 8-byte Reload - vpextrb ecx, xmm1, 1 - mov dword ptr [rsp + 40], ecx # 4-byte Spill - test cl, 1 - je .LBB0_539 -# %bb.950: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rsi], 1 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 2 - mov dword ptr [rsp + 36], ecx # 4-byte Spill - test cl, 1 - jne .LBB0_951 -.LBB0_540: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 3 - mov dword ptr [rsp + 32], ecx # 4-byte Spill - test cl, 1 - je .LBB0_541 -.LBB0_952: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + r9], 3 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 4 - mov dword ptr [rsp + 28], ecx # 4-byte Spill - test cl, 1 - jne .LBB0_953 -.LBB0_542: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm1, 5 - mov dword ptr [rsp + 24], eax # 4-byte Spill - test al, 1 - je .LBB0_544 -.LBB0_543: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + r13], 5 - vinserti128 ymm14, ymm14, xmm2, 1 -.LBB0_544: # in Loop: 
Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 112] # 8-byte Reload - vpextrb ecx, xmm1, 6 - mov dword ptr [rsp + 20], ecx # 4-byte Spill - test cl, 1 - je .LBB0_545 -# %bb.954: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 6 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r9d, xmm1, 7 - test r9b, 1 - jne .LBB0_955 -.LBB0_546: # in Loop: Header=BB0_26 Depth=1 - vpextrb edx, xmm1, 8 - test dl, 1 - je .LBB0_547 -.LBB0_956: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 216] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 8 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 9 - test cl, 1 - jne .LBB0_957 -.LBB0_548: # in Loop: Header=BB0_26 Depth=1 - vpextrb esi, xmm1, 10 - test sil, 1 - je .LBB0_549 -.LBB0_958: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 200] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 10 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb eax, xmm1, 11 - test al, 1 - jne .LBB0_959 -.LBB0_550: # in Loop: Header=BB0_26 Depth=1 - vpextrb r13d, xmm1, 12 - test r13b, 1 - je .LBB0_551 -.LBB0_960: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 184] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 12 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r10d, xmm1, 13 - test r10b, 1 - jne .LBB0_961 -.LBB0_552: # in Loop: Header=BB0_26 Depth=1 - vpextrb r11d, xmm1, 14 - test r11b, 1 - je .LBB0_553 -.LBB0_962: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 168] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 14 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r14d, xmm1, 15 - test r14b, 1 - jne .LBB0_554 - jmp .LBB0_555 - .p2align 4, 0x90 -.LBB0_539: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 2 - mov dword ptr [rsp + 36], ecx # 4-byte Spill - test cl, 1 - je .LBB0_540 
-.LBB0_951: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 2 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb ecx, xmm1, 3 - mov dword ptr [rsp + 32], ecx # 4-byte Spill - test cl, 1 - jne .LBB0_952 -.LBB0_541: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 4 - mov dword ptr [rsp + 28], ecx # 4-byte Spill - test cl, 1 - je .LBB0_542 -.LBB0_953: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 4 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb eax, xmm1, 5 - mov dword ptr [rsp + 24], eax # 4-byte Spill - test al, 1 - jne .LBB0_543 - jmp .LBB0_544 - .p2align 4, 0x90 -.LBB0_545: # in Loop: Header=BB0_26 Depth=1 - vpextrb r9d, xmm1, 7 - test r9b, 1 - je .LBB0_546 -.LBB0_955: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 240] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 7 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb edx, xmm1, 8 - test dl, 1 - jne .LBB0_956 -.LBB0_547: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm1, 9 - test cl, 1 - je .LBB0_548 -.LBB0_957: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rax, qword ptr [rsp + 208] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rax], 9 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb esi, xmm1, 10 - test sil, 1 - jne .LBB0_958 -.LBB0_549: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm1, 11 - test al, 1 - je .LBB0_550 -.LBB0_959: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 192] # 8-byte Reload - vpinsrb xmm2, xmm2, byte ptr [rdi + rbx], 11 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r13d, xmm1, 12 - test r13b, 1 - jne .LBB0_960 -.LBB0_551: # in Loop: Header=BB0_26 Depth=1 - vpextrb r10d, xmm1, 13 - test r10b, 1 - je .LBB0_552 -.LBB0_961: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm2, ymm14, 1 - mov rbx, qword ptr [rsp + 176] # 8-byte Reload - vpinsrb 
xmm2, xmm2, byte ptr [rdi + rbx], 13 - vinserti128 ymm14, ymm14, xmm2, 1 - vpextrb r11d, xmm1, 14 - test r11b, 1 - jne .LBB0_962 -.LBB0_553: # in Loop: Header=BB0_26 Depth=1 - vpextrb r14d, xmm1, 15 - test r14b, 1 - je .LBB0_555 -.LBB0_554: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm14, 1 - mov rbx, qword ptr [rsp + 160] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rbx], 15 - vinserti128 ymm14, ymm14, xmm1, 1 -.LBB0_555: # in Loop: Header=BB0_26 Depth=1 - vpsrlw ymm1, ymm14, 6 - vpand ymm14, ymm1, ymmword ptr [rip + .LCPI0_4] - vmovd r15d, xmm6 - test r15b, 1 - je .LBB0_556 -# %bb.963: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm15 - vpextrb byte ptr [r8 + rbx], xmm14, 0 - vpextrb ebx, xmm6, 1 - test bl, 1 - jne .LBB0_964 -.LBB0_557: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 2 - test bl, 1 - mov r15, qword ptr [rsp + 224] # 8-byte Reload - je .LBB0_558 -.LBB0_965: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm15, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 2 - vpextrb ebx, xmm6, 3 - test bl, 1 - jne .LBB0_966 -.LBB0_559: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 4 - test bl, 1 - je .LBB0_560 -.LBB0_967: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm5 - vpextrb byte ptr [r8 + rbx], xmm14, 4 - vpextrb ebx, xmm6, 5 - test bl, 1 - jne .LBB0_968 -.LBB0_561: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 6 - test bl, 1 - je .LBB0_562 -.LBB0_969: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm5, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 6 - vpextrb ebx, xmm6, 7 - test bl, 1 - jne .LBB0_970 -.LBB0_563: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 8 - test bl, 1 - je .LBB0_564 -.LBB0_971: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm12 - vpextrb byte ptr [r8 + rbx], xmm14, 8 - vpextrb ebx, xmm6, 9 - test bl, 1 - jne .LBB0_972 -.LBB0_565: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 10 - test bl, 1 - je .LBB0_566 -.LBB0_973: # in Loop: 
Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm12, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 10 - vpextrb ebx, xmm6, 11 - test bl, 1 - jne .LBB0_974 -.LBB0_567: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 12 - test bl, 1 - je .LBB0_568 -.LBB0_975: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm11 - vpextrb byte ptr [r8 + rbx], xmm14, 12 - vpextrb ebx, xmm6, 13 - test bl, 1 - jne .LBB0_976 -.LBB0_569: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 14 - test bl, 1 - je .LBB0_570 -.LBB0_977: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm11, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm14, 14 - vpextrb ebx, xmm6, 15 - test bl, 1 - jne .LBB0_978 -.LBB0_571: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 44], 1 # 1-byte Folded Reload - je .LBB0_572 -.LBB0_979: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm10 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 0 - test byte ptr [rsp + 40], 1 # 1-byte Folded Reload - jne .LBB0_980 -.LBB0_573: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 36], 1 # 1-byte Folded Reload - je .LBB0_574 -.LBB0_981: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm10, 1 - vmovq rbx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 2 - test byte ptr [rsp + 32], 1 # 1-byte Folded Reload - jne .LBB0_982 -.LBB0_575: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 28], 1 # 1-byte Folded Reload - je .LBB0_576 -.LBB0_983: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm9 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 4 - test byte ptr [rsp + 24], 1 # 1-byte Folded Reload - jne .LBB0_984 -.LBB0_577: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 20], 1 # 1-byte Folded Reload - je .LBB0_578 -.LBB0_985: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm9, 1 - vmovq rbx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 6 - test r9b, 1 - jne .LBB0_986 -.LBB0_579: # in 
Loop: Header=BB0_26 Depth=1 - test dl, 1 - mov rbx, qword ptr [rsp + 296] # 8-byte Reload - je .LBB0_580 -.LBB0_987: # in Loop: Header=BB0_26 Depth=1 - vmovq rdx, xmm8 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rdx], xmm1, 8 - test cl, 1 - jne .LBB0_988 -.LBB0_581: # in Loop: Header=BB0_26 Depth=1 - test sil, 1 - mov rdx, qword ptr [rsp + 304] # 8-byte Reload - je .LBB0_582 -.LBB0_989: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm8, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 10 - test al, 1 - mov rsi, qword ptr [rsp + 152] # 8-byte Reload - jne .LBB0_990 -.LBB0_583: # in Loop: Header=BB0_26 Depth=1 - test r13b, 1 - je .LBB0_584 -.LBB0_991: # in Loop: Header=BB0_26 Depth=1 - vmovq rcx, xmm7 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 12 - test r10b, 1 - mov r13, qword ptr [rsp + 280] # 8-byte Reload - jne .LBB0_992 -.LBB0_585: # in Loop: Header=BB0_26 Depth=1 - test r11b, 1 - je .LBB0_586 -.LBB0_993: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm7, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 14 - test r14b, 1 - mov rax, qword ptr [rsp + 288] # 8-byte Reload - mov r9, qword ptr [rsp + 232] # 8-byte Reload - jne .LBB0_587 - jmp .LBB0_588 - .p2align 4, 0x90 -.LBB0_556: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 1 - test bl, 1 - je .LBB0_557 -.LBB0_964: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm15, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 1 - vpextrb ebx, xmm6, 2 - test bl, 1 - mov r15, qword ptr [rsp + 224] # 8-byte Reload - jne .LBB0_965 -.LBB0_558: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 3 - test bl, 1 - je .LBB0_559 -.LBB0_966: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm15, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 3 - vpextrb ebx, xmm6, 4 - test bl, 1 - jne .LBB0_967 -.LBB0_560: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 5 - test bl, 1 - je 
.LBB0_561 -.LBB0_968: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm5, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 5 - vpextrb ebx, xmm6, 6 - test bl, 1 - jne .LBB0_969 -.LBB0_562: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 7 - test bl, 1 - je .LBB0_563 -.LBB0_970: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm5, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 7 - vpextrb ebx, xmm6, 8 - test bl, 1 - jne .LBB0_971 -.LBB0_564: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 9 - test bl, 1 - je .LBB0_565 -.LBB0_972: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm12, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 9 - vpextrb ebx, xmm6, 10 - test bl, 1 - jne .LBB0_973 -.LBB0_566: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 11 - test bl, 1 - je .LBB0_567 -.LBB0_974: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm12, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 11 - vpextrb ebx, xmm6, 12 - test bl, 1 - jne .LBB0_975 -.LBB0_568: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 13 - test bl, 1 - je .LBB0_569 -.LBB0_976: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm11, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 13 - vpextrb ebx, xmm6, 14 - test bl, 1 - jne .LBB0_977 -.LBB0_570: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm6, 15 - test bl, 1 - je .LBB0_571 -.LBB0_978: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm11, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm14, 15 - test byte ptr [rsp + 44], 1 # 1-byte Folded Reload - jne .LBB0_979 -.LBB0_572: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 40], 1 # 1-byte Folded Reload - je .LBB0_573 -.LBB0_980: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm10, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 1 - test byte ptr [rsp + 36], 1 # 1-byte Folded Reload - jne .LBB0_981 -.LBB0_574: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 32], 1 # 1-byte Folded Reload - je .LBB0_575 
-.LBB0_982: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm10, 1 - vpextrq rbx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 3 - test byte ptr [rsp + 28], 1 # 1-byte Folded Reload - jne .LBB0_983 -.LBB0_576: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 24], 1 # 1-byte Folded Reload - je .LBB0_577 -.LBB0_984: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm9, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 5 - test byte ptr [rsp + 20], 1 # 1-byte Folded Reload - jne .LBB0_985 -.LBB0_578: # in Loop: Header=BB0_26 Depth=1 - test r9b, 1 - je .LBB0_579 -.LBB0_986: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm9, 1 - vpextrq rbx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 7 - test dl, 1 - mov rbx, qword ptr [rsp + 296] # 8-byte Reload - jne .LBB0_987 -.LBB0_580: # in Loop: Header=BB0_26 Depth=1 - test cl, 1 - je .LBB0_581 -.LBB0_988: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm8, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 9 - test sil, 1 - mov rdx, qword ptr [rsp + 304] # 8-byte Reload - jne .LBB0_989 -.LBB0_582: # in Loop: Header=BB0_26 Depth=1 - test al, 1 - mov rsi, qword ptr [rsp + 152] # 8-byte Reload - je .LBB0_583 -.LBB0_990: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm8, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 11 - test r13b, 1 - jne .LBB0_991 -.LBB0_584: # in Loop: Header=BB0_26 Depth=1 - test r10b, 1 - mov r13, qword ptr [rsp + 280] # 8-byte Reload - je .LBB0_585 -.LBB0_992: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm7, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 13 - test r11b, 1 - jne .LBB0_993 -.LBB0_586: # in Loop: Header=BB0_26 Depth=1 - test r14b, 1 - mov rax, qword ptr [rsp + 288] # 8-byte Reload - mov r9, qword ptr [rsp + 232] # 8-byte Reload - je .LBB0_588 -.LBB0_587: # in Loop: Header=BB0_26 Depth=1 - 
vextracti128 xmm1, ymm7, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm14, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 15 -.LBB0_588: # in Loop: Header=BB0_26 Depth=1 - vmovdqa ymm1, ymmword ptr [rsp + 576] # 32-byte Reload - vpor ymm11, ymm1, ymmword ptr [rsp + 512] # 32-byte Folded Reload - vpor ymm10, ymm1, ymmword ptr [rsp + 480] # 32-byte Folded Reload - vpor ymm8, ymm1, ymmword ptr [rsp + 384] # 32-byte Folded Reload - vpor ymm7, ymm1, ymmword ptr [rsp + 352] # 32-byte Folded Reload - vpor ymm9, ymm1, ymmword ptr [rsp + 448] # 32-byte Folded Reload - vpor ymm5, ymm1, ymmword ptr [rsp + 416] # 32-byte Folded Reload - vpor ymm2, ymm1, ymmword ptr [rsp + 320] # 32-byte Folded Reload - vpor ymm15, ymm4, ymm1 - vperm2i128 ymm3, ymm2, ymm15, 49 # ymm3 = ymm2[2,3],ymm15[2,3] - vinserti128 ymm4, ymm2, xmm15, 1 - vshufps ymm3, ymm4, ymm3, 136 # ymm3 = ymm4[0,2],ymm3[0,2],ymm4[4,6],ymm3[4,6] - vperm2i128 ymm4, ymm9, ymm5, 49 # ymm4 = ymm9[2,3],ymm5[2,3] - vinserti128 ymm12, ymm9, xmm5, 1 - vshufps ymm4, ymm12, ymm4, 136 # ymm4 = ymm12[0,2],ymm4[0,2],ymm12[4,6],ymm4[4,6] - vperm2i128 ymm12, ymm8, ymm7, 49 # ymm12 = ymm8[2,3],ymm7[2,3] - vinserti128 ymm13, ymm8, xmm7, 1 - vshufps ymm12, ymm13, ymm12, 136 # ymm12 = ymm13[0,2],ymm12[0,2],ymm13[4,6],ymm12[4,6] - vperm2i128 ymm13, ymm11, ymm10, 49 # ymm13 = ymm11[2,3],ymm10[2,3] - vinserti128 ymm14, ymm11, xmm10, 1 - vshufps ymm13, ymm14, ymm13, 136 # ymm13 = ymm14[0,2],ymm13[0,2],ymm14[4,6],ymm13[4,6] - vpcmpgtd ymm13, ymm0, ymm13 - vpcmpgtd ymm12, ymm0, ymm12 - vpackssdw ymm12, ymm13, ymm12 - vpermq ymm12, ymm12, 216 # ymm12 = ymm12[0,2,1,3] - vpcmpgtd ymm4, ymm0, ymm4 - vpcmpgtd ymm3, ymm0, ymm3 - vpackssdw ymm3, ymm4, ymm3 - vpermq ymm3, ymm3, 216 # ymm3 = ymm3[0,2,1,3] - vpacksswb ymm3, ymm12, ymm3 - vpand ymm3, ymm3, ymm6 - vmovd ecx, xmm3 - # implicit-def: $ymm4 - test cl, 1 - je .LBB0_589 -# %bb.994: # in Loop: Header=BB0_26 Depth=1 - vpbroadcastb ymm4, byte ptr [rdi + rdx] - vpextrb ecx, xmm3, 1 - test cl, 1 - jne 
.LBB0_995 -.LBB0_590: # in Loop: Header=BB0_26 Depth=1 - mov rdx, qword ptr [rsp + 104] # 8-byte Reload - vpextrb ecx, xmm3, 2 - test cl, 1 - je .LBB0_592 -.LBB0_591: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm6, xmm4, byte ptr [rdi + rbx], 2 - vpblendd ymm4, ymm4, ymm6, 15 # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7] -.LBB0_592: # in Loop: Header=BB0_26 Depth=1 - mov rsi, qword ptr [rsp + 96] # 8-byte Reload - mov r10, qword ptr [rsp + 72] # 8-byte Reload - vpextrb ecx, xmm3, 3 - test cl, 1 - je .LBB0_593 -# %bb.996: # in Loop: Header=BB0_26 Depth=1 - mov rcx, qword ptr [rsp + 272] # 8-byte Reload - vpinsrb xmm6, xmm4, byte ptr [rdi + rcx], 3 - vpblendd ymm4, ymm4, ymm6, 15 # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7] - vpextrb ecx, xmm3, 4 - test cl, 1 - jne .LBB0_997 -.LBB0_594: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm3, 5 - test cl, 1 - je .LBB0_595 -.LBB0_998: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm6, xmm4, byte ptr [rdi + rdx], 5 - vpblendd ymm4, ymm4, ymm6, 15 # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7] - vpextrb ecx, xmm3, 6 - test cl, 1 - jne .LBB0_999 -.LBB0_596: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm3, 7 - test cl, 1 - je .LBB0_597 -.LBB0_1000: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm6, xmm4, byte ptr [rdi + r9], 7 - vpblendd ymm4, ymm4, ymm6, 15 # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7] - vpextrb ecx, xmm3, 8 - test cl, 1 - jne .LBB0_1001 -.LBB0_598: # in Loop: Header=BB0_26 Depth=1 - mov rdx, qword ptr [rsp + 88] # 8-byte Reload - vpextrb ecx, xmm3, 9 - test cl, 1 - je .LBB0_600 -.LBB0_599: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm6, xmm4, byte ptr [rdi + r15], 9 - vpblendd ymm4, ymm4, ymm6, 15 # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7] -.LBB0_600: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 144] # 8-byte Reload - mov rsi, qword ptr [rsp + 136] # 8-byte Reload - mov rbx, qword ptr [rsp + 128] # 8-byte Reload - mov r9, qword ptr [rsp + 120] # 8-byte Reload - vpextrb ecx, xmm3, 10 - test cl, 1 - je .LBB0_601 -# %bb.1002: # in 
Loop: Header=BB0_26 Depth=1 - vpinsrb xmm6, xmm4, byte ptr [rdi + rax], 10 - vpblendd ymm4, ymm4, ymm6, 15 # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7] - vpextrb ecx, xmm3, 11 - test cl, 1 - jne .LBB0_1003 -.LBB0_602: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm3, 12 - test cl, 1 - je .LBB0_603 -.LBB0_1004: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 248] # 8-byte Reload - vpinsrb xmm6, xmm4, byte ptr [rdi + rax], 12 - vpblendd ymm4, ymm4, ymm6, 15 # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7] - vpextrb ecx, xmm3, 13 - test cl, 1 - jne .LBB0_604 - jmp .LBB0_605 - .p2align 4, 0x90 -.LBB0_589: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm3, 1 - test cl, 1 - je .LBB0_590 -.LBB0_995: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm6, xmm4, byte ptr [rdi + rsi], 1 - vpblendd ymm4, ymm4, ymm6, 15 # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7] - mov rdx, qword ptr [rsp + 104] # 8-byte Reload - vpextrb ecx, xmm3, 2 - test cl, 1 - jne .LBB0_591 - jmp .LBB0_592 - .p2align 4, 0x90 -.LBB0_593: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm3, 4 - test cl, 1 - je .LBB0_594 -.LBB0_997: # in Loop: Header=BB0_26 Depth=1 - mov rcx, qword ptr [rsp + 264] # 8-byte Reload - vpinsrb xmm6, xmm4, byte ptr [rdi + rcx], 4 - vpblendd ymm4, ymm4, ymm6, 15 # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7] - vpextrb ecx, xmm3, 5 - test cl, 1 - jne .LBB0_998 -.LBB0_595: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm3, 6 - test cl, 1 - je .LBB0_596 -.LBB0_999: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm6, xmm4, byte ptr [rdi + rax], 6 - vpblendd ymm4, ymm4, ymm6, 15 # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7] - vpextrb ecx, xmm3, 7 - test cl, 1 - jne .LBB0_1000 -.LBB0_597: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm3, 8 - test cl, 1 - je .LBB0_598 -.LBB0_1001: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm6, xmm4, byte ptr [rdi + rsi], 8 - vpblendd ymm4, ymm4, ymm6, 15 # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7] - mov rdx, qword ptr [rsp + 88] # 8-byte Reload - vpextrb ecx, xmm3, 9 - test cl, 1 - jne 
.LBB0_599 - jmp .LBB0_600 - .p2align 4, 0x90 -.LBB0_601: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm3, 11 - test cl, 1 - je .LBB0_602 -.LBB0_1003: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 256] # 8-byte Reload - vpinsrb xmm6, xmm4, byte ptr [rdi + rax], 11 - vpblendd ymm4, ymm4, ymm6, 15 # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7] - vpextrb ecx, xmm3, 12 - test cl, 1 - jne .LBB0_1004 -.LBB0_603: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm3, 13 - test cl, 1 - je .LBB0_605 -.LBB0_604: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm6, xmm4, byte ptr [rdi + rdx], 13 - vpblendd ymm4, ymm4, ymm6, 15 # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7] -.LBB0_605: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 80] # 8-byte Reload - mov rdx, qword ptr [rsp + 64] # 8-byte Reload - vpextrb ecx, xmm3, 14 - test cl, 1 - je .LBB0_607 -# %bb.606: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm6, xmm4, byte ptr [rdi + rax], 14 - vpblendd ymm4, ymm4, ymm6, 15 # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7] -.LBB0_607: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm3, 15 - test cl, 1 - je .LBB0_609 -# %bb.608: # in Loop: Header=BB0_26 Depth=1 - vpinsrb xmm6, xmm4, byte ptr [rdi + r10], 15 - vpblendd ymm4, ymm4, ymm6, 15 # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7] -.LBB0_609: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm6, ymm3, 1 - vmovd eax, xmm6 - mov dword ptr [rsp + 512], eax # 4-byte Spill - test al, 1 - je .LBB0_611 -# %bb.610: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + rdx], 0 - vinserti128 ymm4, ymm4, xmm1, 1 -.LBB0_611: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 56] # 8-byte Reload - vpextrb ecx, xmm6, 1 - mov dword ptr [rsp + 480], ecx # 4-byte Spill - test cl, 1 - je .LBB0_612 -# %bb.1005: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + rsi], 1 - vinserti128 ymm4, ymm4, xmm1, 1 - vpextrb ecx, xmm6, 2 - mov dword ptr [rsp + 448], 
ecx # 4-byte Spill - test cl, 1 - jne .LBB0_1006 -.LBB0_613: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 3 - mov dword ptr [rsp + 416], ecx # 4-byte Spill - test cl, 1 - je .LBB0_614 -.LBB0_1007: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + r9], 3 - vinserti128 ymm4, ymm4, xmm1, 1 - vpextrb ecx, xmm6, 4 - mov dword ptr [rsp + 384], ecx # 4-byte Spill - test cl, 1 - jne .LBB0_1008 -.LBB0_615: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm6, 5 - mov dword ptr [rsp + 352], eax # 4-byte Spill - test al, 1 - je .LBB0_617 -.LBB0_616: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + r13], 5 - vinserti128 ymm4, ymm4, xmm1, 1 -.LBB0_617: # in Loop: Header=BB0_26 Depth=1 - mov rax, qword ptr [rsp + 112] # 8-byte Reload - mov rbx, qword ptr [rsp + 184] # 8-byte Reload - mov rdx, qword ptr [rsp + 176] # 8-byte Reload - vpextrb ecx, xmm6, 6 - mov dword ptr [rsp + 320], ecx # 4-byte Spill - test cl, 1 - je .LBB0_618 -# %bb.1009: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + rax], 6 - vinserti128 ymm4, ymm4, xmm1, 1 - vpextrb eax, xmm6, 7 - mov dword ptr [rsp + 152], eax # 4-byte Spill - test al, 1 - jne .LBB0_1010 -.LBB0_619: # in Loop: Header=BB0_26 Depth=1 - vpextrb r9d, xmm6, 8 - test r9b, 1 - je .LBB0_620 -.LBB0_1011: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - mov rax, qword ptr [rsp + 216] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rax], 8 - vinserti128 ymm4, ymm4, xmm1, 1 - vpextrb ecx, xmm6, 9 - test cl, 1 - jne .LBB0_1012 -.LBB0_621: # in Loop: Header=BB0_26 Depth=1 - vpextrb r11d, xmm6, 10 - test r11b, 1 - je .LBB0_622 -.LBB0_1013: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - mov rax, qword ptr [rsp + 200] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rax], 10 - vinserti128 ymm4, ymm4, xmm1, 1 - vpextrb eax, xmm6, 11 - test al, 1 - jne 
.LBB0_1014 -.LBB0_623: # in Loop: Header=BB0_26 Depth=1 - vpextrb esi, xmm6, 12 - test sil, 1 - je .LBB0_624 -.LBB0_1015: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + rbx], 12 - vinserti128 ymm4, ymm4, xmm1, 1 - vpextrb r10d, xmm6, 13 - test r10b, 1 - jne .LBB0_1016 -.LBB0_625: # in Loop: Header=BB0_26 Depth=1 - mov rdx, qword ptr [rsp + 168] # 8-byte Reload - vpextrb r13d, xmm6, 14 - test r13b, 1 - je .LBB0_626 -.LBB0_1017: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + rdx], 14 - vinserti128 ymm4, ymm4, xmm1, 1 - mov rdx, qword ptr [rsp + 160] # 8-byte Reload - vpextrb r14d, xmm6, 15 - test r14b, 1 - jne .LBB0_627 - jmp .LBB0_628 - .p2align 4, 0x90 -.LBB0_612: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 2 - mov dword ptr [rsp + 448], ecx # 4-byte Spill - test cl, 1 - je .LBB0_613 -.LBB0_1006: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + rbx], 2 - vinserti128 ymm4, ymm4, xmm1, 1 - vpextrb ecx, xmm6, 3 - mov dword ptr [rsp + 416], ecx # 4-byte Spill - test cl, 1 - jne .LBB0_1007 -.LBB0_614: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 4 - mov dword ptr [rsp + 384], ecx # 4-byte Spill - test cl, 1 - je .LBB0_615 -.LBB0_1008: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + rax], 4 - vinserti128 ymm4, ymm4, xmm1, 1 - vpextrb eax, xmm6, 5 - mov dword ptr [rsp + 352], eax # 4-byte Spill - test al, 1 - jne .LBB0_616 - jmp .LBB0_617 - .p2align 4, 0x90 -.LBB0_618: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm6, 7 - mov dword ptr [rsp + 152], eax # 4-byte Spill - test al, 1 - je .LBB0_619 -.LBB0_1010: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - mov rax, qword ptr [rsp + 240] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rax], 7 - vinserti128 ymm4, ymm4, xmm1, 1 - vpextrb r9d, xmm6, 8 - test r9b, 1 - jne 
.LBB0_1011 -.LBB0_620: # in Loop: Header=BB0_26 Depth=1 - vpextrb ecx, xmm6, 9 - test cl, 1 - je .LBB0_621 -.LBB0_1012: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - mov rax, qword ptr [rsp + 208] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rax], 9 - vinserti128 ymm4, ymm4, xmm1, 1 - vpextrb r11d, xmm6, 10 - test r11b, 1 - jne .LBB0_1013 -.LBB0_622: # in Loop: Header=BB0_26 Depth=1 - vpextrb eax, xmm6, 11 - test al, 1 - je .LBB0_623 -.LBB0_1014: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - mov rsi, qword ptr [rsp + 192] # 8-byte Reload - vpinsrb xmm1, xmm1, byte ptr [rdi + rsi], 11 - vinserti128 ymm4, ymm4, xmm1, 1 - vpextrb esi, xmm6, 12 - test sil, 1 - jne .LBB0_1015 -.LBB0_624: # in Loop: Header=BB0_26 Depth=1 - vpextrb r10d, xmm6, 13 - test r10b, 1 - je .LBB0_625 -.LBB0_1016: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + rdx], 13 - vinserti128 ymm4, ymm4, xmm1, 1 - mov rdx, qword ptr [rsp + 168] # 8-byte Reload - vpextrb r13d, xmm6, 14 - test r13b, 1 - jne .LBB0_1017 -.LBB0_626: # in Loop: Header=BB0_26 Depth=1 - mov rdx, qword ptr [rsp + 160] # 8-byte Reload - vpextrb r14d, xmm6, 15 - test r14b, 1 - je .LBB0_628 -.LBB0_627: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm4, 1 - vpinsrb xmm1, xmm1, byte ptr [rdi + rdx], 15 - vinserti128 ymm4, ymm4, xmm1, 1 -.LBB0_628: # in Loop: Header=BB0_26 Depth=1 - vpsrlw ymm1, ymm4, 7 - vpand ymm4, ymm1, ymmword ptr [rip + .LCPI0_4] - vmovd r15d, xmm3 - test r15b, 1 - je .LBB0_629 -# %bb.1018: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm11 - vpextrb byte ptr [r8 + rbx], xmm4, 0 - vpextrb ebx, xmm3, 1 - test bl, 1 - jne .LBB0_1019 -.LBB0_630: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm3, 2 - test bl, 1 - je .LBB0_631 -.LBB0_1020: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm11, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm4, 2 - vpextrb ebx, xmm3, 3 - test bl, 1 - jne .LBB0_1021 
-.LBB0_632: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm3, 4 - test bl, 1 - je .LBB0_633 -.LBB0_1022: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm10 - vpextrb byte ptr [r8 + rbx], xmm4, 4 - vpextrb ebx, xmm3, 5 - test bl, 1 - jne .LBB0_1023 -.LBB0_634: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm3, 6 - test bl, 1 - je .LBB0_635 -.LBB0_1024: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm10, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm4, 6 - vpextrb ebx, xmm3, 7 - test bl, 1 - jne .LBB0_1025 -.LBB0_636: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm3, 8 - test bl, 1 - je .LBB0_637 -.LBB0_1026: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm9 - vpextrb byte ptr [r8 + rbx], xmm4, 8 - vpextrb ebx, xmm3, 9 - test bl, 1 - jne .LBB0_1027 -.LBB0_638: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm3, 10 - test bl, 1 - je .LBB0_639 -.LBB0_1028: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm9, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm4, 10 - vpextrb ebx, xmm3, 11 - test bl, 1 - jne .LBB0_1029 -.LBB0_640: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm3, 12 - test bl, 1 - je .LBB0_641 -.LBB0_1030: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm5 - vpextrb byte ptr [r8 + rbx], xmm4, 12 - vpextrb ebx, xmm3, 13 - test bl, 1 - vmovdqa ymm9, ymmword ptr [rsp + 896] # 32-byte Reload - jne .LBB0_1031 -.LBB0_642: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm3, 14 - test bl, 1 - je .LBB0_643 -.LBB0_1032: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm5, 1 - vmovq rbx, xmm1 - vpextrb byte ptr [r8 + rbx], xmm4, 14 - vpextrb ebx, xmm3, 15 - test bl, 1 - jne .LBB0_1033 -.LBB0_644: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 512], 1 # 1-byte Folded Reload - vmovdqa ymm3, ymmword ptr [rsp + 832] # 32-byte Reload - je .LBB0_645 -.LBB0_1034: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm8 - vextracti128 xmm1, ymm4, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 0 - test byte ptr [rsp + 480], 
1 # 1-byte Folded Reload - jne .LBB0_1035 -.LBB0_646: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 448], 1 # 1-byte Folded Reload - je .LBB0_647 -.LBB0_1036: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm8, 1 - vmovq rbx, xmm1 - vextracti128 xmm1, ymm4, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 2 - test byte ptr [rsp + 416], 1 # 1-byte Folded Reload - jne .LBB0_1037 -.LBB0_648: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 384], 1 # 1-byte Folded Reload - je .LBB0_649 -.LBB0_1038: # in Loop: Header=BB0_26 Depth=1 - vmovq rbx, xmm7 - vextracti128 xmm1, ymm4, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 4 - test byte ptr [rsp + 352], 1 # 1-byte Folded Reload - vmovdqa ymm8, ymmword ptr [rsp + 864] # 32-byte Reload - jne .LBB0_1039 -.LBB0_650: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 320], 1 # 1-byte Folded Reload - je .LBB0_651 -.LBB0_1040: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm7, 1 - vmovq rbx, xmm1 - vextracti128 xmm1, ymm4, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 6 - test byte ptr [rsp + 152], 1 # 1-byte Folded Reload - jne .LBB0_1041 -.LBB0_652: # in Loop: Header=BB0_26 Depth=1 - test r9b, 1 - mov r9d, dword ptr [rsp + 16] # 4-byte Reload - je .LBB0_653 -.LBB0_1042: # in Loop: Header=BB0_26 Depth=1 - vmovq rdx, xmm2 - vextracti128 xmm1, ymm4, 1 - vpextrb byte ptr [r8 + rdx], xmm1, 8 - test cl, 1 - jne .LBB0_1043 -.LBB0_654: # in Loop: Header=BB0_26 Depth=1 - test r11b, 1 - mov r11, qword ptr [rsp + 304] # 8-byte Reload - je .LBB0_655 -.LBB0_1044: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm2, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm4, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 10 - test al, 1 - jne .LBB0_1045 -.LBB0_656: # in Loop: Header=BB0_26 Depth=1 - test sil, 1 - je .LBB0_657 -.LBB0_1046: # in Loop: Header=BB0_26 Depth=1 - vmovq rcx, xmm15 - vextracti128 xmm1, ymm4, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 12 - test r10b, 1 - vmovdqa ymm2, ymmword ptr [rsp + 800] # 32-byte Reload - jne 
.LBB0_1047 -.LBB0_658: # in Loop: Header=BB0_26 Depth=1 - test r13b, 1 - mov r10, qword ptr [rsp + 48] # 8-byte Reload - je .LBB0_659 -.LBB0_1048: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm15, 1 - vmovq rcx, xmm1 - vextracti128 xmm1, ymm4, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 14 - test r14b, 1 - je .LBB0_25 - jmp .LBB0_1049 - .p2align 4, 0x90 -.LBB0_629: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm3, 1 - test bl, 1 - je .LBB0_630 -.LBB0_1019: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm11, 1 - vpextrb byte ptr [r8 + rbx], xmm4, 1 - vpextrb ebx, xmm3, 2 - test bl, 1 - jne .LBB0_1020 -.LBB0_631: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm3, 3 - test bl, 1 - je .LBB0_632 -.LBB0_1021: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm11, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm4, 3 - vpextrb ebx, xmm3, 4 - test bl, 1 - jne .LBB0_1022 -.LBB0_633: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm3, 5 - test bl, 1 - je .LBB0_634 -.LBB0_1023: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm10, 1 - vpextrb byte ptr [r8 + rbx], xmm4, 5 - vpextrb ebx, xmm3, 6 - test bl, 1 - jne .LBB0_1024 -.LBB0_635: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm3, 7 - test bl, 1 - je .LBB0_636 -.LBB0_1025: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm10, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm4, 7 - vpextrb ebx, xmm3, 8 - test bl, 1 - jne .LBB0_1026 -.LBB0_637: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm3, 9 - test bl, 1 - je .LBB0_638 -.LBB0_1027: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm9, 1 - vpextrb byte ptr [r8 + rbx], xmm4, 9 - vpextrb ebx, xmm3, 10 - test bl, 1 - jne .LBB0_1028 -.LBB0_639: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm3, 11 - test bl, 1 - je .LBB0_640 -.LBB0_1029: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm9, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm4, 11 - vpextrb ebx, xmm3, 12 - test bl, 1 - jne .LBB0_1030 
-.LBB0_641: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm3, 13 - test bl, 1 - vmovdqa ymm9, ymmword ptr [rsp + 896] # 32-byte Reload - je .LBB0_642 -.LBB0_1031: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm5, 1 - vpextrb byte ptr [r8 + rbx], xmm4, 13 - vpextrb ebx, xmm3, 14 - test bl, 1 - jne .LBB0_1032 -.LBB0_643: # in Loop: Header=BB0_26 Depth=1 - vpextrb ebx, xmm3, 15 - test bl, 1 - je .LBB0_644 -.LBB0_1033: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm5, 1 - vpextrq rbx, xmm1, 1 - vpextrb byte ptr [r8 + rbx], xmm4, 15 - test byte ptr [rsp + 512], 1 # 1-byte Folded Reload - vmovdqa ymm3, ymmword ptr [rsp + 832] # 32-byte Reload - jne .LBB0_1034 -.LBB0_645: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 480], 1 # 1-byte Folded Reload - je .LBB0_646 -.LBB0_1035: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm8, 1 - vextracti128 xmm1, ymm4, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 1 - test byte ptr [rsp + 448], 1 # 1-byte Folded Reload - jne .LBB0_1036 -.LBB0_647: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 416], 1 # 1-byte Folded Reload - je .LBB0_648 -.LBB0_1037: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm8, 1 - vpextrq rbx, xmm1, 1 - vextracti128 xmm1, ymm4, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 3 - test byte ptr [rsp + 384], 1 # 1-byte Folded Reload - jne .LBB0_1038 -.LBB0_649: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 352], 1 # 1-byte Folded Reload - vmovdqa ymm8, ymmword ptr [rsp + 864] # 32-byte Reload - je .LBB0_650 -.LBB0_1039: # in Loop: Header=BB0_26 Depth=1 - vpextrq rbx, xmm7, 1 - vextracti128 xmm1, ymm4, 1 - vpextrb byte ptr [r8 + rbx], xmm1, 5 - test byte ptr [rsp + 320], 1 # 1-byte Folded Reload - jne .LBB0_1040 -.LBB0_651: # in Loop: Header=BB0_26 Depth=1 - test byte ptr [rsp + 152], 1 # 1-byte Folded Reload - je .LBB0_652 -.LBB0_1041: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm7, 1 - vpextrq rbx, xmm1, 1 - vextracti128 xmm1, ymm4, 1 - vpextrb byte ptr [r8 + 
rbx], xmm1, 7 - test r9b, 1 - mov r9d, dword ptr [rsp + 16] # 4-byte Reload - jne .LBB0_1042 -.LBB0_653: # in Loop: Header=BB0_26 Depth=1 - test cl, 1 - je .LBB0_654 -.LBB0_1043: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm2, 1 - vextracti128 xmm1, ymm4, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 9 - test r11b, 1 - mov r11, qword ptr [rsp + 304] # 8-byte Reload - jne .LBB0_1044 -.LBB0_655: # in Loop: Header=BB0_26 Depth=1 - test al, 1 - je .LBB0_656 -.LBB0_1045: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm2, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm4, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 11 - test sil, 1 - jne .LBB0_1046 -.LBB0_657: # in Loop: Header=BB0_26 Depth=1 - test r10b, 1 - vmovdqa ymm2, ymmword ptr [rsp + 800] # 32-byte Reload - je .LBB0_658 -.LBB0_1047: # in Loop: Header=BB0_26 Depth=1 - vpextrq rcx, xmm15, 1 - vextracti128 xmm1, ymm4, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 13 - test r13b, 1 - mov r10, qword ptr [rsp + 48] # 8-byte Reload - jne .LBB0_1048 -.LBB0_659: # in Loop: Header=BB0_26 Depth=1 - test r14b, 1 - je .LBB0_25 -.LBB0_1049: # in Loop: Header=BB0_26 Depth=1 - vextracti128 xmm1, ymm15, 1 - vpextrq rcx, xmm1, 1 - vextracti128 xmm1, ymm4, 1 - vpextrb byte ptr [r8 + rcx], xmm1, 15 - jmp .LBB0_25 -.LBB0_1050: - cmp r12, r10 - jne .LBB0_1055 -.LBB0_1051: - lea rsp, [rbp - 40] - pop rbx - pop r12 - pop r13 - pop r14 - pop r15 + mov r8d, esi + shl r8, 3 + xor r10d, r10d + jmp .LBB0_2 + .p2align 4, 0x90 +.LBB0_4: # in Loop: Header=BB0_2 Depth=1 + add r10, 8 + add rdi, 1 + cmp r8, r10 + je .LBB0_5 +.LBB0_2: # =>This Inner Loop Header: Depth=1 + cmp r10d, ecx + jge .LBB0_4 +# %bb.3: # in Loop: Header=BB0_2 Depth=1 + mov r9d, r10d + movzx eax, byte ptr [rdi] + and al, 1 + mov byte ptr [rdx + r9], al + mov rsi, r9 + or rsi, 1 + cmp esi, ecx + jge .LBB0_4 +# %bb.6: # in Loop: Header=BB0_2 Depth=1 + movzx eax, byte ptr [rdi] + shr al + and al, 1 + mov byte ptr [rdx + rsi], al + mov rsi, r9 + or rsi, 2 + cmp esi, ecx + jge 
.LBB0_4 +# %bb.7: # in Loop: Header=BB0_2 Depth=1 + movzx eax, byte ptr [rdi] + shr al, 2 + and al, 1 + mov byte ptr [rdx + rsi], al + mov rsi, r9 + or rsi, 3 + cmp esi, ecx + jge .LBB0_4 +# %bb.8: # in Loop: Header=BB0_2 Depth=1 + movzx eax, byte ptr [rdi] + shr al, 3 + and al, 1 + mov byte ptr [rdx + rsi], al + mov rsi, r9 + or rsi, 4 + cmp esi, ecx + jge .LBB0_4 +# %bb.9: # in Loop: Header=BB0_2 Depth=1 + movzx eax, byte ptr [rdi] + shr al, 4 + and al, 1 + mov byte ptr [rdx + rsi], al + mov rsi, r9 + or rsi, 5 + cmp esi, ecx + jge .LBB0_4 +# %bb.10: # in Loop: Header=BB0_2 Depth=1 + movzx eax, byte ptr [rdi] + shr al, 5 + and al, 1 + mov byte ptr [rdx + rsi], al + mov rsi, r9 + or rsi, 6 + cmp esi, ecx + jge .LBB0_4 +# %bb.11: # in Loop: Header=BB0_2 Depth=1 + movzx eax, byte ptr [rdi] + shr al, 6 + and al, 1 + mov byte ptr [rdx + rsi], al + or r9, 7 + cmp r9d, ecx + jge .LBB0_4 +# %bb.12: # in Loop: Header=BB0_2 Depth=1 + movzx eax, byte ptr [rdi] + shr al, 7 + mov byte ptr [rdx + r9], al + jmp .LBB0_4 +.LBB0_5: + mov rsp, rbp pop rbp - vzeroupper ret -.LBB0_1052: - mov r9d, dword ptr [rsp + 16] # 4-byte Reload - mov r10, qword ptr [rsp + 48] # 8-byte Reload - jmp .LBB0_1055 -.LBB0_1054: - mov r9d, dword ptr [rsp + 16] # 4-byte Reload - jmp .LBB0_1055 .Lfunc_end0: .size bytes_to_bools_avx2, .Lfunc_end0-bytes_to_bools_avx2 # -- End function - .ident "Ubuntu clang version 11.1.0-++20210204121720+1fdec59bffc1-1~exp1~20210203232336.162" + .ident "Debian clang version 11.1.0-++20210428103820+1fdec59bffc1-1~exp1~20210428204437.162" .section ".note.GNU-stack","",@progbits .addrsig diff --git a/go/parquet/internal/utils/_lib/unpack_bool_sse4.s b/go/parquet/internal/utils/_lib/unpack_bool_sse4.s index 18caa0473df..6719771b865 100644 --- a/go/parquet/internal/utils/_lib/unpack_bool_sse4.s +++ b/go/parquet/internal/utils/_lib/unpack_bool_sse4.s @@ -99,6 +99,6 @@ bytes_to_bools_sse4: # @bytes_to_bools_sse4 .Lfunc_end0: .size bytes_to_bools_sse4, 
.Lfunc_end0-bytes_to_bools_sse4 # -- End function - .ident "Ubuntu clang version 11.1.0-++20210204121720+1fdec59bffc1-1~exp1~20210203232336.162" + .ident "Debian clang version 11.1.0-++20210428103820+1fdec59bffc1-1~exp1~20210428204437.162" .section ".note.GNU-stack","",@progbits .addrsig diff --git a/go/parquet/internal/utils/bitmap_writer.go b/go/parquet/internal/utils/bitmap_writer.go index eed9f867554..f7c1f7a57cd 100644 --- a/go/parquet/internal/utils/bitmap_writer.go +++ b/go/parquet/internal/utils/bitmap_writer.go @@ -96,6 +96,9 @@ type BitmapWriter interface { Finish() // AppendWord takes nbits from word which should be an LSB bitmap and appends them to the bitmap. AppendWord(word uint64, nbits int64) + // AppendBools appends the bit representation of the bools slice, returning the number + // of bools that were able to fit in the remaining length of the bitmapwriter. + AppendBools(in []bool) int // Pos is the current position that will be written next Pos() int64 // Reset allows reusing the bitmapwriter by resetting Pos to start with length as @@ -140,7 +143,7 @@ func (b *bitmapWriter) Reset(start, length int64) { func (b *bitmapWriter) Pos() int64 { return b.pos } func (b *bitmapWriter) Set() { b.curByte |= b.bitMask } -func (b *bitmapWriter) Clear() { b.curByte &= b.bitMask ^ 0xFF } +func (b *bitmapWriter) Clear() { b.curByte &= ^b.bitMask } func (b *bitmapWriter) Next() { b.bitMask = b.bitMask << 1 @@ -155,6 +158,30 @@ func (b *bitmapWriter) Next() { } } +func (b *bitmapWriter) AppendBools(in []bool) int { + space := Min(bitutil.BytesForBits(b.length-b.pos), int64(len(in))) + + // location that the first byte needs to be written to for appending + appslice := b.buf[int(b.byteOffset):] + // update everything but curByte + bitOffset := bits.TrailingZeros32(uint32(b.bitMask)) + appslice[0] = b.curByte + for i, b := range in[:space] { + if b { + bitutil.SetBit(appslice, i) + } else { + bitutil.ClearBit(appslice, i) + } + } + + b.pos += space + b.bitMask = 
bitutil.BitMask[(int64(bitOffset)+space)%8] + b.byteOffset += (int64(bitOffset) + space) / 8 + b.curByte = appslice[len(appslice)-1] + + return int(space) +} + func (b *bitmapWriter) Finish() { if b.length > 0 && (b.bitMask != 0x01 || b.pos < b.length) { b.buf[int(b.byteOffset)] = b.curByte @@ -267,6 +294,10 @@ func (bw *firstTimeBitmapWriter) Next() { } } +func (b *firstTimeBitmapWriter) AppendBools(in []bool) int { + panic("Append Bools not yet implemented for firstTimeBitmapWriter") +} + func (bw *firstTimeBitmapWriter) Finish() { // store curByte into the bitmap if bw.length > 0 && bw.bitMask != 0x01 || bw.pos < bw.length { diff --git a/go/parquet/internal/utils/min_max_avx2_amd64.s b/go/parquet/internal/utils/min_max_avx2_amd64.s index 6a1bb18fde6..a54758ba1ed 100644 --- a/go/parquet/internal/utils/min_max_avx2_amd64.s +++ b/go/parquet/internal/utils/min_max_avx2_amd64.s @@ -4,364 +4,188 @@ DATA LCDATA1<>+0x000(SB)/8, $0x7fffffff80000000 GLOBL LCDATA1<>(SB), 8, $8 -TEXT ·_int32_max_min_avx2(SB), $72-32 +TEXT ·_int32_max_min_avx2(SB), $0-32 MOVQ values+0(FP), DI MOVQ length+8(FP), SI MOVQ minout+16(FP), DX MOVQ maxout+24(FP), CX - ADDQ $8, SP LEAQ LCDATA1<>(SB), BP WORD $0xf685 // test esi, esi JLE LBB0_1 WORD $0x8941; BYTE $0xf0 // mov r8d, esi WORD $0xfe83; BYTE $0x1f // cmp esi, 31 - JA LBB0_6 - LONG $0x000000b8; BYTE $0x80 // mov eax, -2147483648 - LONG $0xffffb941; WORD $0x7fff // mov r9d, 2147483647 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - JMP LBB0_4 + JA LBB0_4 + LONG $0x0000ba41; WORD $0x8000 // mov r10d, -2147483648 + LONG $0xffffffb8; BYTE $0x7f // mov eax, 2147483647 + WORD $0x3145; BYTE $0xc9 // xor r9d, r9d + JMP LBB0_7 LBB0_1: - LONG $0xffffb941; WORD $0x7fff // mov r9d, 2147483647 - LONG $0x000000b8; BYTE $0x80 // mov eax, -2147483648 - JMP LBB0_14 + LONG $0xffffffb8; BYTE $0x7f // mov eax, 2147483647 + LONG $0x000000be; BYTE $0x80 // mov esi, -2147483648 + JMP LBB0_8 -LBB0_6: - WORD $0x8945; BYTE $0xc3 // mov r11d, r8d - LONG $0xe0e38341 
// and r11d, -32 - LONG $0xe0438d49 // lea rax, [r11 - 32] - WORD $0x8949; BYTE $0xc2 // mov r10, rax - LONG $0x05eac149 // shr r10, 5 - LONG $0x01c28349 // add r10, 1 - WORD $0x8945; BYTE $0xd1 // mov r9d, r10d - LONG $0x03e18341 // and r9d, 3 - LONG $0x60f88348 // cmp rax, 96 - JAE LBB0_8 - LONG $0x587de2c4; WORD $0x0045 // vpbroadcastd ymm0, dword 0[rbp] /* [rip + .LCPI0_0] */ - LONG $0x587de2c4; WORD $0x044d // vpbroadcastd ymm1, dword 4[rbp] /* [rip + .LCPI0_1] */ - WORD $0xc031 // xor eax, eax - LONG $0xd16ffdc5 // vmovdqa ymm2, ymm1 - LONG $0xe16ffdc5 // vmovdqa ymm4, ymm1 - LONG $0xf16ffdc5 // vmovdqa ymm6, ymm1 - LONG $0xd86ffdc5 // vmovdqa ymm3, ymm0 - LONG $0xe86ffdc5 // vmovdqa ymm5, ymm0 - LONG $0xf86ffdc5 // vmovdqa ymm7, ymm0 - JMP LBB0_10 - -LBB0_8: - LONG $0xfce28349 // and r10, -4 - LONG $0x587de2c4; WORD $0x0045 // vpbroadcastd ymm0, dword 0[rbp] /* [rip + .LCPI0_0] */ - WORD $0xf749; BYTE $0xda // neg r10 - LONG $0x587de2c4; WORD $0x044d // vpbroadcastd ymm1, dword 4[rbp] /* [rip + .LCPI0_1] */ +LBB0_4: + WORD $0x8945; BYTE $0xc1 // mov r9d, r8d + LONG $0x587de2c4; WORD $0x0065 // vpbroadcastd ymm4, dword 0[rbp] /* [rip + .LCPI0_0] */ + LONG $0xe0e18341 // and r9d, -32 + LONG $0x587de2c4; WORD $0x0445 // vpbroadcastd ymm0, dword 4[rbp] /* [rip + .LCPI0_1] */ WORD $0xc031 // xor eax, eax - LONG $0xd16ffdc5 // vmovdqa ymm2, ymm1 - LONG $0xe16ffdc5 // vmovdqa ymm4, ymm1 - LONG $0xf16ffdc5 // vmovdqa ymm6, ymm1 + LONG $0xc86ffdc5 // vmovdqa ymm1, ymm0 + LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0 LONG $0xd86ffdc5 // vmovdqa ymm3, ymm0 - LONG $0xe86ffdc5 // vmovdqa ymm5, ymm0 - LONG $0xf86ffdc5 // vmovdqa ymm7, ymm0 + LONG $0xec6ffdc5 // vmovdqa ymm5, ymm4 + LONG $0xf46ffdc5 // vmovdqa ymm6, ymm4 + LONG $0xfc6ffdc5 // vmovdqa ymm7, ymm4 -LBB0_9: - LONG $0x046f7ec5; BYTE $0x87 // vmovdqu ymm8, yword [rdi + 4*rax] - LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu ymm9, yword [rdi + 4*rax + 32] - LONG $0x546f7ec5; WORD $0x4087 // vmovdqu ymm10, yword [rdi + 4*rax 
+ 64] - LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu ymm11, yword [rdi + 4*rax + 96] - LONG $0x394dc2c4; BYTE $0xf3 // vpminsd ymm6, ymm6, ymm11 - LONG $0x395dc2c4; BYTE $0xe2 // vpminsd ymm4, ymm4, ymm10 - LONG $0x3975c2c4; BYTE $0xc8 // vpminsd ymm1, ymm1, ymm8 - LONG $0x396dc2c4; BYTE $0xd1 // vpminsd ymm2, ymm2, ymm9 - LONG $0x3d45c2c4; BYTE $0xfb // vpmaxsd ymm7, ymm7, ymm11 - LONG $0x3d55c2c4; BYTE $0xea // vpmaxsd ymm5, ymm5, ymm10 - LONG $0x3d7dc2c4; BYTE $0xc0 // vpmaxsd ymm0, ymm0, ymm8 - LONG $0x3d65c2c4; BYTE $0xd9 // vpmaxsd ymm3, ymm3, ymm9 - QUAD $0x0000e087846f7ec5; BYTE $0x00 // vmovdqu ymm8, yword [rdi + 4*rax + 224] - QUAD $0x0000c0878c6f7ec5; BYTE $0x00 // vmovdqu ymm9, yword [rdi + 4*rax + 192] - QUAD $0x00008087946f7ec5; BYTE $0x00 // vmovdqu ymm10, yword [rdi + 4*rax + 128] - QUAD $0x0000a0879c6f7ec5; BYTE $0x00 // vmovdqu ymm11, yword [rdi + 4*rax + 160] - QUAD $0x00010087a46f7ec5; BYTE $0x00 // vmovdqu ymm12, yword [rdi + 4*rax + 256] - QUAD $0x00014087ac6f7ec5; BYTE $0x00 // vmovdqu ymm13, yword [rdi + 4*rax + 320] - QUAD $0x00016087b46f7ec5; BYTE $0x00 // vmovdqu ymm14, yword [rdi + 4*rax + 352] - LONG $0x393d42c4; BYTE $0xfe // vpminsd ymm15, ymm8, ymm14 - LONG $0x394dc2c4; BYTE $0xf7 // vpminsd ymm6, ymm6, ymm15 - LONG $0x347ffdc5; BYTE $0x24 // vmovdqa yword [rsp], ymm6 - LONG $0x393542c4; BYTE $0xfd // vpminsd ymm15, ymm9, ymm13 - LONG $0x395dc2c4; BYTE $0xe7 // vpminsd ymm4, ymm4, ymm15 - LONG $0x392d42c4; BYTE $0xfc // vpminsd ymm15, ymm10, ymm12 - LONG $0x3975c2c4; BYTE $0xcf // vpminsd ymm1, ymm1, ymm15 - QUAD $0x00012087bc6f7ec5; BYTE $0x00 // vmovdqu ymm15, yword [rdi + 4*rax + 288] - LONG $0x3925c2c4; BYTE $0xf7 // vpminsd ymm6, ymm11, ymm15 - LONG $0x396de2c4; BYTE $0xd6 // vpminsd ymm2, ymm2, ymm6 - LONG $0x3d3dc2c4; BYTE $0xf6 // vpmaxsd ymm6, ymm8, ymm14 - LONG $0x3d45e2c4; BYTE $0xfe // vpmaxsd ymm7, ymm7, ymm6 - LONG $0x3d35c2c4; BYTE $0xf5 // vpmaxsd ymm6, ymm9, ymm13 - LONG $0x3d55e2c4; BYTE $0xee // vpmaxsd ymm5, ymm5, 
ymm6 - LONG $0x3d2dc2c4; BYTE $0xf4 // vpmaxsd ymm6, ymm10, ymm12 - LONG $0x3d7de2c4; BYTE $0xc6 // vpmaxsd ymm0, ymm0, ymm6 - LONG $0x3d25c2c4; BYTE $0xf7 // vpmaxsd ymm6, ymm11, ymm15 - LONG $0x3d65e2c4; BYTE $0xde // vpmaxsd ymm3, ymm3, ymm6 - QUAD $0x0001a087b46ffec5; BYTE $0x00 // vmovdqu ymm6, yword [rdi + 4*rax + 416] - LONG $0x396de2c4; BYTE $0xd6 // vpminsd ymm2, ymm2, ymm6 - LONG $0x3d65e2c4; BYTE $0xde // vpmaxsd ymm3, ymm3, ymm6 - QUAD $0x00018087b46ffec5; BYTE $0x00 // vmovdqu ymm6, yword [rdi + 4*rax + 384] - LONG $0x3975e2c4; BYTE $0xce // vpminsd ymm1, ymm1, ymm6 - LONG $0x3d7de2c4; BYTE $0xc6 // vpmaxsd ymm0, ymm0, ymm6 - QUAD $0x0001c087b46ffec5; BYTE $0x00 // vmovdqu ymm6, yword [rdi + 4*rax + 448] - LONG $0x395de2c4; BYTE $0xe6 // vpminsd ymm4, ymm4, ymm6 - LONG $0x3d55e2c4; BYTE $0xee // vpmaxsd ymm5, ymm5, ymm6 - QUAD $0x0001e087846f7ec5; BYTE $0x00 // vmovdqu ymm8, yword [rdi + 4*rax + 480] - LONG $0x393de2c4; WORD $0x2434 // vpminsd ymm6, ymm8, yword [rsp] - LONG $0x3d45c2c4; BYTE $0xf8 // vpmaxsd ymm7, ymm7, ymm8 - LONG $0x80e88348 // sub rax, -128 - LONG $0x04c28349 // add r10, 4 - JNE LBB0_9 - -LBB0_10: - WORD $0x854d; BYTE $0xc9 // test r9, r9 - JE LBB0_13 - LONG $0x87048d48 // lea rax, [rdi + 4*rax] - WORD $0xf749; BYTE $0xd9 // neg r9 - -LBB0_12: - LONG $0x006f7ec5 // vmovdqu ymm8, yword [rax] - LONG $0x486f7ec5; BYTE $0x20 // vmovdqu ymm9, yword [rax + 32] - LONG $0x506f7ec5; BYTE $0x40 // vmovdqu ymm10, yword [rax + 64] - LONG $0x586f7ec5; BYTE $0x60 // vmovdqu ymm11, yword [rax + 96] - LONG $0x396dc2c4; BYTE $0xd1 // vpminsd ymm2, ymm2, ymm9 - LONG $0x3975c2c4; BYTE $0xc8 // vpminsd ymm1, ymm1, ymm8 - LONG $0x395dc2c4; BYTE $0xe2 // vpminsd ymm4, ymm4, ymm10 - LONG $0x394dc2c4; BYTE $0xf3 // vpminsd ymm6, ymm6, ymm11 - LONG $0x3d65c2c4; BYTE $0xd9 // vpmaxsd ymm3, ymm3, ymm9 - LONG $0x3d7dc2c4; BYTE $0xc0 // vpmaxsd ymm0, ymm0, ymm8 - LONG $0x3d55c2c4; BYTE $0xea // vpmaxsd ymm5, ymm5, ymm10 - LONG $0x3d45c2c4; BYTE $0xfb // vpmaxsd 
ymm7, ymm7, ymm11 - LONG $0x80e88348 // sub rax, -128 - WORD $0xff49; BYTE $0xc1 // inc r9 - JNE LBB0_12 - -LBB0_13: - LONG $0x396de2c4; BYTE $0xd6 // vpminsd ymm2, ymm2, ymm6 - LONG $0x3975e2c4; BYTE $0xcc // vpminsd ymm1, ymm1, ymm4 - LONG $0x3975e2c4; BYTE $0xca // vpminsd ymm1, ymm1, ymm2 - LONG $0x3d65e2c4; BYTE $0xd7 // vpmaxsd ymm2, ymm3, ymm7 - LONG $0x3d7de2c4; BYTE $0xc5 // vpmaxsd ymm0, ymm0, ymm5 - LONG $0x3d7de2c4; BYTE $0xc2 // vpmaxsd ymm0, ymm0, ymm2 - LONG $0x397de3c4; WORD $0x01c2 // vextracti128 xmm2, ymm0, 1 - LONG $0x3d79e2c4; BYTE $0xc2 // vpmaxsd xmm0, xmm0, xmm2 - LONG $0xd070f9c5; BYTE $0x4e // vpshufd xmm2, xmm0, 78 - LONG $0x3d79e2c4; BYTE $0xc2 // vpmaxsd xmm0, xmm0, xmm2 - LONG $0xd070f9c5; BYTE $0xe5 // vpshufd xmm2, xmm0, 229 - LONG $0x3d79e2c4; BYTE $0xc2 // vpmaxsd xmm0, xmm0, xmm2 - LONG $0xc07ef9c5 // vmovd eax, xmm0 - LONG $0x397de3c4; WORD $0x01c8 // vextracti128 xmm0, ymm1, 1 - LONG $0x3971e2c4; BYTE $0xc0 // vpminsd xmm0, xmm1, xmm0 +LBB0_5: + LONG $0x046f7ec5; BYTE $0x87 // vmovdqu ymm8, yword [rdi + 4*rax] + LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu ymm9, yword [rdi + 4*rax + 32] + LONG $0x546f7ec5; WORD $0x4087 // vmovdqu ymm10, yword [rdi + 4*rax + 64] + LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu ymm11, yword [rdi + 4*rax + 96] + LONG $0x397dc2c4; BYTE $0xc0 // vpminsd ymm0, ymm0, ymm8 + LONG $0x3975c2c4; BYTE $0xc9 // vpminsd ymm1, ymm1, ymm9 + LONG $0x396dc2c4; BYTE $0xd2 // vpminsd ymm2, ymm2, ymm10 + LONG $0x3965c2c4; BYTE $0xdb // vpminsd ymm3, ymm3, ymm11 + LONG $0x3d5dc2c4; BYTE $0xe0 // vpmaxsd ymm4, ymm4, ymm8 + LONG $0x3d55c2c4; BYTE $0xe9 // vpmaxsd ymm5, ymm5, ymm9 + LONG $0x3d4dc2c4; BYTE $0xf2 // vpmaxsd ymm6, ymm6, ymm10 + LONG $0x3d45c2c4; BYTE $0xfb // vpmaxsd ymm7, ymm7, ymm11 + LONG $0x20c08348 // add rax, 32 + WORD $0x3949; BYTE $0xc1 // cmp r9, rax + JNE LBB0_5 + LONG $0x3d5de2c4; BYTE $0xe5 // vpmaxsd ymm4, ymm4, ymm5 + LONG $0x3d5de2c4; BYTE $0xe6 // vpmaxsd ymm4, ymm4, ymm6 + LONG $0x3d5de2c4; BYTE 
$0xe7 // vpmaxsd ymm4, ymm4, ymm7 + LONG $0x397de3c4; WORD $0x01e5 // vextracti128 xmm5, ymm4, 1 + LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5 + LONG $0xec70f9c5; BYTE $0x4e // vpshufd xmm5, xmm4, 78 + LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5 + LONG $0xec70f9c5; BYTE $0xe5 // vpshufd xmm5, xmm4, 229 + LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5 + LONG $0x7e79c1c4; BYTE $0xe2 // vmovd r10d, xmm4 + LONG $0x397de2c4; BYTE $0xc1 // vpminsd ymm0, ymm0, ymm1 + LONG $0x397de2c4; BYTE $0xc2 // vpminsd ymm0, ymm0, ymm2 + LONG $0x397de2c4; BYTE $0xc3 // vpminsd ymm0, ymm0, ymm3 + LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1 + LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1 LONG $0xc870f9c5; BYTE $0x4e // vpshufd xmm1, xmm0, 78 LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1 LONG $0xc870f9c5; BYTE $0xe5 // vpshufd xmm1, xmm0, 229 LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1 - LONG $0x7e79c1c4; BYTE $0xc1 // vmovd r9d, xmm0 - WORD $0x394d; BYTE $0xc3 // cmp r11, r8 - JE LBB0_14 - -LBB0_4: - WORD $0xc689 // mov esi, eax + LONG $0xc07ef9c5 // vmovd eax, xmm0 + WORD $0x8944; BYTE $0xd6 // mov esi, r10d + WORD $0x394d; BYTE $0xc1 // cmp r9, r8 + JE LBB0_8 -LBB0_5: - LONG $0x9f048b42 // mov eax, dword [rdi + 4*r11] - WORD $0x3941; BYTE $0xc1 // cmp r9d, eax - LONG $0xc84f0f44 // cmovg r9d, eax - WORD $0xc639 // cmp esi, eax - WORD $0x4d0f; BYTE $0xc6 // cmovge eax, esi - LONG $0x01c38349 // add r11, 1 - WORD $0xc689 // mov esi, eax - WORD $0x394d; BYTE $0xd8 // cmp r8, r11 - JNE LBB0_5 +LBB0_7: + LONG $0x8f348b42 // mov esi, dword [rdi + 4*r9] + WORD $0xf039 // cmp eax, esi + WORD $0x4f0f; BYTE $0xc6 // cmovg eax, esi + WORD $0x3941; BYTE $0xf2 // cmp r10d, esi + LONG $0xf24d0f41 // cmovge esi, r10d + LONG $0x01c18349 // add r9, 1 + WORD $0x8941; BYTE $0xf2 // mov r10d, esi + WORD $0x394d; BYTE $0xc8 // cmp r8, r9 + JNE LBB0_7 -LBB0_14: - WORD $0x0189 // mov dword [rcx], eax - WORD $0x8944; BYTE 
$0x0a // mov dword [rdx], r9d - SUBQ $8, SP +LBB0_8: + WORD $0x3189 // mov dword [rcx], esi + WORD $0x0289 // mov dword [rdx], eax VZEROUPPER RET -TEXT ·_uint32_max_min_avx2(SB), $72-32 +TEXT ·_uint32_max_min_avx2(SB), $0-32 MOVQ values+0(FP), DI MOVQ length+8(FP), SI MOVQ minout+16(FP), DX MOVQ maxout+24(FP), CX - ADDQ $8, SP - WORD $0xf685 // test esi, esi + WORD $0xf685 // test esi, esi JLE LBB1_1 - WORD $0x8941; BYTE $0xf0 // mov r8d, esi - WORD $0xfe83; BYTE $0x1f // cmp esi, 31 - JA LBB1_6 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - LONG $0xffffb941; WORD $0xffff // mov r9d, -1 - WORD $0xf631 // xor esi, esi - JMP LBB1_4 + WORD $0x8941; BYTE $0xf0 // mov r8d, esi + WORD $0xfe83; BYTE $0x1f // cmp esi, 31 + JA LBB1_4 + WORD $0x3145; BYTE $0xc9 // xor r9d, r9d + LONG $0xffffffb8; BYTE $0xff // mov eax, -1 + WORD $0x3145; BYTE $0xd2 // xor r10d, r10d + JMP LBB1_7 LBB1_1: - LONG $0xffffb941; WORD $0xffff // mov r9d, -1 - WORD $0xf631 // xor esi, esi - JMP LBB1_14 + LONG $0xffffffb8; BYTE $0xff // mov eax, -1 + WORD $0xf631 // xor esi, esi + JMP LBB1_8 -LBB1_6: - WORD $0x8945; BYTE $0xc3 // mov r11d, r8d - LONG $0xe0e38341 // and r11d, -32 - LONG $0xe0438d49 // lea rax, [r11 - 32] - WORD $0x8949; BYTE $0xc2 // mov r10, rax - LONG $0x05eac149 // shr r10, 5 - LONG $0x01c28349 // add r10, 1 - WORD $0x8945; BYTE $0xd1 // mov r9d, r10d - LONG $0x03e18341 // and r9d, 3 - LONG $0x60f88348 // cmp rax, 96 - JAE LBB1_8 - LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0 - LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1 +LBB1_4: + WORD $0x8945; BYTE $0xc1 // mov r9d, r8d + LONG $0xe0e18341 // and r9d, -32 + LONG $0xe4efd9c5 // vpxor xmm4, xmm4, xmm4 + LONG $0xc076fdc5 // vpcmpeqd ymm0, ymm0, ymm0 WORD $0xc031 // xor eax, eax - LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2 - LONG $0xe476ddc5 // vpcmpeqd ymm4, ymm4, ymm4 - LONG $0xf676cdc5 // vpcmpeqd ymm6, ymm6, ymm6 - LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3 - LONG $0xedefd1c5 // vpxor xmm5, xmm5, xmm5 - LONG $0xffefc1c5 // 
vpxor xmm7, xmm7, xmm7 - JMP LBB1_10 - -LBB1_8: - LONG $0xfce28349 // and r10, -4 - WORD $0xf749; BYTE $0xda // neg r10 - LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0 LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1 - WORD $0xc031 // xor eax, eax LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2 - LONG $0xe476ddc5 // vpcmpeqd ymm4, ymm4, ymm4 - LONG $0xf676cdc5 // vpcmpeqd ymm6, ymm6, ymm6 - LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3 + LONG $0xdb76e5c5 // vpcmpeqd ymm3, ymm3, ymm3 LONG $0xedefd1c5 // vpxor xmm5, xmm5, xmm5 + LONG $0xf6efc9c5 // vpxor xmm6, xmm6, xmm6 LONG $0xffefc1c5 // vpxor xmm7, xmm7, xmm7 -LBB1_9: - LONG $0x046f7ec5; BYTE $0x87 // vmovdqu ymm8, yword [rdi + 4*rax] - LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu ymm9, yword [rdi + 4*rax + 32] - LONG $0x546f7ec5; WORD $0x4087 // vmovdqu ymm10, yword [rdi + 4*rax + 64] - LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu ymm11, yword [rdi + 4*rax + 96] - LONG $0x3b4dc2c4; BYTE $0xf3 // vpminud ymm6, ymm6, ymm11 - LONG $0x3b5dc2c4; BYTE $0xe2 // vpminud ymm4, ymm4, ymm10 - LONG $0x3b75c2c4; BYTE $0xc8 // vpminud ymm1, ymm1, ymm8 - LONG $0x3b6dc2c4; BYTE $0xd1 // vpminud ymm2, ymm2, ymm9 - LONG $0x3f45c2c4; BYTE $0xfb // vpmaxud ymm7, ymm7, ymm11 - LONG $0x3f55c2c4; BYTE $0xea // vpmaxud ymm5, ymm5, ymm10 - LONG $0x3f7dc2c4; BYTE $0xc0 // vpmaxud ymm0, ymm0, ymm8 - LONG $0x3f65c2c4; BYTE $0xd9 // vpmaxud ymm3, ymm3, ymm9 - QUAD $0x0000e087846f7ec5; BYTE $0x00 // vmovdqu ymm8, yword [rdi + 4*rax + 224] - QUAD $0x0000c0878c6f7ec5; BYTE $0x00 // vmovdqu ymm9, yword [rdi + 4*rax + 192] - QUAD $0x00008087946f7ec5; BYTE $0x00 // vmovdqu ymm10, yword [rdi + 4*rax + 128] - QUAD $0x0000a0879c6f7ec5; BYTE $0x00 // vmovdqu ymm11, yword [rdi + 4*rax + 160] - QUAD $0x00010087a46f7ec5; BYTE $0x00 // vmovdqu ymm12, yword [rdi + 4*rax + 256] - QUAD $0x00014087ac6f7ec5; BYTE $0x00 // vmovdqu ymm13, yword [rdi + 4*rax + 320] - QUAD $0x00016087b46f7ec5; BYTE $0x00 // vmovdqu ymm14, yword [rdi + 4*rax + 352] - LONG $0x3b3d42c4; BYTE $0xfe 
// vpminud ymm15, ymm8, ymm14 - LONG $0x3b4dc2c4; BYTE $0xf7 // vpminud ymm6, ymm6, ymm15 - LONG $0x347ffdc5; BYTE $0x24 // vmovdqa yword [rsp], ymm6 - LONG $0x3b3542c4; BYTE $0xfd // vpminud ymm15, ymm9, ymm13 - LONG $0x3b5dc2c4; BYTE $0xe7 // vpminud ymm4, ymm4, ymm15 - LONG $0x3b2d42c4; BYTE $0xfc // vpminud ymm15, ymm10, ymm12 - LONG $0x3b75c2c4; BYTE $0xcf // vpminud ymm1, ymm1, ymm15 - QUAD $0x00012087bc6f7ec5; BYTE $0x00 // vmovdqu ymm15, yword [rdi + 4*rax + 288] - LONG $0x3b25c2c4; BYTE $0xf7 // vpminud ymm6, ymm11, ymm15 - LONG $0x3b6de2c4; BYTE $0xd6 // vpminud ymm2, ymm2, ymm6 - LONG $0x3f3dc2c4; BYTE $0xf6 // vpmaxud ymm6, ymm8, ymm14 - LONG $0x3f45e2c4; BYTE $0xfe // vpmaxud ymm7, ymm7, ymm6 - LONG $0x3f35c2c4; BYTE $0xf5 // vpmaxud ymm6, ymm9, ymm13 - LONG $0x3f55e2c4; BYTE $0xee // vpmaxud ymm5, ymm5, ymm6 - LONG $0x3f2dc2c4; BYTE $0xf4 // vpmaxud ymm6, ymm10, ymm12 - LONG $0x3f7de2c4; BYTE $0xc6 // vpmaxud ymm0, ymm0, ymm6 - LONG $0x3f25c2c4; BYTE $0xf7 // vpmaxud ymm6, ymm11, ymm15 - LONG $0x3f65e2c4; BYTE $0xde // vpmaxud ymm3, ymm3, ymm6 - QUAD $0x0001a087b46ffec5; BYTE $0x00 // vmovdqu ymm6, yword [rdi + 4*rax + 416] - LONG $0x3b6de2c4; BYTE $0xd6 // vpminud ymm2, ymm2, ymm6 - LONG $0x3f65e2c4; BYTE $0xde // vpmaxud ymm3, ymm3, ymm6 - QUAD $0x00018087b46ffec5; BYTE $0x00 // vmovdqu ymm6, yword [rdi + 4*rax + 384] - LONG $0x3b75e2c4; BYTE $0xce // vpminud ymm1, ymm1, ymm6 - LONG $0x3f7de2c4; BYTE $0xc6 // vpmaxud ymm0, ymm0, ymm6 - QUAD $0x0001c087b46ffec5; BYTE $0x00 // vmovdqu ymm6, yword [rdi + 4*rax + 448] - LONG $0x3b5de2c4; BYTE $0xe6 // vpminud ymm4, ymm4, ymm6 - LONG $0x3f55e2c4; BYTE $0xee // vpmaxud ymm5, ymm5, ymm6 - QUAD $0x0001e087846f7ec5; BYTE $0x00 // vmovdqu ymm8, yword [rdi + 4*rax + 480] - LONG $0x3b3de2c4; WORD $0x2434 // vpminud ymm6, ymm8, yword [rsp] - LONG $0x3f45c2c4; BYTE $0xf8 // vpmaxud ymm7, ymm7, ymm8 - LONG $0x80e88348 // sub rax, -128 - LONG $0x04c28349 // add r10, 4 - JNE LBB1_9 - -LBB1_10: - WORD $0x854d; BYTE 
$0xc9 // test r9, r9 - JE LBB1_13 - LONG $0x87048d48 // lea rax, [rdi + 4*rax] - WORD $0xf749; BYTE $0xd9 // neg r9 - -LBB1_12: - LONG $0x006f7ec5 // vmovdqu ymm8, yword [rax] - LONG $0x486f7ec5; BYTE $0x20 // vmovdqu ymm9, yword [rax + 32] - LONG $0x506f7ec5; BYTE $0x40 // vmovdqu ymm10, yword [rax + 64] - LONG $0x586f7ec5; BYTE $0x60 // vmovdqu ymm11, yword [rax + 96] - LONG $0x3b6dc2c4; BYTE $0xd1 // vpminud ymm2, ymm2, ymm9 - LONG $0x3b75c2c4; BYTE $0xc8 // vpminud ymm1, ymm1, ymm8 - LONG $0x3b5dc2c4; BYTE $0xe2 // vpminud ymm4, ymm4, ymm10 - LONG $0x3b4dc2c4; BYTE $0xf3 // vpminud ymm6, ymm6, ymm11 - LONG $0x3f65c2c4; BYTE $0xd9 // vpmaxud ymm3, ymm3, ymm9 - LONG $0x3f7dc2c4; BYTE $0xc0 // vpmaxud ymm0, ymm0, ymm8 - LONG $0x3f55c2c4; BYTE $0xea // vpmaxud ymm5, ymm5, ymm10 - LONG $0x3f45c2c4; BYTE $0xfb // vpmaxud ymm7, ymm7, ymm11 - LONG $0x80e88348 // sub rax, -128 - WORD $0xff49; BYTE $0xc1 // inc r9 - JNE LBB1_12 - -LBB1_13: - LONG $0x3b6de2c4; BYTE $0xd6 // vpminud ymm2, ymm2, ymm6 - LONG $0x3b75e2c4; BYTE $0xcc // vpminud ymm1, ymm1, ymm4 - LONG $0x3b75e2c4; BYTE $0xca // vpminud ymm1, ymm1, ymm2 - LONG $0x3f65e2c4; BYTE $0xd7 // vpmaxud ymm2, ymm3, ymm7 - LONG $0x3f7de2c4; BYTE $0xc5 // vpmaxud ymm0, ymm0, ymm5 - LONG $0x3f7de2c4; BYTE $0xc2 // vpmaxud ymm0, ymm0, ymm2 - LONG $0x397de3c4; WORD $0x01c2 // vextracti128 xmm2, ymm0, 1 - LONG $0x3f79e2c4; BYTE $0xc2 // vpmaxud xmm0, xmm0, xmm2 - LONG $0xd070f9c5; BYTE $0x4e // vpshufd xmm2, xmm0, 78 - LONG $0x3f79e2c4; BYTE $0xc2 // vpmaxud xmm0, xmm0, xmm2 - LONG $0xd070f9c5; BYTE $0xe5 // vpshufd xmm2, xmm0, 229 - LONG $0x3f79e2c4; BYTE $0xc2 // vpmaxud xmm0, xmm0, xmm2 - LONG $0xc67ef9c5 // vmovd esi, xmm0 - LONG $0x397de3c4; WORD $0x01c8 // vextracti128 xmm0, ymm1, 1 - LONG $0x3b71e2c4; BYTE $0xc0 // vpminud xmm0, xmm1, xmm0 +LBB1_5: + LONG $0x046f7ec5; BYTE $0x87 // vmovdqu ymm8, yword [rdi + 4*rax] + LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu ymm9, yword [rdi + 4*rax + 32] + LONG $0x546f7ec5; WORD 
$0x4087 // vmovdqu ymm10, yword [rdi + 4*rax + 64] + LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu ymm11, yword [rdi + 4*rax + 96] + LONG $0x3b7dc2c4; BYTE $0xc0 // vpminud ymm0, ymm0, ymm8 + LONG $0x3b75c2c4; BYTE $0xc9 // vpminud ymm1, ymm1, ymm9 + LONG $0x3b6dc2c4; BYTE $0xd2 // vpminud ymm2, ymm2, ymm10 + LONG $0x3b65c2c4; BYTE $0xdb // vpminud ymm3, ymm3, ymm11 + LONG $0x3f5dc2c4; BYTE $0xe0 // vpmaxud ymm4, ymm4, ymm8 + LONG $0x3f55c2c4; BYTE $0xe9 // vpmaxud ymm5, ymm5, ymm9 + LONG $0x3f4dc2c4; BYTE $0xf2 // vpmaxud ymm6, ymm6, ymm10 + LONG $0x3f45c2c4; BYTE $0xfb // vpmaxud ymm7, ymm7, ymm11 + LONG $0x20c08348 // add rax, 32 + WORD $0x3949; BYTE $0xc1 // cmp r9, rax + JNE LBB1_5 + LONG $0x3f5de2c4; BYTE $0xe5 // vpmaxud ymm4, ymm4, ymm5 + LONG $0x3f5de2c4; BYTE $0xe6 // vpmaxud ymm4, ymm4, ymm6 + LONG $0x3f5de2c4; BYTE $0xe7 // vpmaxud ymm4, ymm4, ymm7 + LONG $0x397de3c4; WORD $0x01e5 // vextracti128 xmm5, ymm4, 1 + LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5 + LONG $0xec70f9c5; BYTE $0x4e // vpshufd xmm5, xmm4, 78 + LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5 + LONG $0xec70f9c5; BYTE $0xe5 // vpshufd xmm5, xmm4, 229 + LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5 + LONG $0x7e79c1c4; BYTE $0xe2 // vmovd r10d, xmm4 + LONG $0x3b7de2c4; BYTE $0xc1 // vpminud ymm0, ymm0, ymm1 + LONG $0x3b7de2c4; BYTE $0xc2 // vpminud ymm0, ymm0, ymm2 + LONG $0x3b7de2c4; BYTE $0xc3 // vpminud ymm0, ymm0, ymm3 + LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1 + LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1 LONG $0xc870f9c5; BYTE $0x4e // vpshufd xmm1, xmm0, 78 LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1 LONG $0xc870f9c5; BYTE $0xe5 // vpshufd xmm1, xmm0, 229 LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1 - LONG $0x7e79c1c4; BYTE $0xc1 // vmovd r9d, xmm0 - WORD $0x394d; BYTE $0xc3 // cmp r11, r8 - JE LBB1_14 - -LBB1_4: - WORD $0xf089 // mov eax, esi + LONG $0xc07ef9c5 // vmovd eax, xmm0 + WORD $0x8944; 
BYTE $0xd6 // mov esi, r10d + WORD $0x394d; BYTE $0xc1 // cmp r9, r8 + JE LBB1_8 -LBB1_5: - LONG $0x9f348b42 // mov esi, dword [rdi + 4*r11] - WORD $0x3941; BYTE $0xf1 // cmp r9d, esi - LONG $0xce430f44 // cmovae r9d, esi +LBB1_7: + LONG $0x8f348b42 // mov esi, dword [rdi + 4*r9] WORD $0xf039 // cmp eax, esi - WORD $0x470f; BYTE $0xf0 // cmova esi, eax - LONG $0x01c38349 // add r11, 1 - WORD $0xf089 // mov eax, esi - WORD $0x394d; BYTE $0xd8 // cmp r8, r11 - JNE LBB1_5 + WORD $0x430f; BYTE $0xc6 // cmovae eax, esi + WORD $0x3941; BYTE $0xf2 // cmp r10d, esi + LONG $0xf2470f41 // cmova esi, r10d + LONG $0x01c18349 // add r9, 1 + WORD $0x8941; BYTE $0xf2 // mov r10d, esi + WORD $0x394d; BYTE $0xc8 // cmp r8, r9 + JNE LBB1_7 -LBB1_14: - WORD $0x3189 // mov dword [rcx], esi - WORD $0x8944; BYTE $0x0a // mov dword [rdx], r9d - SUBQ $8, SP +LBB1_8: + WORD $0x3189 // mov dword [rcx], esi + WORD $0x0289 // mov dword [rdx], eax VZEROUPPER RET @@ -369,984 +193,251 @@ DATA LCDATA2<>+0x000(SB)/8, $0x8000000000000000 DATA LCDATA2<>+0x008(SB)/8, $0x7fffffffffffffff GLOBL LCDATA2<>(SB), 8, $16 -TEXT ·_int64_max_min_avx2(SB), $232-32 +TEXT ·_int64_max_min_avx2(SB), $0-32 MOVQ values+0(FP), DI MOVQ length+8(FP), SI MOVQ minout+16(FP), DX MOVQ maxout+24(FP), CX - ADDQ $8, SP LEAQ LCDATA2<>(SB), BP - QUAD $0xffffffffffffb949; WORD $0x7fff // mov r9, 9223372036854775807 + QUAD $0xffffffffffffb848; WORD $0x7fff // mov rax, 9223372036854775807 WORD $0xf685 // test esi, esi JLE LBB2_1 WORD $0x8941; BYTE $0xf0 // mov r8d, esi - WORD $0xfe83; BYTE $0x1f // cmp esi, 31 - JA LBB2_6 - LONG $0x01718d49 // lea rsi, [r9 + 1] - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - JMP LBB2_4 + WORD $0xfe83; BYTE $0x0f // cmp esi, 15 + JA LBB2_4 + LONG $0x01508d4c // lea r10, [rax + 1] + WORD $0x3145; BYTE $0xc9 // xor r9d, r9d + JMP LBB2_7 LBB2_1: - LONG $0x01718d49 // lea rsi, [r9 + 1] - JMP LBB2_14 + LONG $0x01708d48 // lea rsi, [rax + 1] + JMP LBB2_8 -LBB2_6: - WORD $0x8945; BYTE $0xc3 // mov r11d, r8d 
- LONG $0xe0e38341 // and r11d, -32 - LONG $0xe0438d49 // lea rax, [r11 - 32] - WORD $0x8949; BYTE $0xc2 // mov r10, rax - LONG $0x05eac149 // shr r10, 5 - LONG $0x01c28349 // add r10, 1 - WORD $0x8945; BYTE $0xd1 // mov r9d, r10d - LONG $0x03e18341 // and r9d, 3 - LONG $0x60f88348 // cmp rax, 96 - JAE LBB2_8 - LONG $0x597d62c4; WORD $0x007d // vpbroadcastq ymm15, qword 0[rbp] /* [rip + .LCPI2_0] */ - LONG $0x597d62c4; WORD $0x085d // vpbroadcastq ymm11, qword 8[rbp] /* [rip + .LCPI2_1] */ - WORD $0xc031 // xor eax, eax - LONG $0x5c7f7dc5; WORD $0x2024 // vmovdqa yword [rsp + 32], ymm11 - LONG $0x6f7dc1c4; BYTE $0xdb // vmovdqa ymm3, ymm11 - LONG $0x6f7d41c4; BYTE $0xcb // vmovdqa ymm9, ymm11 - LONG $0x6f7dc1c4; BYTE $0xeb // vmovdqa ymm5, ymm11 - LONG $0x6f7dc1c4; BYTE $0xe3 // vmovdqa ymm4, ymm11 - LONG $0x6f7dc1c4; BYTE $0xf3 // vmovdqa ymm6, ymm11 - LONG $0x5c7f7dc5; WORD $0x6024 // vmovdqa yword [rsp + 96], ymm11 - LONG $0x7c7f7dc5; WORD $0x4024 // vmovdqa yword [rsp + 64], ymm15 - LONG $0x6f7dc1c4; BYTE $0xd7 // vmovdqa ymm2, ymm15 - LONG $0x6f7d41c4; BYTE $0xc7 // vmovdqa ymm8, ymm15 - LONG $0x6f7d41c4; BYTE $0xe7 // vmovdqa ymm12, ymm15 - LONG $0x6f7d41c4; BYTE $0xef // vmovdqa ymm13, ymm15 - LONG $0x6f7d41c4; BYTE $0xf7 // vmovdqa ymm14, ymm15 - LONG $0x3c7f7dc5; BYTE $0x24 // vmovdqa yword [rsp], ymm15 - JMP LBB2_10 - -LBB2_8: - LONG $0xfce28349 // and r10, -4 - LONG $0x597d62c4; WORD $0x007d // vpbroadcastq ymm15, qword 0[rbp] /* [rip + .LCPI2_0] */ - WORD $0xf749; BYTE $0xda // neg r10 - LONG $0x597d62c4; WORD $0x085d // vpbroadcastq ymm11, qword 8[rbp] /* [rip + .LCPI2_1] */ +LBB2_4: + WORD $0x8945; BYTE $0xc1 // mov r9d, r8d + LONG $0x597de2c4; WORD $0x0065 // vpbroadcastq ymm4, qword 0[rbp] /* [rip + .LCPI2_0] */ + LONG $0xf0e18341 // and r9d, -16 + LONG $0x597de2c4; WORD $0x0845 // vpbroadcastq ymm0, qword 8[rbp] /* [rip + .LCPI2_1] */ WORD $0xc031 // xor eax, eax - LONG $0x5c7f7dc5; WORD $0x2024 // vmovdqa yword [rsp + 32], ymm11 - LONG 
$0x6f7dc1c4; BYTE $0xdb // vmovdqa ymm3, ymm11 - LONG $0x6f7d41c4; BYTE $0xcb // vmovdqa ymm9, ymm11 - LONG $0x6f7dc1c4; BYTE $0xeb // vmovdqa ymm5, ymm11 - LONG $0x6f7dc1c4; BYTE $0xe3 // vmovdqa ymm4, ymm11 - LONG $0x6f7dc1c4; BYTE $0xf3 // vmovdqa ymm6, ymm11 - LONG $0x5c7f7dc5; WORD $0x6024 // vmovdqa yword [rsp + 96], ymm11 - LONG $0x7c7f7dc5; WORD $0x4024 // vmovdqa yword [rsp + 64], ymm15 - LONG $0x6f7dc1c4; BYTE $0xd7 // vmovdqa ymm2, ymm15 - LONG $0x6f7d41c4; BYTE $0xc7 // vmovdqa ymm8, ymm15 - LONG $0x6f7d41c4; BYTE $0xe7 // vmovdqa ymm12, ymm15 - LONG $0x6f7d41c4; BYTE $0xef // vmovdqa ymm13, ymm15 - LONG $0x6f7d41c4; BYTE $0xf7 // vmovdqa ymm14, ymm15 - LONG $0x3c7f7dc5; BYTE $0x24 // vmovdqa yword [rsp], ymm15 - -LBB2_9: - QUAD $0x0000e0c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 224] - LONG $0x6f7d41c4; BYTE $0xd0 // vmovdqa ymm10, ymm8 - LONG $0xc26f7dc5 // vmovdqa ymm8, ymm2 - LONG $0xd36ffdc5 // vmovdqa ymm2, ymm3 - LONG $0x6f7dc1c4; BYTE $0xd9 // vmovdqa ymm3, ymm9 - LONG $0x377d42c4; BYTE $0xcb // vpcmpgtq ymm9, ymm0, ymm11 - LONG $0x4b7dc3c4; WORD $0x90cb // vblendvpd ymm1, ymm0, ymm11, ymm9 - QUAD $0x0000a0248c29fdc5; BYTE $0x00 // vmovapd yword [rsp + 160], ymm1 - LONG $0x370562c4; BYTE $0xc8 // vpcmpgtq ymm9, ymm15, ymm0 - LONG $0x4b7dc3c4; WORD $0x90c7 // vblendvpd ymm0, ymm0, ymm15, ymm9 - QUAD $0x000080248429fdc5; BYTE $0x00 // vmovapd yword [rsp + 128], ymm0 - QUAD $0x0000c0c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 192] - LONG $0x377d62c4; BYTE $0xce // vpcmpgtq ymm9, ymm0, ymm6 - LONG $0x4b7de3c4; WORD $0x90fe // vblendvpd ymm7, ymm0, ymm6, ymm9 - LONG $0x370d62c4; BYTE $0xc8 // vpcmpgtq ymm9, ymm14, ymm0 - LONG $0x4b7d43c4; WORD $0x90f6 // vblendvpd ymm14, ymm0, ymm14, ymm9 - QUAD $0x0000a0c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 160] - LONG $0x377d62c4; BYTE $0xcc // vpcmpgtq ymm9, ymm0, ymm4 - LONG $0x4b7de3c4; WORD $0x90f4 // vblendvpd ymm6, ymm0, ymm4, ymm9 - LONG $0x371562c4; 
BYTE $0xc8 // vpcmpgtq ymm9, ymm13, ymm0 - LONG $0x4b7d43c4; WORD $0x90ed // vblendvpd ymm13, ymm0, ymm13, ymm9 - QUAD $0x000080c78c6f7ec5; BYTE $0x00 // vmovdqu ymm9, yword [rdi + 8*rax + 128] - LONG $0x3735e2c4; BYTE $0xc5 // vpcmpgtq ymm0, ymm9, ymm5 - LONG $0x4b35e3c4; WORD $0x00cd // vblendvpd ymm1, ymm9, ymm5, ymm0 - LONG $0x371dc2c4; BYTE $0xe9 // vpcmpgtq ymm5, ymm12, ymm9 - LONG $0x4b3543c4; WORD $0x50e4 // vblendvpd ymm12, ymm9, ymm12, ymm5 - LONG $0x6c6ffec5; WORD $0x60c7 // vmovdqu ymm5, yword [rdi + 8*rax + 96] - LONG $0x375562c4; BYTE $0xcb // vpcmpgtq ymm9, ymm5, ymm3 - LONG $0x4b5563c4; WORD $0x90cb // vblendvpd ymm9, ymm5, ymm3, ymm9 - LONG $0x372de2c4; BYTE $0xe5 // vpcmpgtq ymm4, ymm10, ymm5 - LONG $0x4b5543c4; WORD $0x40d2 // vblendvpd ymm10, ymm5, ymm10, ymm4 - LONG $0x646ffec5; WORD $0x40c7 // vmovdqu ymm4, yword [rdi + 8*rax + 64] - LONG $0x375de2c4; BYTE $0xea // vpcmpgtq ymm5, ymm4, ymm2 - LONG $0x4b5de3c4; WORD $0x50ea // vblendvpd ymm5, ymm4, ymm2, ymm5 - LONG $0x373de2c4; BYTE $0xdc // vpcmpgtq ymm3, ymm8, ymm4 - LONG $0x4b5dc3c4; WORD $0x30c0 // vblendvpd ymm0, ymm4, ymm8, ymm3 - LONG $0x146ffec5; BYTE $0xc7 // vmovdqu ymm2, yword [rdi + 8*rax] - LONG $0x646ffdc5; WORD $0x6024 // vmovdqa ymm4, yword [rsp + 96] - LONG $0x376de2c4; BYTE $0xdc // vpcmpgtq ymm3, ymm2, ymm4 - LONG $0x4b6de3c4; WORD $0x30dc // vblendvpd ymm3, ymm2, ymm4, ymm3 - LONG $0x1c6f7dc5; BYTE $0x24 // vmovdqa ymm11, yword [rsp] - LONG $0x3725e2c4; BYTE $0xe2 // vpcmpgtq ymm4, ymm11, ymm2 - LONG $0x4b6dc3c4; WORD $0x40e3 // vblendvpd ymm4, ymm2, ymm11, ymm4 - LONG $0x546ffec5; WORD $0x20c7 // vmovdqu ymm2, yword [rdi + 8*rax + 32] - LONG $0x7c6f7dc5; WORD $0x2024 // vmovdqa ymm15, yword [rsp + 32] - LONG $0x376d42c4; BYTE $0xdf // vpcmpgtq ymm11, ymm2, ymm15 - LONG $0x4b6d43c4; WORD $0xb0df // vblendvpd ymm11, ymm2, ymm15, ymm11 - LONG $0x446f7dc5; WORD $0x4024 // vmovdqa ymm8, yword [rsp + 64] - LONG $0x373d62c4; BYTE $0xfa // vpcmpgtq ymm15, ymm8, ymm2 - LONG 
$0x4b6dc3c4; WORD $0xf0d0 // vblendvpd ymm2, ymm2, ymm8, ymm15 - QUAD $0x000120c7846f7ec5; BYTE $0x00 // vmovdqu ymm8, yword [rdi + 8*rax + 288] - LONG $0x373d42c4; BYTE $0xfb // vpcmpgtq ymm15, ymm8, ymm11 - LONG $0x4b3d43c4; WORD $0xf0db // vblendvpd ymm11, ymm8, ymm11, ymm15 - LONG $0x5c297dc5; WORD $0x2024 // vmovapd yword [rsp + 32], ymm11 - LONG $0x376d42c4; BYTE $0xd8 // vpcmpgtq ymm11, ymm2, ymm8 - LONG $0x4b3de3c4; WORD $0xb0d2 // vblendvpd ymm2, ymm8, ymm2, ymm11 - LONG $0x1429fdc5; BYTE $0x24 // vmovapd yword [rsp], ymm2 - QUAD $0x000100c79c6f7ec5; BYTE $0x00 // vmovdqu ymm11, yword [rdi + 8*rax + 256] - LONG $0x3725e2c4; BYTE $0xd3 // vpcmpgtq ymm2, ymm11, ymm3 - LONG $0x4b2563c4; WORD $0x20c3 // vblendvpd ymm8, ymm11, ymm3, ymm2 - LONG $0x375dc2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm4, ymm11 - LONG $0x4b25e3c4; WORD $0x30dc // vblendvpd ymm3, ymm11, ymm4, ymm3 - QUAD $0x000140c79c6f7ec5; BYTE $0x00 // vmovdqu ymm11, yword [rdi + 8*rax + 320] - LONG $0x3725e2c4; BYTE $0xe5 // vpcmpgtq ymm4, ymm11, ymm5 - LONG $0x4b25e3c4; WORD $0x40e5 // vblendvpd ymm4, ymm11, ymm5, ymm4 - LONG $0x377dc2c4; BYTE $0xeb // vpcmpgtq ymm5, ymm0, ymm11 - LONG $0x4b25e3c4; WORD $0x50e8 // vblendvpd ymm5, ymm11, ymm0, ymm5 - QUAD $0x000160c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 352] - LONG $0x377d42c4; BYTE $0xd9 // vpcmpgtq ymm11, ymm0, ymm9 - LONG $0x4b7d43c4; WORD $0xb0c9 // vblendvpd ymm9, ymm0, ymm9, ymm11 - LONG $0x372d62c4; BYTE $0xd8 // vpcmpgtq ymm11, ymm10, ymm0 - LONG $0x4b7d43c4; WORD $0xb0d2 // vblendvpd ymm10, ymm0, ymm10, ymm11 - QUAD $0x000180c79c6f7ec5; BYTE $0x00 // vmovdqu ymm11, yword [rdi + 8*rax + 384] - LONG $0x3725e2c4; BYTE $0xc1 // vpcmpgtq ymm0, ymm11, ymm1 - LONG $0x4b25e3c4; WORD $0x00d1 // vblendvpd ymm2, ymm11, ymm1, ymm0 - LONG $0x371dc2c4; BYTE $0xcb // vpcmpgtq ymm1, ymm12, ymm11 - LONG $0x4b2543c4; WORD $0x10e4 // vblendvpd ymm12, ymm11, ymm12, ymm1 - QUAD $0x0001a0c78c6ffec5; BYTE $0x00 // vmovdqu ymm1, yword [rdi + 8*rax + 
416] - LONG $0x377562c4; BYTE $0xde // vpcmpgtq ymm11, ymm1, ymm6 - LONG $0x4b75e3c4; WORD $0xb0f6 // vblendvpd ymm6, ymm1, ymm6, ymm11 - LONG $0x371562c4; BYTE $0xd9 // vpcmpgtq ymm11, ymm13, ymm1 - LONG $0x4b75c3c4; WORD $0xb0cd // vblendvpd ymm1, ymm1, ymm13, ymm11 - QUAD $0x0001c0c79c6f7ec5; BYTE $0x00 // vmovdqu ymm11, yword [rdi + 8*rax + 448] - LONG $0x372562c4; BYTE $0xef // vpcmpgtq ymm13, ymm11, ymm7 - LONG $0x4b25e3c4; WORD $0xd0ff // vblendvpd ymm7, ymm11, ymm7, ymm13 - LONG $0x370d42c4; BYTE $0xeb // vpcmpgtq ymm13, ymm14, ymm11 - LONG $0x4b2543c4; WORD $0xd0ee // vblendvpd ymm13, ymm11, ymm14, ymm13 - QUAD $0x0001e0c79c6f7ec5; BYTE $0x00 // vmovdqu ymm11, yword [rdi + 8*rax + 480] - QUAD $0x0000a024846ffdc5; BYTE $0x00 // vmovdqa ymm0, yword [rsp + 160] - LONG $0x372562c4; BYTE $0xf0 // vpcmpgtq ymm14, ymm11, ymm0 - LONG $0x4b2563c4; WORD $0xe0f0 // vblendvpd ymm14, ymm11, ymm0, ymm14 - QUAD $0x00008024846ffdc5; BYTE $0x00 // vmovdqa ymm0, yword [rsp + 128] - LONG $0x377d42c4; BYTE $0xfb // vpcmpgtq ymm15, ymm0, ymm11 - LONG $0x4b2563c4; WORD $0xf0f8 // vblendvpd ymm15, ymm11, ymm0, ymm15 - QUAD $0x0002e0c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 736] - LONG $0x377d42c4; BYTE $0xde // vpcmpgtq ymm11, ymm0, ymm14 - LONG $0x4b7d43c4; WORD $0xb0de // vblendvpd ymm11, ymm0, ymm14, ymm11 - QUAD $0x0000a0249c297dc5; BYTE $0x00 // vmovapd yword [rsp + 160], ymm11 - LONG $0x370562c4; BYTE $0xf0 // vpcmpgtq ymm14, ymm15, ymm0 - LONG $0x4b7dc3c4; WORD $0xe0c7 // vblendvpd ymm0, ymm0, ymm15, ymm14 - QUAD $0x000080248429fdc5; BYTE $0x00 // vmovapd yword [rsp + 128], ymm0 - QUAD $0x0002c0c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 704] - LONG $0x377d62c4; BYTE $0xf7 // vpcmpgtq ymm14, ymm0, ymm7 - LONG $0x4b7de3c4; WORD $0xe0ff // vblendvpd ymm7, ymm0, ymm7, ymm14 - LONG $0x371562c4; BYTE $0xf0 // vpcmpgtq ymm14, ymm13, ymm0 - LONG $0x4b7d43c4; WORD $0xe0f5 // vblendvpd ymm14, ymm0, ymm13, ymm14 - QUAD $0x0002a0c7846ffec5; BYTE 
$0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 672] - LONG $0x377d62c4; BYTE $0xee // vpcmpgtq ymm13, ymm0, ymm6 - LONG $0x4b7de3c4; WORD $0xd0f6 // vblendvpd ymm6, ymm0, ymm6, ymm13 - LONG $0x377562c4; BYTE $0xe8 // vpcmpgtq ymm13, ymm1, ymm0 - LONG $0x4b7d63c4; WORD $0xd0e9 // vblendvpd ymm13, ymm0, ymm1, ymm13 - QUAD $0x000280c78c6ffec5; BYTE $0x00 // vmovdqu ymm1, yword [rdi + 8*rax + 640] - LONG $0x3775e2c4; BYTE $0xc2 // vpcmpgtq ymm0, ymm1, ymm2 - LONG $0x4b75e3c4; WORD $0x00c2 // vblendvpd ymm0, ymm1, ymm2, ymm0 - LONG $0x371de2c4; BYTE $0xd1 // vpcmpgtq ymm2, ymm12, ymm1 - LONG $0x4b7543c4; WORD $0x20e4 // vblendvpd ymm12, ymm1, ymm12, ymm2 - QUAD $0x000260c78c6ffec5; BYTE $0x00 // vmovdqu ymm1, yword [rdi + 8*rax + 608] - LONG $0x3775c2c4; BYTE $0xd1 // vpcmpgtq ymm2, ymm1, ymm9 - LONG $0x4b7543c4; WORD $0x20c9 // vblendvpd ymm9, ymm1, ymm9, ymm2 - LONG $0x372de2c4; BYTE $0xd1 // vpcmpgtq ymm2, ymm10, ymm1 - LONG $0x4b7543c4; WORD $0x20d2 // vblendvpd ymm10, ymm1, ymm10, ymm2 - QUAD $0x000240c78c6ffec5; BYTE $0x00 // vmovdqu ymm1, yword [rdi + 8*rax + 576] - LONG $0x3775e2c4; BYTE $0xd4 // vpcmpgtq ymm2, ymm1, ymm4 - LONG $0x4b75e3c4; WORD $0x20d4 // vblendvpd ymm2, ymm1, ymm4, ymm2 - LONG $0x3755e2c4; BYTE $0xe1 // vpcmpgtq ymm4, ymm5, ymm1 - LONG $0x4b75e3c4; WORD $0x40cd // vblendvpd ymm1, ymm1, ymm5, ymm4 - QUAD $0x000200c7a46ffec5; BYTE $0x00 // vmovdqu ymm4, yword [rdi + 8*rax + 512] - LONG $0x375dc2c4; BYTE $0xe8 // vpcmpgtq ymm5, ymm4, ymm8 - LONG $0x4b5dc3c4; WORD $0x50e8 // vblendvpd ymm5, ymm4, ymm8, ymm5 - LONG $0x376562c4; BYTE $0xc4 // vpcmpgtq ymm8, ymm3, ymm4 - LONG $0x4b5de3c4; WORD $0x80db // vblendvpd ymm3, ymm4, ymm3, ymm8 - QUAD $0x000220c7a46ffec5; BYTE $0x00 // vmovdqu ymm4, yword [rdi + 8*rax + 544] - LONG $0x5c6f7dc5; WORD $0x2024 // vmovdqa ymm11, yword [rsp + 32] - LONG $0x375d42c4; BYTE $0xc3 // vpcmpgtq ymm8, ymm4, ymm11 - LONG $0x4b5d43c4; WORD $0x80c3 // vblendvpd ymm8, ymm4, ymm11, ymm8 - LONG $0x3c6f7dc5; BYTE $0x24 // 
vmovdqa ymm15, yword [rsp] - LONG $0x370562c4; BYTE $0xdc // vpcmpgtq ymm11, ymm15, ymm4 - LONG $0x4b5dc3c4; WORD $0xb0e7 // vblendvpd ymm4, ymm4, ymm15, ymm11 - QUAD $0x000320c79c6f7ec5; BYTE $0x00 // vmovdqu ymm11, yword [rdi + 8*rax + 800] - LONG $0x372542c4; BYTE $0xf8 // vpcmpgtq ymm15, ymm11, ymm8 - LONG $0x4b2543c4; WORD $0xf0c0 // vblendvpd ymm8, ymm11, ymm8, ymm15 - LONG $0x44297dc5; WORD $0x2024 // vmovapd yword [rsp + 32], ymm8 - LONG $0x375d42c4; BYTE $0xc3 // vpcmpgtq ymm8, ymm4, ymm11 - LONG $0x4b25e3c4; WORD $0x80e4 // vblendvpd ymm4, ymm11, ymm4, ymm8 - LONG $0x6429fdc5; WORD $0x4024 // vmovapd yword [rsp + 64], ymm4 - QUAD $0x000300c7a46ffec5; BYTE $0x00 // vmovdqu ymm4, yword [rdi + 8*rax + 768] - LONG $0x375d62c4; BYTE $0xdd // vpcmpgtq ymm11, ymm4, ymm5 - LONG $0x4b5de3c4; WORD $0xb0ed // vblendvpd ymm5, ymm4, ymm5, ymm11 - LONG $0x6c29fdc5; WORD $0x6024 // vmovapd yword [rsp + 96], ymm5 - LONG $0x3765e2c4; BYTE $0xec // vpcmpgtq ymm5, ymm3, ymm4 - LONG $0x4b5de3c4; WORD $0x50db // vblendvpd ymm3, ymm4, ymm3, ymm5 - LONG $0x1c29fdc5; BYTE $0x24 // vmovapd yword [rsp], ymm3 - QUAD $0x000340c7a46ffec5; BYTE $0x00 // vmovdqu ymm4, yword [rdi + 8*rax + 832] - LONG $0x375de2c4; BYTE $0xda // vpcmpgtq ymm3, ymm4, ymm2 - LONG $0x4b5de3c4; WORD $0x30da // vblendvpd ymm3, ymm4, ymm2, ymm3 - LONG $0x3775e2c4; BYTE $0xd4 // vpcmpgtq ymm2, ymm1, ymm4 - LONG $0x4b5de3c4; WORD $0x20d1 // vblendvpd ymm2, ymm4, ymm1, ymm2 - QUAD $0x000360c78c6ffec5; BYTE $0x00 // vmovdqu ymm1, yword [rdi + 8*rax + 864] - LONG $0x3775c2c4; BYTE $0xe1 // vpcmpgtq ymm4, ymm1, ymm9 - LONG $0x4b7543c4; WORD $0x40c9 // vblendvpd ymm9, ymm1, ymm9, ymm4 - LONG $0x372de2c4; BYTE $0xe9 // vpcmpgtq ymm5, ymm10, ymm1 - LONG $0x4b7543c4; WORD $0x50c2 // vblendvpd ymm8, ymm1, ymm10, ymm5 - QUAD $0x000380c78c6ffec5; BYTE $0x00 // vmovdqu ymm1, yword [rdi + 8*rax + 896] - LONG $0x3775e2c4; BYTE $0xe8 // vpcmpgtq ymm5, ymm1, ymm0 - LONG $0x4b75e3c4; WORD $0x50e8 // vblendvpd ymm5, ymm1, ymm0, 
ymm5 - LONG $0x371de2c4; BYTE $0xc1 // vpcmpgtq ymm0, ymm12, ymm1 - LONG $0x4b7543c4; WORD $0x00e4 // vblendvpd ymm12, ymm1, ymm12, ymm0 - QUAD $0x0003a0c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 928] - LONG $0x377de2c4; BYTE $0xce // vpcmpgtq ymm1, ymm0, ymm6 - LONG $0x4b7de3c4; WORD $0x10e6 // vblendvpd ymm4, ymm0, ymm6, ymm1 - LONG $0x3715e2c4; BYTE $0xc8 // vpcmpgtq ymm1, ymm13, ymm0 - LONG $0x4b7d43c4; WORD $0x10ed // vblendvpd ymm13, ymm0, ymm13, ymm1 - QUAD $0x0003c0c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 960] - LONG $0x377de2c4; BYTE $0xcf // vpcmpgtq ymm1, ymm0, ymm7 - LONG $0x4b7de3c4; WORD $0x10f7 // vblendvpd ymm6, ymm0, ymm7, ymm1 - LONG $0x370de2c4; BYTE $0xc8 // vpcmpgtq ymm1, ymm14, ymm0 - LONG $0x4b7d43c4; WORD $0x10f6 // vblendvpd ymm14, ymm0, ymm14, ymm1 - QUAD $0x0003e0c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 992] - QUAD $0x0000a024bc6ffdc5; BYTE $0x00 // vmovdqa ymm7, yword [rsp + 160] - LONG $0x377de2c4; BYTE $0xcf // vpcmpgtq ymm1, ymm0, ymm7 - LONG $0x4b7d63c4; WORD $0x10df // vblendvpd ymm11, ymm0, ymm7, ymm1 - QUAD $0x00008024bc6ffdc5; BYTE $0x00 // vmovdqa ymm7, yword [rsp + 128] - LONG $0x3745e2c4; BYTE $0xc8 // vpcmpgtq ymm1, ymm7, ymm0 - LONG $0x4b7d63c4; WORD $0x10ff // vblendvpd ymm15, ymm0, ymm7, ymm1 - LONG $0x80e88348 // sub rax, -128 - LONG $0x04c28349 // add r10, 4 - JNE LBB2_9 - -LBB2_10: - WORD $0x854d; BYTE $0xc9 // test r9, r9 - LONG $0xfd6ffdc5 // vmovdqa ymm7, ymm5 - LONG $0x6f7dc1c4; BYTE $0xe9 // vmovdqa ymm5, ymm9 - LONG $0x4c6f7dc5; WORD $0x6024 // vmovdqa ymm9, yword [rsp + 96] - LONG $0xd36f7dc5 // vmovdqa ymm10, ymm3 - JE LBB2_13 - LONG $0xc7048d48 // lea rax, [rdi + 8*rax] - WORD $0xf749; BYTE $0xd9 // neg r9 - -LBB2_12: - LONG $0x406ffec5; BYTE $0x20 // vmovdqu ymm0, yword [rax + 32] - LONG $0x5c6ffdc5; WORD $0x2024 // vmovdqa ymm3, yword [rsp + 32] - LONG $0x377de2c4; BYTE $0xcb // vpcmpgtq ymm1, ymm0, ymm3 - LONG $0x4b7de3c4; WORD $0x10db // vblendvpd 
ymm3, ymm0, ymm3, ymm1 - LONG $0x5c29fdc5; WORD $0x2024 // vmovapd yword [rsp + 32], ymm3 - LONG $0x5c6ffdc5; WORD $0x4024 // vmovdqa ymm3, yword [rsp + 64] - LONG $0x3765e2c4; BYTE $0xc8 // vpcmpgtq ymm1, ymm3, ymm0 - LONG $0x4b7de3c4; WORD $0x10db // vblendvpd ymm3, ymm0, ymm3, ymm1 - LONG $0x5c29fdc5; WORD $0x4024 // vmovapd yword [rsp + 64], ymm3 - LONG $0x006ffec5 // vmovdqu ymm0, yword [rax] - LONG $0x377dc2c4; BYTE $0xc9 // vpcmpgtq ymm1, ymm0, ymm9 - LONG $0x4b7d43c4; WORD $0x10c9 // vblendvpd ymm9, ymm0, ymm9, ymm1 - LONG $0x1c6ffdc5; BYTE $0x24 // vmovdqa ymm3, yword [rsp] - LONG $0x3765e2c4; BYTE $0xc8 // vpcmpgtq ymm1, ymm3, ymm0 - LONG $0x4b7de3c4; WORD $0x10db // vblendvpd ymm3, ymm0, ymm3, ymm1 - LONG $0x1c29fdc5; BYTE $0x24 // vmovapd yword [rsp], ymm3 - LONG $0x406ffec5; BYTE $0x40 // vmovdqu ymm0, yword [rax + 64] - LONG $0x377dc2c4; BYTE $0xca // vpcmpgtq ymm1, ymm0, ymm10 - LONG $0x4b7d43c4; WORD $0x10d2 // vblendvpd ymm10, ymm0, ymm10, ymm1 - LONG $0x376de2c4; BYTE $0xc8 // vpcmpgtq ymm1, ymm2, ymm0 - LONG $0x4b7de3c4; WORD $0x10d2 // vblendvpd ymm2, ymm0, ymm2, ymm1 - LONG $0x406ffec5; BYTE $0x60 // vmovdqu ymm0, yword [rax + 96] - LONG $0x377de2c4; BYTE $0xcd // vpcmpgtq ymm1, ymm0, ymm5 - LONG $0x4b7de3c4; WORD $0x10ed // vblendvpd ymm5, ymm0, ymm5, ymm1 - LONG $0x373de2c4; BYTE $0xc8 // vpcmpgtq ymm1, ymm8, ymm0 - LONG $0x4b7d43c4; WORD $0x10c0 // vblendvpd ymm8, ymm0, ymm8, ymm1 - QUAD $0x00000080806ffec5 // vmovdqu ymm0, yword [rax + 128] - LONG $0x377de2c4; BYTE $0xcf // vpcmpgtq ymm1, ymm0, ymm7 - LONG $0x4b7de3c4; WORD $0x10ff // vblendvpd ymm7, ymm0, ymm7, ymm1 - LONG $0x371de2c4; BYTE $0xc8 // vpcmpgtq ymm1, ymm12, ymm0 - LONG $0x4b7d43c4; WORD $0x10e4 // vblendvpd ymm12, ymm0, ymm12, ymm1 - QUAD $0x000000a0806ffec5 // vmovdqu ymm0, yword [rax + 160] - LONG $0x377de2c4; BYTE $0xcc // vpcmpgtq ymm1, ymm0, ymm4 - LONG $0x4b7de3c4; WORD $0x10e4 // vblendvpd ymm4, ymm0, ymm4, ymm1 - LONG $0x3715e2c4; BYTE $0xc8 // vpcmpgtq ymm1, ymm13, 
ymm0 - LONG $0x4b7d43c4; WORD $0x10ed // vblendvpd ymm13, ymm0, ymm13, ymm1 - QUAD $0x000000c0806ffec5 // vmovdqu ymm0, yword [rax + 192] - LONG $0x377de2c4; BYTE $0xce // vpcmpgtq ymm1, ymm0, ymm6 - LONG $0x4b7de3c4; WORD $0x10f6 // vblendvpd ymm6, ymm0, ymm6, ymm1 - LONG $0x370de2c4; BYTE $0xc8 // vpcmpgtq ymm1, ymm14, ymm0 - LONG $0x4b7d43c4; WORD $0x10f6 // vblendvpd ymm14, ymm0, ymm14, ymm1 - QUAD $0x000000e0806ffec5 // vmovdqu ymm0, yword [rax + 224] - LONG $0x377dc2c4; BYTE $0xcb // vpcmpgtq ymm1, ymm0, ymm11 - LONG $0x4b7d43c4; WORD $0x10db // vblendvpd ymm11, ymm0, ymm11, ymm1 - LONG $0x3705e2c4; BYTE $0xc8 // vpcmpgtq ymm1, ymm15, ymm0 - LONG $0x4b7d43c4; WORD $0x10ff // vblendvpd ymm15, ymm0, ymm15, ymm1 - LONG $0x01000548; WORD $0x0000 // add rax, 256 - WORD $0xff49; BYTE $0xc1 // inc r9 - JNE LBB2_12 + LONG $0xd86ffdc5 // vmovdqa ymm3, ymm0 + LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0 + LONG $0xc86ffdc5 // vmovdqa ymm1, ymm0 + LONG $0xfc6ffdc5 // vmovdqa ymm7, ymm4 + LONG $0xf46ffdc5 // vmovdqa ymm6, ymm4 + LONG $0xec6ffdc5 // vmovdqa ymm5, ymm4 -LBB2_13: - LONG $0x4c6ffdc5; WORD $0x4024 // vmovdqa ymm1, yword [rsp + 64] - LONG $0x3775c2c4; BYTE $0xc5 // vpcmpgtq ymm0, ymm1, ymm13 - LONG $0x4b15e3c4; WORD $0x00c1 // vblendvpd ymm0, ymm13, ymm1, ymm0 - LONG $0x373dc2c4; BYTE $0xcf // vpcmpgtq ymm1, ymm8, ymm15 - LONG $0x4b05c3c4; WORD $0x10c8 // vblendvpd ymm1, ymm15, ymm8, ymm1 - LONG $0x1c6ffdc5; BYTE $0x24 // vmovdqa ymm3, yword [rsp] - LONG $0x376542c4; BYTE $0xc4 // vpcmpgtq ymm8, ymm3, ymm12 - LONG $0x4b1d63c4; WORD $0x80c3 // vblendvpd ymm8, ymm12, ymm3, ymm8 - LONG $0x6f7dc1c4; BYTE $0xd9 // vmovdqa ymm3, ymm9 - LONG $0x376d42c4; BYTE $0xce // vpcmpgtq ymm9, ymm2, ymm14 - LONG $0x4b0de3c4; WORD $0x90d2 // vblendvpd ymm2, ymm14, ymm2, ymm9 - LONG $0x373d62c4; BYTE $0xca // vpcmpgtq ymm9, ymm8, ymm2 - LONG $0x4b6dc3c4; WORD $0x90d0 // vblendvpd ymm2, ymm2, ymm8, ymm9 - LONG $0x377d62c4; BYTE $0xc1 // vpcmpgtq ymm8, ymm0, ymm1 - LONG $0x4b75e3c4; WORD 
$0x80c0 // vblendvpd ymm0, ymm1, ymm0, ymm8 - LONG $0x376de2c4; BYTE $0xc8 // vpcmpgtq ymm1, ymm2, ymm0 - LONG $0x4b7de3c4; WORD $0x10c2 // vblendvpd ymm0, ymm0, ymm2, ymm1 +LBB2_5: + LONG $0x046f7ec5; BYTE $0xc7 // vmovdqu ymm8, yword [rdi + 8*rax] + LONG $0x373d62c4; BYTE $0xc8 // vpcmpgtq ymm9, ymm8, ymm0 + LONG $0x4b3de3c4; WORD $0x90c0 // vblendvpd ymm0, ymm8, ymm0, ymm9 + LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu ymm9, yword [rdi + 8*rax + 32] + LONG $0x373562c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm9, ymm3 + LONG $0x4b35e3c4; WORD $0xa0db // vblendvpd ymm3, ymm9, ymm3, ymm10 + LONG $0x546f7ec5; WORD $0x40c7 // vmovdqu ymm10, yword [rdi + 8*rax + 64] + LONG $0x372d62c4; BYTE $0xda // vpcmpgtq ymm11, ymm10, ymm2 + LONG $0x4b2de3c4; WORD $0xb0d2 // vblendvpd ymm2, ymm10, ymm2, ymm11 + LONG $0x5c6f7ec5; WORD $0x60c7 // vmovdqu ymm11, yword [rdi + 8*rax + 96] + LONG $0x372562c4; BYTE $0xe1 // vpcmpgtq ymm12, ymm11, ymm1 + LONG $0x4b25e3c4; WORD $0xc0c9 // vblendvpd ymm1, ymm11, ymm1, ymm12 + LONG $0x375d42c4; BYTE $0xe0 // vpcmpgtq ymm12, ymm4, ymm8 + LONG $0x4b3de3c4; WORD $0xc0e4 // vblendvpd ymm4, ymm8, ymm4, ymm12 + LONG $0x374542c4; BYTE $0xc1 // vpcmpgtq ymm8, ymm7, ymm9 + LONG $0x4b35e3c4; WORD $0x80ff // vblendvpd ymm7, ymm9, ymm7, ymm8 + LONG $0x374d42c4; BYTE $0xc2 // vpcmpgtq ymm8, ymm6, ymm10 + LONG $0x4b2de3c4; WORD $0x80f6 // vblendvpd ymm6, ymm10, ymm6, ymm8 + LONG $0x375542c4; BYTE $0xc3 // vpcmpgtq ymm8, ymm5, ymm11 + LONG $0x4b25e3c4; WORD $0x80ed // vblendvpd ymm5, ymm11, ymm5, ymm8 + LONG $0x10c08348 // add rax, 16 + WORD $0x3949; BYTE $0xc1 // cmp r9, rax + JNE LBB2_5 + LONG $0x375d62c4; BYTE $0xc7 // vpcmpgtq ymm8, ymm4, ymm7 + LONG $0x4b45e3c4; WORD $0x80e4 // vblendvpd ymm4, ymm7, ymm4, ymm8 + LONG $0x375de2c4; BYTE $0xfe // vpcmpgtq ymm7, ymm4, ymm6 + LONG $0x4b4de3c4; WORD $0x70e4 // vblendvpd ymm4, ymm6, ymm4, ymm7 + LONG $0x375de2c4; BYTE $0xf5 // vpcmpgtq ymm6, ymm4, ymm5 + LONG $0x4b55e3c4; WORD $0x60e4 // vblendvpd ymm4, ymm5, ymm4, 
ymm6 + LONG $0x197de3c4; WORD $0x01e5 // vextractf128 xmm5, ymm4, 1 + LONG $0x3759e2c4; BYTE $0xf5 // vpcmpgtq xmm6, xmm4, xmm5 + LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd xmm4, xmm5, xmm4, xmm6 + LONG $0x0479e3c4; WORD $0x4eec // vpermilps xmm5, xmm4, 78 + LONG $0x3759e2c4; BYTE $0xf5 // vpcmpgtq xmm6, xmm4, xmm5 + LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd xmm4, xmm5, xmm4, xmm6 + LONG $0x7ef9c1c4; BYTE $0xe2 // vmovq r10, xmm4 + LONG $0x3765e2c4; BYTE $0xe0 // vpcmpgtq ymm4, ymm3, ymm0 + LONG $0x4b65e3c4; WORD $0x40c0 // vblendvpd ymm0, ymm3, ymm0, ymm4 + LONG $0x376de2c4; BYTE $0xd8 // vpcmpgtq ymm3, ymm2, ymm0 + LONG $0x4b6de3c4; WORD $0x30c0 // vblendvpd ymm0, ymm2, ymm0, ymm3 + LONG $0x3775e2c4; BYTE $0xd0 // vpcmpgtq ymm2, ymm1, ymm0 + LONG $0x4b75e3c4; WORD $0x20c0 // vblendvpd ymm0, ymm1, ymm0, ymm2 LONG $0x197de3c4; WORD $0x01c1 // vextractf128 xmm1, ymm0, 1 - LONG $0x3779e2c4; BYTE $0xd1 // vpcmpgtq xmm2, xmm0, xmm1 + LONG $0x3771e2c4; BYTE $0xd0 // vpcmpgtq xmm2, xmm1, xmm0 LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd xmm0, xmm1, xmm0, xmm2 LONG $0x0479e3c4; WORD $0x4ec8 // vpermilps xmm1, xmm0, 78 - LONG $0x3779e2c4; BYTE $0xd1 // vpcmpgtq xmm2, xmm0, xmm1 + LONG $0x3771e2c4; BYTE $0xd0 // vpcmpgtq xmm2, xmm1, xmm0 LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd xmm0, xmm1, xmm0, xmm2 - LONG $0x546ffdc5; WORD $0x2024 // vmovdqa ymm2, yword [rsp + 32] - LONG $0x375de2c4; BYTE $0xca // vpcmpgtq ymm1, ymm4, ymm2 - LONG $0x4b5de3c4; WORD $0x10ca // vblendvpd ymm1, ymm4, ymm2, ymm1 - LONG $0x3725e2c4; BYTE $0xd5 // vpcmpgtq ymm2, ymm11, ymm5 - LONG $0x4b25e3c4; WORD $0x20d5 // vblendvpd ymm2, ymm11, ymm5, ymm2 - LONG $0x3745e2c4; BYTE $0xe3 // vpcmpgtq ymm4, ymm7, ymm3 - LONG $0x4b45e3c4; WORD $0x40e3 // vblendvpd ymm4, ymm7, ymm3, ymm4 - LONG $0x374dc2c4; BYTE $0xea // vpcmpgtq ymm5, ymm6, ymm10 - LONG $0x4b4dc3c4; WORD $0x50da // vblendvpd ymm3, ymm6, ymm10, ymm5 - LONG $0x3765e2c4; BYTE $0xec // vpcmpgtq ymm5, ymm3, ymm4 - LONG $0x4b65e3c4; WORD $0x50dc 
// vblendvpd ymm3, ymm3, ymm4, ymm5 - LONG $0x376de2c4; BYTE $0xe1 // vpcmpgtq ymm4, ymm2, ymm1 - LONG $0x4b6de3c4; WORD $0x40c9 // vblendvpd ymm1, ymm2, ymm1, ymm4 - LONG $0x3775e2c4; BYTE $0xd3 // vpcmpgtq ymm2, ymm1, ymm3 - LONG $0x4b75e3c4; WORD $0x20cb // vblendvpd ymm1, ymm1, ymm3, ymm2 - LONG $0x197de3c4; WORD $0x01ca // vextractf128 xmm2, ymm1, 1 - LONG $0x3769e2c4; BYTE $0xd9 // vpcmpgtq xmm3, xmm2, xmm1 - LONG $0x4b69e3c4; WORD $0x30c9 // vblendvpd xmm1, xmm2, xmm1, xmm3 - LONG $0x0479e3c4; WORD $0x4ed1 // vpermilps xmm2, xmm1, 78 - LONG $0x3769e2c4; BYTE $0xd9 // vpcmpgtq xmm3, xmm2, xmm1 - LONG $0x4b69e3c4; WORD $0x30c9 // vblendvpd xmm1, xmm2, xmm1, xmm3 - LONG $0x7ef9e1c4; BYTE $0xc6 // vmovq rsi, xmm0 - LONG $0x7ef9c1c4; BYTE $0xc9 // vmovq r9, xmm1 - WORD $0x394d; BYTE $0xc3 // cmp r11, r8 - JE LBB2_14 - -LBB2_4: - WORD $0x8948; BYTE $0xf0 // mov rax, rsi + LONG $0x7ef9e1c4; BYTE $0xc0 // vmovq rax, xmm0 + WORD $0x894c; BYTE $0xd6 // mov rsi, r10 + WORD $0x394d; BYTE $0xc1 // cmp r9, r8 + JE LBB2_8 -LBB2_5: - LONG $0xdf348b4a // mov rsi, qword [rdi + 8*r11] - WORD $0x3949; BYTE $0xf1 // cmp r9, rsi - LONG $0xce4f0f4c // cmovg r9, rsi +LBB2_7: + LONG $0xcf348b4a // mov rsi, qword [rdi + 8*r9] WORD $0x3948; BYTE $0xf0 // cmp rax, rsi - LONG $0xf04d0f48 // cmovge rsi, rax - LONG $0x01c38349 // add r11, 1 - WORD $0x8948; BYTE $0xf0 // mov rax, rsi - WORD $0x394d; BYTE $0xd8 // cmp r8, r11 - JNE LBB2_5 + LONG $0xc64f0f48 // cmovg rax, rsi + WORD $0x3949; BYTE $0xf2 // cmp r10, rsi + LONG $0xf24d0f49 // cmovge rsi, r10 + LONG $0x01c18349 // add r9, 1 + WORD $0x8949; BYTE $0xf2 // mov r10, rsi + WORD $0x394d; BYTE $0xc8 // cmp r8, r9 + JNE LBB2_7 -LBB2_14: +LBB2_8: WORD $0x8948; BYTE $0x31 // mov qword [rcx], rsi - WORD $0x894c; BYTE $0x0a // mov qword [rdx], r9 - SUBQ $8, SP + WORD $0x8948; BYTE $0x02 // mov qword [rdx], rax VZEROUPPER RET DATA LCDATA3<>+0x000(SB)/8, $0x8000000000000000 GLOBL LCDATA3<>(SB), 8, $8 -TEXT ·_uint64_max_min_avx2(SB), $296-32 
+TEXT ·_uint64_max_min_avx2(SB), $0-32 MOVQ values+0(FP), DI MOVQ length+8(FP), SI MOVQ minout+16(FP), DX MOVQ maxout+24(FP), CX - ADDQ $8, SP LEAQ LCDATA3<>(SB), BP WORD $0xf685 // test esi, esi JLE LBB3_1 WORD $0x8941; BYTE $0xf0 // mov r8d, esi - WORD $0xfe83; BYTE $0x1f // cmp esi, 31 - JA LBB3_6 - LONG $0xffc1c749; WORD $0xffff; BYTE $0xff // mov r9, -1 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - WORD $0xf631 // xor esi, esi - JMP LBB3_4 + WORD $0xfe83; BYTE $0x0f // cmp esi, 15 + JA LBB3_4 + LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov rax, -1 + WORD $0x3145; BYTE $0xc9 // xor r9d, r9d + WORD $0x3145; BYTE $0xd2 // xor r10d, r10d + JMP LBB3_7 LBB3_1: - LONG $0xffc1c749; WORD $0xffff; BYTE $0xff // mov r9, -1 + LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov rax, -1 WORD $0xf631 // xor esi, esi - JMP LBB3_14 - -LBB3_6: - WORD $0x8945; BYTE $0xc3 // mov r11d, r8d - LONG $0xe0e38341 // and r11d, -32 - LONG $0xe0438d49 // lea rax, [r11 - 32] - WORD $0x8949; BYTE $0xc2 // mov r10, rax - LONG $0x05eac149 // shr r10, 5 - LONG $0x01c28349 // add r10, 1 - WORD $0x8945; BYTE $0xd1 // mov r9d, r10d - LONG $0x03e18341 // and r9d, 3 - LONG $0x60f88348 // cmp rax, 96 - JAE LBB3_8 - LONG $0xe4efd9c5 // vpxor xmm4, xmm4, xmm4 - LONG $0xc076fdc5 // vpcmpeqd ymm0, ymm0, ymm0 - LONG $0x447ffdc5; WORD $0x4024 // vmovdqa yword [rsp + 64], ymm0 - WORD $0xc031 // xor eax, eax - LONG $0xc076fdc5 // vpcmpeqd ymm0, ymm0, ymm0 - LONG $0x447ffdc5; WORD $0x6024 // vmovdqa yword [rsp + 96], ymm0 - LONG $0xed76d5c5 // vpcmpeqd ymm5, ymm5, ymm5 - LONG $0xff76c5c5 // vpcmpeqd ymm7, ymm7, ymm7 - LONG $0x761d41c4; BYTE $0xe4 // vpcmpeqd ymm12, ymm12, ymm12 - LONG $0x762d41c4; BYTE $0xd2 // vpcmpeqd ymm10, ymm10, ymm10 - LONG $0x762541c4; BYTE $0xdb // vpcmpeqd ymm11, ymm11, ymm11 - LONG $0x761541c4; BYTE $0xed // vpcmpeqd ymm13, ymm13, ymm13 - LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0 - LONG $0x447ffdc5; WORD $0x2024 // vmovdqa yword [rsp + 32], ymm0 - LONG $0xc0eff9c5 // vpxor 
xmm0, xmm0, xmm0 - LONG $0x047ffdc5; BYTE $0x24 // vmovdqa yword [rsp], ymm0 - LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3 - LONG $0xef3141c4; BYTE $0xc9 // vpxor xmm9, xmm9, xmm9 - LONG $0xef3941c4; BYTE $0xc0 // vpxor xmm8, xmm8, xmm8 - LONG $0xef0141c4; BYTE $0xff // vpxor xmm15, xmm15, xmm15 - LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0 - JMP LBB3_10 + JMP LBB3_8 -LBB3_8: - LONG $0xfce28349 // and r10, -4 - WORD $0xf749; BYTE $0xda // neg r10 - LONG $0xe4efd9c5 // vpxor xmm4, xmm4, xmm4 - LONG $0xc076fdc5 // vpcmpeqd ymm0, ymm0, ymm0 - LONG $0x447ffdc5; WORD $0x4024 // vmovdqa yword [rsp + 64], ymm0 +LBB3_4: + WORD $0x8945; BYTE $0xc1 // mov r9d, r8d + LONG $0xf0e18341 // and r9d, -16 + LONG $0xedefd1c5 // vpxor xmm5, xmm5, xmm5 + LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1 WORD $0xc031 // xor eax, eax - LONG $0x597d62c4; WORD $0x0075 // vpbroadcastq ymm14, qword 0[rbp] /* [rip + .LCPI3_0] */ - LONG $0xc076fdc5 // vpcmpeqd ymm0, ymm0, ymm0 - LONG $0x447ffdc5; WORD $0x6024 // vmovdqa yword [rsp + 96], ymm0 - LONG $0xed76d5c5 // vpcmpeqd ymm5, ymm5, ymm5 - LONG $0xff76c5c5 // vpcmpeqd ymm7, ymm7, ymm7 - LONG $0x761d41c4; BYTE $0xe4 // vpcmpeqd ymm12, ymm12, ymm12 - LONG $0x762d41c4; BYTE $0xd2 // vpcmpeqd ymm10, ymm10, ymm10 - LONG $0x762541c4; BYTE $0xdb // vpcmpeqd ymm11, ymm11, ymm11 - LONG $0x761541c4; BYTE $0xed // vpcmpeqd ymm13, ymm13, ymm13 - LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0 - LONG $0x447ffdc5; WORD $0x2024 // vmovdqa yword [rsp + 32], ymm0 - LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0 - LONG $0x047ffdc5; BYTE $0x24 // vmovdqa yword [rsp], ymm0 - LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3 - LONG $0xef3141c4; BYTE $0xc9 // vpxor xmm9, xmm9, xmm9 + LONG $0x597de2c4; WORD $0x0045 // vpbroadcastq ymm0, qword 0[rbp] /* [rip + .LCPI3_0] */ + LONG $0xe476ddc5 // vpcmpeqd ymm4, ymm4, ymm4 + LONG $0xdb76e5c5 // vpcmpeqd ymm3, ymm3, ymm3 + LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2 LONG $0xef3941c4; BYTE $0xc0 // vpxor xmm8, xmm8, xmm8 - LONG 
$0xef0141c4; BYTE $0xff // vpxor xmm15, xmm15, xmm15 - LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0 - -LBB3_9: - QUAD $0x0000e0c78c6ffec5; BYTE $0x00 // vmovdqu ymm1, yword [rdi + 8*rax + 224] - LONG $0xd1ef8dc5 // vpxor ymm2, ymm14, ymm1 - LONG $0xf36ffdc5 // vmovdqa ymm6, ymm3 - LONG $0xef15c1c4; BYTE $0xde // vpxor ymm3, ymm13, ymm14 - LONG $0x376de2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm2, ymm3 - LONG $0x4b75c3c4; WORD $0x30dd // vblendvpd ymm3, ymm1, ymm13, ymm3 - QUAD $0x000080249c29fdc5; BYTE $0x00 // vmovapd yword [rsp + 128], ymm3 - LONG $0xd8ef8dc5 // vpxor ymm3, ymm14, ymm0 - LONG $0x3765e2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm3, ymm2 - LONG $0x4b75e3c4; WORD $0x20c0 // vblendvpd ymm0, ymm1, ymm0, ymm2 - QUAD $0x0000e0248429fdc5; BYTE $0x00 // vmovapd yword [rsp + 224], ymm0 - QUAD $0x0000c0c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 192] - LONG $0xc8ef8dc5 // vpxor ymm1, ymm14, ymm0 - LONG $0xef25c1c4; BYTE $0xd6 // vpxor ymm2, ymm11, ymm14 - LONG $0x3775e2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm1, ymm2 - LONG $0x4b7dc3c4; WORD $0x20d3 // vblendvpd ymm2, ymm0, ymm11, ymm2 - QUAD $0x0000a0249429fdc5; BYTE $0x00 // vmovapd yword [rsp + 160], ymm2 - LONG $0xef05c1c4; BYTE $0xd6 // vpxor ymm2, ymm15, ymm14 - LONG $0x376de2c4; BYTE $0xc9 // vpcmpgtq ymm1, ymm2, ymm1 - LONG $0x4b7dc3c4; WORD $0x10c7 // vblendvpd ymm0, ymm0, ymm15, ymm1 - QUAD $0x0000c0248429fdc5; BYTE $0x00 // vmovapd yword [rsp + 192], ymm0 - QUAD $0x0000a0c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 160] - LONG $0xc8ef8dc5 // vpxor ymm1, ymm14, ymm0 - LONG $0xef2dc1c4; BYTE $0xd6 // vpxor ymm2, ymm10, ymm14 - LONG $0x3775e2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm1, ymm2 - LONG $0x6f7dc1c4; BYTE $0xd8 // vmovdqa ymm3, ymm8 - LONG $0x4b7d43c4; WORD $0x20c2 // vblendvpd ymm8, ymm0, ymm10, ymm2 - LONG $0xd3ef8dc5 // vpxor ymm2, ymm14, ymm3 - LONG $0x376de2c4; BYTE $0xc9 // vpcmpgtq ymm1, ymm2, ymm1 - LONG $0x4b7d63c4; WORD $0x10eb // vblendvpd ymm13, ymm0, ymm3, ymm1 - QUAD 
$0x000080c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 128] - LONG $0xd0ef8dc5 // vpxor ymm2, ymm14, ymm0 - LONG $0xef1dc1c4; BYTE $0xce // vpxor ymm1, ymm12, ymm14 - LONG $0x376de2c4; BYTE $0xc9 // vpcmpgtq ymm1, ymm2, ymm1 - LONG $0x4b7dc3c4; WORD $0x10cc // vblendvpd ymm1, ymm0, ymm12, ymm1 - LONG $0xef35c1c4; BYTE $0xde // vpxor ymm3, ymm9, ymm14 - LONG $0x3765e2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm3, ymm2 - LONG $0x4b7d43c4; WORD $0x20e1 // vblendvpd ymm12, ymm0, ymm9, ymm2 - LONG $0x546ffec5; WORD $0x60c7 // vmovdqu ymm2, yword [rdi + 8*rax + 96] - LONG $0xc7ef8dc5 // vpxor ymm0, ymm14, ymm7 - LONG $0xdaef8dc5 // vpxor ymm3, ymm14, ymm2 - LONG $0x3765e2c4; BYTE $0xc0 // vpcmpgtq ymm0, ymm3, ymm0 - LONG $0x4b6de3c4; WORD $0x00c7 // vblendvpd ymm0, ymm2, ymm7, ymm0 - LONG $0xfc6f7dc5 // vmovdqa ymm15, ymm4 - LONG $0xe6ef8dc5 // vpxor ymm4, ymm14, ymm6 - LONG $0x375de2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm4, ymm3 - LONG $0x4b6d63c4; WORD $0x30d6 // vblendvpd ymm10, ymm2, ymm6, ymm3 - LONG $0x546ffec5; WORD $0x40c7 // vmovdqu ymm2, yword [rdi + 8*rax + 64] - LONG $0xddef8dc5 // vpxor ymm3, ymm14, ymm5 - LONG $0xe2ef8dc5 // vpxor ymm4, ymm14, ymm2 - LONG $0x375de2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm4, ymm3 - LONG $0x4b6de3c4; WORD $0x30ed // vblendvpd ymm5, ymm2, ymm5, ymm3 - LONG $0x346ffdc5; BYTE $0x24 // vmovdqa ymm6, yword [rsp] - LONG $0xdeef8dc5 // vpxor ymm3, ymm14, ymm6 - LONG $0x3765e2c4; BYTE $0xdc // vpcmpgtq ymm3, ymm3, ymm4 - LONG $0x4b6d63c4; WORD $0x30ce // vblendvpd ymm9, ymm2, ymm6, ymm3 - LONG $0x146ffec5; BYTE $0xc7 // vmovdqu ymm2, yword [rdi + 8*rax] - LONG $0x7c6ffdc5; WORD $0x4024 // vmovdqa ymm7, yword [rsp + 64] - LONG $0xdfef8dc5 // vpxor ymm3, ymm14, ymm7 - LONG $0xe2ef8dc5 // vpxor ymm4, ymm14, ymm2 - LONG $0x375de2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm4, ymm3 - LONG $0x4b6de3c4; WORD $0x30df // vblendvpd ymm3, ymm2, ymm7, ymm3 - LONG $0xef0541c4; BYTE $0xde // vpxor ymm11, ymm15, ymm14 - LONG $0x3725e2c4; BYTE $0xe4 // 
vpcmpgtq ymm4, ymm11, ymm4 - LONG $0x4b6dc3c4; WORD $0x40e7 // vblendvpd ymm4, ymm2, ymm15, ymm4 - LONG $0x546ffec5; WORD $0x20c7 // vmovdqu ymm2, yword [rdi + 8*rax + 32] - LONG $0x7c6f7dc5; WORD $0x6024 // vmovdqa ymm15, yword [rsp + 96] - LONG $0xef0541c4; BYTE $0xde // vpxor ymm11, ymm15, ymm14 - LONG $0xfaef8dc5 // vpxor ymm7, ymm14, ymm2 - LONG $0x374542c4; BYTE $0xdb // vpcmpgtq ymm11, ymm7, ymm11 - LONG $0x4b6d43c4; WORD $0xb0df // vblendvpd ymm11, ymm2, ymm15, ymm11 - LONG $0x746ffdc5; WORD $0x2024 // vmovdqa ymm6, yword [rsp + 32] - LONG $0xfeef0dc5 // vpxor ymm15, ymm14, ymm6 - LONG $0x3705e2c4; BYTE $0xff // vpcmpgtq ymm7, ymm15, ymm7 - LONG $0x4b6de3c4; WORD $0x70d6 // vblendvpd ymm2, ymm2, ymm6, ymm7 - QUAD $0x000120c7b46ffec5; BYTE $0x00 // vmovdqu ymm6, yword [rdi + 8*rax + 288] - LONG $0x5725c1c4; BYTE $0xfe // vxorpd ymm7, ymm11, ymm14 - LONG $0xfeef0dc5 // vpxor ymm15, ymm14, ymm6 - LONG $0x3705e2c4; BYTE $0xff // vpcmpgtq ymm7, ymm15, ymm7 - LONG $0x4b4dc3c4; WORD $0x70fb // vblendvpd ymm7, ymm6, ymm11, ymm7 - LONG $0x7c29fdc5; WORD $0x6024 // vmovapd yword [rsp + 96], ymm7 - LONG $0xfa578dc5 // vxorpd ymm7, ymm14, ymm2 - LONG $0x3745c2c4; BYTE $0xff // vpcmpgtq ymm7, ymm7, ymm15 - LONG $0x4b4de3c4; WORD $0x70d2 // vblendvpd ymm2, ymm6, ymm2, ymm7 - LONG $0x5429fdc5; WORD $0x4024 // vmovapd yword [rsp + 64], ymm2 - QUAD $0x000100c7b46ffec5; BYTE $0x00 // vmovdqu ymm6, yword [rdi + 8*rax + 256] - LONG $0xfb578dc5 // vxorpd ymm7, ymm14, ymm3 - LONG $0xdeef0dc5 // vpxor ymm11, ymm14, ymm6 - LONG $0x3725e2c4; BYTE $0xff // vpcmpgtq ymm7, ymm11, ymm7 - LONG $0x4b4de3c4; WORD $0x70d3 // vblendvpd ymm2, ymm6, ymm3, ymm7 - LONG $0x1429fdc5; BYTE $0x24 // vmovapd yword [rsp], ymm2 - LONG $0xfc578dc5 // vxorpd ymm7, ymm14, ymm4 - LONG $0x3745c2c4; BYTE $0xfb // vpcmpgtq ymm7, ymm7, ymm11 - LONG $0x4b4de3c4; WORD $0x70d4 // vblendvpd ymm2, ymm6, ymm4, ymm7 - LONG $0x5429fdc5; WORD $0x2024 // vmovapd yword [rsp + 32], ymm2 - QUAD $0x000140c7b46ffec5; BYTE 
$0x00 // vmovdqu ymm6, yword [rdi + 8*rax + 320] - LONG $0xfd578dc5 // vxorpd ymm7, ymm14, ymm5 - LONG $0xdeef0dc5 // vpxor ymm11, ymm14, ymm6 - LONG $0x3725e2c4; BYTE $0xff // vpcmpgtq ymm7, ymm11, ymm7 - LONG $0x4b4de3c4; WORD $0x70ed // vblendvpd ymm5, ymm6, ymm5, ymm7 - LONG $0x5735c1c4; BYTE $0xfe // vxorpd ymm7, ymm9, ymm14 - LONG $0x3745c2c4; BYTE $0xfb // vpcmpgtq ymm7, ymm7, ymm11 - LONG $0x4b4dc3c4; WORD $0x70f9 // vblendvpd ymm7, ymm6, ymm9, ymm7 - QUAD $0x000160c7b46ffec5; BYTE $0x00 // vmovdqu ymm6, yword [rdi + 8*rax + 352] - LONG $0xc8570dc5 // vxorpd ymm9, ymm14, ymm0 - LONG $0xdeef0dc5 // vpxor ymm11, ymm14, ymm6 - LONG $0x372542c4; BYTE $0xc9 // vpcmpgtq ymm9, ymm11, ymm9 - LONG $0x4b4d63c4; WORD $0x90c8 // vblendvpd ymm9, ymm6, ymm0, ymm9 - LONG $0x572dc1c4; BYTE $0xc6 // vxorpd ymm0, ymm10, ymm14 - LONG $0x377dc2c4; BYTE $0xc3 // vpcmpgtq ymm0, ymm0, ymm11 - LONG $0x4b4d43c4; WORD $0x00d2 // vblendvpd ymm10, ymm6, ymm10, ymm0 - QUAD $0x000180c7b46ffec5; BYTE $0x00 // vmovdqu ymm6, yword [rdi + 8*rax + 384] - LONG $0xc1578dc5 // vxorpd ymm0, ymm14, ymm1 - LONG $0xdeef0dc5 // vpxor ymm11, ymm14, ymm6 - LONG $0x3725e2c4; BYTE $0xc0 // vpcmpgtq ymm0, ymm11, ymm0 - LONG $0x4b4de3c4; WORD $0x00e1 // vblendvpd ymm4, ymm6, ymm1, ymm0 - LONG $0x571dc1c4; BYTE $0xce // vxorpd ymm1, ymm12, ymm14 - LONG $0x3775c2c4; BYTE $0xcb // vpcmpgtq ymm1, ymm1, ymm11 - LONG $0x4b4dc3c4; WORD $0x10dc // vblendvpd ymm3, ymm6, ymm12, ymm1 - QUAD $0x0001a0c79c6f7ec5; BYTE $0x00 // vmovdqu ymm11, yword [rdi + 8*rax + 416] - LONG $0x573dc1c4; BYTE $0xf6 // vxorpd ymm6, ymm8, ymm14 - LONG $0xef2541c4; BYTE $0xe6 // vpxor ymm12, ymm11, ymm14 - LONG $0x371de2c4; BYTE $0xf6 // vpcmpgtq ymm6, ymm12, ymm6 - LONG $0x4b25c3c4; WORD $0x60f0 // vblendvpd ymm6, ymm11, ymm8, ymm6 - LONG $0x571541c4; BYTE $0xc6 // vxorpd ymm8, ymm13, ymm14 - LONG $0x373d42c4; BYTE $0xc4 // vpcmpgtq ymm8, ymm8, ymm12 - LONG $0x4b2543c4; WORD $0x80e5 // vblendvpd ymm12, ymm11, ymm13, ymm8 - QUAD 
$0x0001c0c79c6f7ec5; BYTE $0x00 // vmovdqu ymm11, yword [rdi + 8*rax + 448] - QUAD $0x0000a024846ffdc5; BYTE $0x00 // vmovdqa ymm0, yword [rsp + 160] - LONG $0xc0ef0dc5 // vpxor ymm8, ymm14, ymm0 - LONG $0xef2541c4; BYTE $0xee // vpxor ymm13, ymm11, ymm14 - LONG $0x371542c4; BYTE $0xc0 // vpcmpgtq ymm8, ymm13, ymm8 - LONG $0x4b2563c4; WORD $0x80c0 // vblendvpd ymm8, ymm11, ymm0, ymm8 - QUAD $0x0000c024846ffdc5; BYTE $0x00 // vmovdqa ymm0, yword [rsp + 192] - LONG $0xf8ef0dc5 // vpxor ymm15, ymm14, ymm0 - LONG $0x370542c4; BYTE $0xed // vpcmpgtq ymm13, ymm15, ymm13 - LONG $0x4b2563c4; WORD $0xd0e8 // vblendvpd ymm13, ymm11, ymm0, ymm13 - QUAD $0x0001e0c79c6f7ec5; BYTE $0x00 // vmovdqu ymm11, yword [rdi + 8*rax + 480] - QUAD $0x000080248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 128] - LONG $0xf9ef0dc5 // vpxor ymm15, ymm14, ymm1 - LONG $0xef25c1c4; BYTE $0xc6 // vpxor ymm0, ymm11, ymm14 - LONG $0x377d42c4; BYTE $0xff // vpcmpgtq ymm15, ymm0, ymm15 - LONG $0x4b25e3c4; WORD $0xf0c9 // vblendvpd ymm1, ymm11, ymm1, ymm15 - QUAD $0x0000e024946ffdc5; BYTE $0x00 // vmovdqa ymm2, yword [rsp + 224] - LONG $0xfaef0dc5 // vpxor ymm15, ymm14, ymm2 - LONG $0x3705e2c4; BYTE $0xc0 // vpcmpgtq ymm0, ymm15, ymm0 - LONG $0x4b2563c4; WORD $0x00fa // vblendvpd ymm15, ymm11, ymm2, ymm0 - QUAD $0x0002e0c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 736] - LONG $0xd9570dc5 // vxorpd ymm11, ymm14, ymm1 - LONG $0xd0ef8dc5 // vpxor ymm2, ymm14, ymm0 - LONG $0x376d42c4; BYTE $0xdb // vpcmpgtq ymm11, ymm2, ymm11 - LONG $0x4b7de3c4; WORD $0xb0c9 // vblendvpd ymm1, ymm0, ymm1, ymm11 - QUAD $0x000080248c29fdc5; BYTE $0x00 // vmovapd yword [rsp + 128], ymm1 - LONG $0x5705c1c4; BYTE $0xce // vxorpd ymm1, ymm15, ymm14 - LONG $0x3775e2c4; BYTE $0xca // vpcmpgtq ymm1, ymm1, ymm2 - LONG $0x4b7dc3c4; WORD $0x10c7 // vblendvpd ymm0, ymm0, ymm15, ymm1 - QUAD $0x0000e0248429fdc5; BYTE $0x00 // vmovapd yword [rsp + 224], ymm0 - QUAD $0x0002c0c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword 
[rdi + 8*rax + 704] - LONG $0x573dc1c4; BYTE $0xce // vxorpd ymm1, ymm8, ymm14 - LONG $0xd0ef8dc5 // vpxor ymm2, ymm14, ymm0 - LONG $0x376de2c4; BYTE $0xc9 // vpcmpgtq ymm1, ymm2, ymm1 - LONG $0x4b7dc3c4; WORD $0x10c8 // vblendvpd ymm1, ymm0, ymm8, ymm1 - QUAD $0x0000a0248c29fdc5; BYTE $0x00 // vmovapd yword [rsp + 160], ymm1 - LONG $0x5715c1c4; BYTE $0xce // vxorpd ymm1, ymm13, ymm14 - LONG $0x3775e2c4; BYTE $0xca // vpcmpgtq ymm1, ymm1, ymm2 - LONG $0x4b7dc3c4; WORD $0x10c5 // vblendvpd ymm0, ymm0, ymm13, ymm1 - QUAD $0x0000c0248429fdc5; BYTE $0x00 // vmovapd yword [rsp + 192], ymm0 - QUAD $0x0002a0c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 672] - LONG $0xce578dc5 // vxorpd ymm1, ymm14, ymm6 - LONG $0xd0ef8dc5 // vpxor ymm2, ymm14, ymm0 - LONG $0x376de2c4; BYTE $0xc9 // vpcmpgtq ymm1, ymm2, ymm1 - LONG $0x4b7d63c4; WORD $0x10fe // vblendvpd ymm15, ymm0, ymm6, ymm1 - LONG $0x571dc1c4; BYTE $0xce // vxorpd ymm1, ymm12, ymm14 - LONG $0x3775e2c4; BYTE $0xca // vpcmpgtq ymm1, ymm1, ymm2 - LONG $0x4b7d43c4; WORD $0x10ec // vblendvpd ymm13, ymm0, ymm12, ymm1 - QUAD $0x000280c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 640] - LONG $0xcc578dc5 // vxorpd ymm1, ymm14, ymm4 - LONG $0xd0ef8dc5 // vpxor ymm2, ymm14, ymm0 - LONG $0x376de2c4; BYTE $0xc9 // vpcmpgtq ymm1, ymm2, ymm1 - LONG $0x4b7d63c4; WORD $0x10e4 // vblendvpd ymm12, ymm0, ymm4, ymm1 - LONG $0xcb578dc5 // vxorpd ymm1, ymm14, ymm3 - LONG $0x3775e2c4; BYTE $0xca // vpcmpgtq ymm1, ymm1, ymm2 - LONG $0x4b7d63c4; WORD $0x10c3 // vblendvpd ymm8, ymm0, ymm3, ymm1 - QUAD $0x000260c7946ffec5; BYTE $0x00 // vmovdqu ymm2, yword [rdi + 8*rax + 608] - LONG $0x5735c1c4; BYTE $0xce // vxorpd ymm1, ymm9, ymm14 - LONG $0xdaef8dc5 // vpxor ymm3, ymm14, ymm2 - LONG $0x3765e2c4; BYTE $0xc9 // vpcmpgtq ymm1, ymm3, ymm1 - LONG $0x4b6dc3c4; WORD $0x10c9 // vblendvpd ymm1, ymm2, ymm9, ymm1 - LONG $0x572dc1c4; BYTE $0xe6 // vxorpd ymm4, ymm10, ymm14 - LONG $0x375de2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm4, 
ymm3 - LONG $0x4b6d43c4; WORD $0x30d2 // vblendvpd ymm10, ymm2, ymm10, ymm3 - QUAD $0x000240c7946ffec5; BYTE $0x00 // vmovdqu ymm2, yword [rdi + 8*rax + 576] - LONG $0xdd578dc5 // vxorpd ymm3, ymm14, ymm5 - LONG $0xe2ef8dc5 // vpxor ymm4, ymm14, ymm2 - LONG $0x375de2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm4, ymm3 - LONG $0x4b6de3c4; WORD $0x30ed // vblendvpd ymm5, ymm2, ymm5, ymm3 - LONG $0xdf578dc5 // vxorpd ymm3, ymm14, ymm7 - LONG $0x3765e2c4; BYTE $0xdc // vpcmpgtq ymm3, ymm3, ymm4 - LONG $0x4b6d63c4; WORD $0x30cf // vblendvpd ymm9, ymm2, ymm7, ymm3 - QUAD $0x000200c7946ffec5; BYTE $0x00 // vmovdqu ymm2, yword [rdi + 8*rax + 512] - LONG $0x046ffdc5; BYTE $0x24 // vmovdqa ymm0, yword [rsp] - LONG $0xd8ef8dc5 // vpxor ymm3, ymm14, ymm0 - LONG $0xe2ef8dc5 // vpxor ymm4, ymm14, ymm2 - LONG $0x375de2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm4, ymm3 - LONG $0x4b6de3c4; WORD $0x30d8 // vblendvpd ymm3, ymm2, ymm0, ymm3 - LONG $0x446ffdc5; WORD $0x2024 // vmovdqa ymm0, yword [rsp + 32] - LONG $0xf0ef8dc5 // vpxor ymm6, ymm14, ymm0 - LONG $0x374de2c4; BYTE $0xe4 // vpcmpgtq ymm4, ymm6, ymm4 - LONG $0x4b6de3c4; WORD $0x40e0 // vblendvpd ymm4, ymm2, ymm0, ymm4 - QUAD $0x000220c7946ffec5; BYTE $0x00 // vmovdqu ymm2, yword [rdi + 8*rax + 544] - LONG $0x446ffdc5; WORD $0x6024 // vmovdqa ymm0, yword [rsp + 96] - LONG $0xf0ef8dc5 // vpxor ymm6, ymm14, ymm0 - LONG $0xfaef8dc5 // vpxor ymm7, ymm14, ymm2 - LONG $0x3745e2c4; BYTE $0xf6 // vpcmpgtq ymm6, ymm7, ymm6 - LONG $0x4b6de3c4; WORD $0x60f0 // vblendvpd ymm6, ymm2, ymm0, ymm6 - LONG $0x446ffdc5; WORD $0x4024 // vmovdqa ymm0, yword [rsp + 64] - LONG $0xd8ef0dc5 // vpxor ymm11, ymm14, ymm0 - LONG $0x3725e2c4; BYTE $0xff // vpcmpgtq ymm7, ymm11, ymm7 - LONG $0x4b6de3c4; WORD $0x70d0 // vblendvpd ymm2, ymm2, ymm0, ymm7 - QUAD $0x000320c7bc6ffec5; BYTE $0x00 // vmovdqu ymm7, yword [rdi + 8*rax + 800] - LONG $0xde570dc5 // vxorpd ymm11, ymm14, ymm6 - LONG $0xc7ef8dc5 // vpxor ymm0, ymm14, ymm7 - LONG $0x377d42c4; BYTE $0xdb // vpcmpgtq 
ymm11, ymm0, ymm11 - LONG $0x4b45e3c4; WORD $0xb0f6 // vblendvpd ymm6, ymm7, ymm6, ymm11 - LONG $0x7429fdc5; WORD $0x6024 // vmovapd yword [rsp + 96], ymm6 - LONG $0xf2578dc5 // vxorpd ymm6, ymm14, ymm2 - LONG $0x374de2c4; BYTE $0xc0 // vpcmpgtq ymm0, ymm6, ymm0 - LONG $0x4b45e3c4; WORD $0x00c2 // vblendvpd ymm0, ymm7, ymm2, ymm0 - LONG $0x4429fdc5; WORD $0x2024 // vmovapd yword [rsp + 32], ymm0 - QUAD $0x000300c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 768] - LONG $0xd3578dc5 // vxorpd ymm2, ymm14, ymm3 - LONG $0xf8ef8dc5 // vpxor ymm7, ymm14, ymm0 - LONG $0x3745e2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm7, ymm2 - LONG $0x4b7de3c4; WORD $0x20d3 // vblendvpd ymm2, ymm0, ymm3, ymm2 - LONG $0x5429fdc5; WORD $0x4024 // vmovapd yword [rsp + 64], ymm2 - LONG $0xd4578dc5 // vxorpd ymm2, ymm14, ymm4 - LONG $0x376de2c4; BYTE $0xd7 // vpcmpgtq ymm2, ymm2, ymm7 - LONG $0x4b7de3c4; WORD $0x20e4 // vblendvpd ymm4, ymm0, ymm4, ymm2 - QUAD $0x000340c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 832] - LONG $0xd5578dc5 // vxorpd ymm2, ymm14, ymm5 - LONG $0xd8ef8dc5 // vpxor ymm3, ymm14, ymm0 - LONG $0x3765e2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm3, ymm2 - LONG $0x4b7de3c4; WORD $0x20ed // vblendvpd ymm5, ymm0, ymm5, ymm2 - LONG $0x5735c1c4; BYTE $0xd6 // vxorpd ymm2, ymm9, ymm14 - LONG $0x376de2c4; BYTE $0xd3 // vpcmpgtq ymm2, ymm2, ymm3 - LONG $0x4b7dc3c4; WORD $0x20c1 // vblendvpd ymm0, ymm0, ymm9, ymm2 - LONG $0x0429fdc5; BYTE $0x24 // vmovapd yword [rsp], ymm0 - QUAD $0x000360c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 864] - LONG $0xd1578dc5 // vxorpd ymm2, ymm14, ymm1 - LONG $0xd8ef8dc5 // vpxor ymm3, ymm14, ymm0 - LONG $0x3765e2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm3, ymm2 - LONG $0x4b7de3c4; WORD $0x20f9 // vblendvpd ymm7, ymm0, ymm1, ymm2 - LONG $0x572dc1c4; BYTE $0xce // vxorpd ymm1, ymm10, ymm14 - LONG $0x3775e2c4; BYTE $0xcb // vpcmpgtq ymm1, ymm1, ymm3 - LONG $0x4b7dc3c4; WORD $0x10da // vblendvpd ymm3, ymm0, ymm10, ymm1 - QUAD 
$0x000380c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 896] - LONG $0x571dc1c4; BYTE $0xce // vxorpd ymm1, ymm12, ymm14 - LONG $0xd0ef8dc5 // vpxor ymm2, ymm14, ymm0 - LONG $0x376de2c4; BYTE $0xc9 // vpcmpgtq ymm1, ymm2, ymm1 - LONG $0x4b7d43c4; WORD $0x10e4 // vblendvpd ymm12, ymm0, ymm12, ymm1 - LONG $0x573dc1c4; BYTE $0xce // vxorpd ymm1, ymm8, ymm14 - LONG $0x3775e2c4; BYTE $0xca // vpcmpgtq ymm1, ymm1, ymm2 - LONG $0x4b7d43c4; WORD $0x10c8 // vblendvpd ymm9, ymm0, ymm8, ymm1 - QUAD $0x0003a0c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 928] - LONG $0x5705c1c4; BYTE $0xce // vxorpd ymm1, ymm15, ymm14 - LONG $0xd0ef8dc5 // vpxor ymm2, ymm14, ymm0 - LONG $0x376de2c4; BYTE $0xc9 // vpcmpgtq ymm1, ymm2, ymm1 - LONG $0x4b7d43c4; WORD $0x10d7 // vblendvpd ymm10, ymm0, ymm15, ymm1 - LONG $0x5715c1c4; BYTE $0xce // vxorpd ymm1, ymm13, ymm14 - LONG $0x3775e2c4; BYTE $0xca // vpcmpgtq ymm1, ymm1, ymm2 - LONG $0x4b7d43c4; WORD $0x10c5 // vblendvpd ymm8, ymm0, ymm13, ymm1 - QUAD $0x0003c0c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 960] - QUAD $0x0000a024b46ffdc5; BYTE $0x00 // vmovdqa ymm6, yword [rsp + 160] - LONG $0xceef8dc5 // vpxor ymm1, ymm14, ymm6 - LONG $0xd0ef8dc5 // vpxor ymm2, ymm14, ymm0 - LONG $0x376de2c4; BYTE $0xc9 // vpcmpgtq ymm1, ymm2, ymm1 - LONG $0x4b7d63c4; WORD $0x10de // vblendvpd ymm11, ymm0, ymm6, ymm1 - QUAD $0x0000c024b46ffdc5; BYTE $0x00 // vmovdqa ymm6, yword [rsp + 192] - LONG $0xceef8dc5 // vpxor ymm1, ymm14, ymm6 - LONG $0x3775e2c4; BYTE $0xca // vpcmpgtq ymm1, ymm1, ymm2 - LONG $0x4b7d63c4; WORD $0x10fe // vblendvpd ymm15, ymm0, ymm6, ymm1 - QUAD $0x0003e0c7846ffec5; BYTE $0x00 // vmovdqu ymm0, yword [rdi + 8*rax + 992] - QUAD $0x00008024b46ffdc5; BYTE $0x00 // vmovdqa ymm6, yword [rsp + 128] - LONG $0xceef8dc5 // vpxor ymm1, ymm14, ymm6 - LONG $0xd0ef8dc5 // vpxor ymm2, ymm14, ymm0 - LONG $0x376de2c4; BYTE $0xc9 // vpcmpgtq ymm1, ymm2, ymm1 - LONG $0x4b7d63c4; WORD $0x10ee // vblendvpd ymm13, 
ymm0, ymm6, ymm1 - QUAD $0x0000e024b46ffdc5; BYTE $0x00 // vmovdqa ymm6, yword [rsp + 224] - LONG $0xceef8dc5 // vpxor ymm1, ymm14, ymm6 - LONG $0x3775e2c4; BYTE $0xca // vpcmpgtq ymm1, ymm1, ymm2 - LONG $0x4b7de3c4; WORD $0x10c6 // vblendvpd ymm0, ymm0, ymm6, ymm1 - LONG $0x80e88348 // sub rax, -128 - LONG $0x04c28349 // add r10, 4 - JNE LBB3_9 - -LBB3_10: - QUAD $0x0000802494297cc5; BYTE $0x00 // vmovaps yword [rsp + 128], ymm10 - WORD $0x854d; BYTE $0xc9 // test r9, r9 - LONG $0x6f7d41c4; BYTE $0xd4 // vmovdqa ymm10, ymm12 - LONG $0xe36f7dc5 // vmovdqa ymm12, ymm3 - JE LBB3_13 - LONG $0xc7048d48 // lea rax, [rdi + 8*rax] - WORD $0xf749; BYTE $0xd9 // neg r9 - LONG $0x597d62c4; WORD $0x0075 // vpbroadcastq ymm14, qword 0[rbp] /* [rip + .LCPI3_0] */ - -LBB3_12: - LONG $0x486ffec5; BYTE $0x20 // vmovdqu ymm1, yword [rax + 32] - LONG $0xf76ffdc5 // vmovdqa ymm6, ymm7 - LONG $0xfd6ffdc5 // vmovdqa ymm7, ymm5 - LONG $0xec6ffdc5 // vmovdqa ymm5, ymm4 - LONG $0x646ffdc5; WORD $0x6024 // vmovdqa ymm4, yword [rsp + 96] - LONG $0xd4ef8dc5 // vpxor ymm2, ymm14, ymm4 - LONG $0xd9ef8dc5 // vpxor ymm3, ymm14, ymm1 - LONG $0x3765e2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm3, ymm2 - LONG $0x4b75e3c4; WORD $0x20e4 // vblendvpd ymm4, ymm1, ymm4, ymm2 - LONG $0x6429fdc5; WORD $0x6024 // vmovapd yword [rsp + 96], ymm4 - LONG $0x646ffdc5; WORD $0x2024 // vmovdqa ymm4, yword [rsp + 32] - LONG $0xd4ef8dc5 // vpxor ymm2, ymm14, ymm4 - LONG $0x376de2c4; BYTE $0xd3 // vpcmpgtq ymm2, ymm2, ymm3 - LONG $0x4b75e3c4; WORD $0x20e4 // vblendvpd ymm4, ymm1, ymm4, ymm2 - LONG $0x6429fdc5; WORD $0x2024 // vmovapd yword [rsp + 32], ymm4 - LONG $0x086ffec5 // vmovdqu ymm1, yword [rax] - LONG $0x646ffdc5; WORD $0x4024 // vmovdqa ymm4, yword [rsp + 64] - LONG $0xd4ef8dc5 // vpxor ymm2, ymm14, ymm4 - LONG $0xd9ef8dc5 // vpxor ymm3, ymm14, ymm1 - LONG $0x3765e2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm3, ymm2 - LONG $0x4b75e3c4; WORD $0x20e4 // vblendvpd ymm4, ymm1, ymm4, ymm2 - LONG $0x6429fdc5; WORD $0x4024 // 
vmovapd yword [rsp + 64], ymm4 - LONG $0xe56ffdc5 // vmovdqa ymm4, ymm5 - LONG $0xef6ffdc5 // vmovdqa ymm5, ymm7 - LONG $0xfe6ffdc5 // vmovdqa ymm7, ymm6 - LONG $0xd4ef8dc5 // vpxor ymm2, ymm14, ymm4 - LONG $0x376de2c4; BYTE $0xd3 // vpcmpgtq ymm2, ymm2, ymm3 - LONG $0x586ffec5; BYTE $0x40 // vmovdqu ymm3, yword [rax + 64] - LONG $0x4b75e3c4; WORD $0x20e4 // vblendvpd ymm4, ymm1, ymm4, ymm2 - LONG $0xcbef8dc5 // vpxor ymm1, ymm14, ymm3 - LONG $0xd5ef8dc5 // vpxor ymm2, ymm14, ymm5 - LONG $0x3775e2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm1, ymm2 - LONG $0x4b65e3c4; WORD $0x20ed // vblendvpd ymm5, ymm3, ymm5, ymm2 - LONG $0x346ffdc5; BYTE $0x24 // vmovdqa ymm6, yword [rsp] - LONG $0xd6ef8dc5 // vpxor ymm2, ymm14, ymm6 - LONG $0x376de2c4; BYTE $0xc9 // vpcmpgtq ymm1, ymm2, ymm1 - LONG $0x4b65e3c4; WORD $0x10f6 // vblendvpd ymm6, ymm3, ymm6, ymm1 - LONG $0x3429fdc5; BYTE $0x24 // vmovapd yword [rsp], ymm6 - LONG $0x486ffec5; BYTE $0x60 // vmovdqu ymm1, yword [rax + 96] - LONG $0xd1ef8dc5 // vpxor ymm2, ymm14, ymm1 - LONG $0xdfef8dc5 // vpxor ymm3, ymm14, ymm7 - LONG $0x376de2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm2, ymm3 - LONG $0x4b75e3c4; WORD $0x30ff // vblendvpd ymm7, ymm1, ymm7, ymm3 - LONG $0xef1dc1c4; BYTE $0xde // vpxor ymm3, ymm12, ymm14 - LONG $0x3765e2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm3, ymm2 - QUAD $0x00000080986ffec5 // vmovdqu ymm3, yword [rax + 128] - LONG $0x4b7543c4; WORD $0x20e4 // vblendvpd ymm12, ymm1, ymm12, ymm2 - LONG $0xcbef8dc5 // vpxor ymm1, ymm14, ymm3 - LONG $0xef2dc1c4; BYTE $0xd6 // vpxor ymm2, ymm10, ymm14 - LONG $0x3775e2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm1, ymm2 - LONG $0x4b6543c4; WORD $0x20d2 // vblendvpd ymm10, ymm3, ymm10, ymm2 - LONG $0xef35c1c4; BYTE $0xd6 // vpxor ymm2, ymm9, ymm14 - LONG $0x376de2c4; BYTE $0xc9 // vpcmpgtq ymm1, ymm2, ymm1 - LONG $0x4b6543c4; WORD $0x10c9 // vblendvpd ymm9, ymm3, ymm9, ymm1 - QUAD $0x000000a0886ffec5 // vmovdqu ymm1, yword [rax + 160] - LONG $0xd1ef8dc5 // vpxor ymm2, ymm14, ymm1 - QUAD 
$0x00008024b46ffdc5; BYTE $0x00 // vmovdqa ymm6, yword [rsp + 128] - LONG $0xdeef8dc5 // vpxor ymm3, ymm14, ymm6 - LONG $0x376de2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm2, ymm3 - LONG $0x4b75e3c4; WORD $0x30f6 // vblendvpd ymm6, ymm1, ymm6, ymm3 - QUAD $0x00008024b429fdc5; BYTE $0x00 // vmovapd yword [rsp + 128], ymm6 - LONG $0xef3dc1c4; BYTE $0xde // vpxor ymm3, ymm8, ymm14 - LONG $0x3765e2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm3, ymm2 - QUAD $0x000000c0986ffec5 // vmovdqu ymm3, yword [rax + 192] - LONG $0x4b7543c4; WORD $0x20c0 // vblendvpd ymm8, ymm1, ymm8, ymm2 - LONG $0xcbef8dc5 // vpxor ymm1, ymm14, ymm3 - LONG $0xef25c1c4; BYTE $0xd6 // vpxor ymm2, ymm11, ymm14 - LONG $0x3775e2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm1, ymm2 - LONG $0x4b6543c4; WORD $0x20db // vblendvpd ymm11, ymm3, ymm11, ymm2 - LONG $0xef05c1c4; BYTE $0xd6 // vpxor ymm2, ymm15, ymm14 - LONG $0x376de2c4; BYTE $0xc9 // vpcmpgtq ymm1, ymm2, ymm1 - LONG $0x4b6543c4; WORD $0x10ff // vblendvpd ymm15, ymm3, ymm15, ymm1 - QUAD $0x000000e0886ffec5 // vmovdqu ymm1, yword [rax + 224] - LONG $0xd1ef8dc5 // vpxor ymm2, ymm14, ymm1 - LONG $0xef15c1c4; BYTE $0xde // vpxor ymm3, ymm13, ymm14 - LONG $0x376de2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm2, ymm3 - LONG $0x4b7543c4; WORD $0x30ed // vblendvpd ymm13, ymm1, ymm13, ymm3 - LONG $0xd8ef8dc5 // vpxor ymm3, ymm14, ymm0 - LONG $0x3765e2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm3, ymm2 - LONG $0x4b75e3c4; WORD $0x20c0 // vblendvpd ymm0, ymm1, ymm0, ymm2 - LONG $0x01000548; WORD $0x0000 // add rax, 256 - WORD $0xff49; BYTE $0xc1 // inc r9 - JNE LBB3_12 - -LBB3_13: - LONG $0x597d62c4; WORD $0x0075 // vpbroadcastq ymm14, qword 0[rbp] /* [rip + .LCPI3_0] */ - LONG $0x1c6ffdc5; BYTE $0x24 // vmovdqa ymm3, yword [rsp] - LONG $0xcbef8dc5 // vpxor ymm1, ymm14, ymm3 - LONG $0xef05c1c4; BYTE $0xd6 // vpxor ymm2, ymm15, ymm14 - LONG $0x3775e2c4; BYTE $0xca // vpcmpgtq ymm1, ymm1, ymm2 - LONG $0x4b05e3c4; WORD $0x10cb // vblendvpd ymm1, ymm15, ymm3, ymm1 - LONG $0xd4ef8dc5 // vpxor ymm2, 
ymm14, ymm4 - LONG $0xef35c1c4; BYTE $0xde // vpxor ymm3, ymm9, ymm14 - LONG $0x376de2c4; BYTE $0xd3 // vpcmpgtq ymm2, ymm2, ymm3 - LONG $0x4b35e3c4; WORD $0x20d4 // vblendvpd ymm2, ymm9, ymm4, ymm2 - LONG $0xef1dc1c4; BYTE $0xde // vpxor ymm3, ymm12, ymm14 - LONG $0xc8ef0dc5 // vpxor ymm9, ymm14, ymm0 - LONG $0x3765c2c4; BYTE $0xd9 // vpcmpgtq ymm3, ymm3, ymm9 - LONG $0x4b7dc3c4; WORD $0x30c4 // vblendvpd ymm0, ymm0, ymm12, ymm3 - LONG $0x646ffdc5; WORD $0x2024 // vmovdqa ymm4, yword [rsp + 32] - LONG $0xdcef8dc5 // vpxor ymm3, ymm14, ymm4 - LONG $0xef3d41c4; BYTE $0xce // vpxor ymm9, ymm8, ymm14 - LONG $0x3765c2c4; BYTE $0xd9 // vpcmpgtq ymm3, ymm3, ymm9 - LONG $0x4b3de3c4; WORD $0x30dc // vblendvpd ymm3, ymm8, ymm4, ymm3 - LONG $0xf3578dc5 // vxorpd ymm6, ymm14, ymm3 - LONG $0xc8570dc5 // vxorpd ymm9, ymm14, ymm0 - LONG $0x374dc2c4; BYTE $0xf1 // vpcmpgtq ymm6, ymm6, ymm9 - LONG $0x4b7de3c4; WORD $0x60c3 // vblendvpd ymm0, ymm0, ymm3, ymm6 - LONG $0xda578dc5 // vxorpd ymm3, ymm14, ymm2 - LONG $0xf1578dc5 // vxorpd ymm6, ymm14, ymm1 - LONG $0x3765e2c4; BYTE $0xde // vpcmpgtq ymm3, ymm3, ymm6 - LONG $0x4b75e3c4; WORD $0x30ca // vblendvpd ymm1, ymm1, ymm2, ymm3 - LONG $0xd1578dc5 // vxorpd ymm2, ymm14, ymm1 - LONG $0xd8578dc5 // vxorpd ymm3, ymm14, ymm0 - LONG $0x376de2c4; BYTE $0xd3 // vpcmpgtq ymm2, ymm2, ymm3 - LONG $0x4b7de3c4; WORD $0x20c1 // vblendvpd ymm0, ymm0, ymm1, ymm2 - LONG $0x197de3c4; WORD $0x01c1 // vextractf128 xmm1, ymm0, 1 - LONG $0xd15789c5 // vxorpd xmm2, xmm14, xmm1 - LONG $0xd85789c5 // vxorpd xmm3, xmm14, xmm0 - LONG $0x3761e2c4; BYTE $0xd2 // vpcmpgtq xmm2, xmm3, xmm2 - LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd xmm0, xmm1, xmm0, xmm2 - LONG $0x0479e3c4; WORD $0x4ec8 // vpermilps xmm1, xmm0, 78 - LONG $0xd05789c5 // vxorpd xmm2, xmm14, xmm0 - LONG $0xd95789c5 // vxorpd xmm3, xmm14, xmm1 - LONG $0x3769e2c4; BYTE $0xd3 // vpcmpgtq xmm2, xmm2, xmm3 - LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd xmm0, xmm1, xmm0, xmm2 - LONG $0xcdef8dc5 // 
vpxor ymm1, ymm14, ymm5 - LONG $0xef25c1c4; BYTE $0xd6 // vpxor ymm2, ymm11, ymm14 - LONG $0x376de2c4; BYTE $0xc9 // vpcmpgtq ymm1, ymm2, ymm1 - LONG $0x4b25e3c4; WORD $0x10cd // vblendvpd ymm1, ymm11, ymm5, ymm1 - LONG $0x646ffdc5; WORD $0x4024 // vmovdqa ymm4, yword [rsp + 64] - LONG $0xd4ef8dc5 // vpxor ymm2, ymm14, ymm4 - LONG $0xef2dc1c4; BYTE $0xde // vpxor ymm3, ymm10, ymm14 - LONG $0x3765e2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm3, ymm2 - LONG $0x4b2de3c4; WORD $0x20d4 // vblendvpd ymm2, ymm10, ymm4, ymm2 - LONG $0xdfef8dc5 // vpxor ymm3, ymm14, ymm7 - LONG $0xef15c1c4; BYTE $0xee // vpxor ymm5, ymm13, ymm14 - LONG $0x3755e2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm5, ymm3 - LONG $0x4b15e3c4; WORD $0x30df // vblendvpd ymm3, ymm13, ymm7, ymm3 - LONG $0x746ffdc5; WORD $0x6024 // vmovdqa ymm6, yword [rsp + 96] - LONG $0xe6ef8dc5 // vpxor ymm4, ymm14, ymm6 - QUAD $0x00008024bc6ffdc5; BYTE $0x00 // vmovdqa ymm7, yword [rsp + 128] - LONG $0xefef8dc5 // vpxor ymm5, ymm14, ymm7 - LONG $0x3755e2c4; BYTE $0xe4 // vpcmpgtq ymm4, ymm5, ymm4 - LONG $0x4b45e3c4; WORD $0x40e6 // vblendvpd ymm4, ymm7, ymm6, ymm4 - LONG $0xec578dc5 // vxorpd ymm5, ymm14, ymm4 - LONG $0xf3578dc5 // vxorpd ymm6, ymm14, ymm3 - LONG $0x374de2c4; BYTE $0xed // vpcmpgtq ymm5, ymm6, ymm5 - LONG $0x4b65e3c4; WORD $0x50dc // vblendvpd ymm3, ymm3, ymm4, ymm5 - LONG $0xe2578dc5 // vxorpd ymm4, ymm14, ymm2 - LONG $0xe9578dc5 // vxorpd ymm5, ymm14, ymm1 - LONG $0x3755e2c4; BYTE $0xe4 // vpcmpgtq ymm4, ymm5, ymm4 - LONG $0x4b75e3c4; WORD $0x40ca // vblendvpd ymm1, ymm1, ymm2, ymm4 - LONG $0xd1578dc5 // vxorpd ymm2, ymm14, ymm1 - LONG $0xe3578dc5 // vxorpd ymm4, ymm14, ymm3 - LONG $0x375de2c4; BYTE $0xd2 // vpcmpgtq ymm2, ymm4, ymm2 - LONG $0x4b65e3c4; WORD $0x20c9 // vblendvpd ymm1, ymm3, ymm1, ymm2 - LONG $0x197de3c4; WORD $0x01ca // vextractf128 xmm2, ymm1, 1 - LONG $0xd95789c5 // vxorpd xmm3, xmm14, xmm1 - LONG $0xe25789c5 // vxorpd xmm4, xmm14, xmm2 - LONG $0x3759e2c4; BYTE $0xdb // vpcmpgtq xmm3, xmm4, xmm3 - 
LONG $0x4b69e3c4; WORD $0x30c9 // vblendvpd xmm1, xmm2, xmm1, xmm3 - LONG $0x0479e3c4; WORD $0x4ed1 // vpermilps xmm2, xmm1, 78 - LONG $0xd95789c5 // vxorpd xmm3, xmm14, xmm1 - LONG $0xe25789c5 // vxorpd xmm4, xmm14, xmm2 - LONG $0x3759e2c4; BYTE $0xdb // vpcmpgtq xmm3, xmm4, xmm3 - LONG $0x4b69e3c4; WORD $0x30c9 // vblendvpd xmm1, xmm2, xmm1, xmm3 - LONG $0x7ef9e1c4; BYTE $0xc6 // vmovq rsi, xmm0 - LONG $0x7ef9c1c4; BYTE $0xc9 // vmovq r9, xmm1 - WORD $0x394d; BYTE $0xc3 // cmp r11, r8 - JE LBB3_14 - -LBB3_4: - WORD $0x8948; BYTE $0xf0 // mov rax, rsi + LONG $0xffefc1c5 // vpxor xmm7, xmm7, xmm7 + LONG $0xf6efc9c5 // vpxor xmm6, xmm6, xmm6 LBB3_5: - LONG $0xdf348b4a // mov rsi, qword [rdi + 8*r11] - WORD $0x3949; BYTE $0xf1 // cmp r9, rsi - LONG $0xce430f4c // cmovae r9, rsi - WORD $0x3948; BYTE $0xf0 // cmp rax, rsi - LONG $0xf0470f48 // cmova rsi, rax - LONG $0x01c38349 // add r11, 1 - WORD $0x8948; BYTE $0xf0 // mov rax, rsi - WORD $0x394d; BYTE $0xd8 // cmp r8, r11 + LONG $0x0c6f7ec5; BYTE $0xc7 // vmovdqu ymm9, yword [rdi + 8*rax] + LONG $0xd0ef75c5 // vpxor ymm10, ymm1, ymm0 + LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0 + LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10 + LONG $0x4b35e3c4; WORD $0xa0c9 // vblendvpd ymm1, ymm9, ymm1, ymm10 + LONG $0xd0ef55c5 // vpxor ymm10, ymm5, ymm0 + LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11 + LONG $0x4b35e3c4; WORD $0xa0ed // vblendvpd ymm5, ymm9, ymm5, ymm10 + LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu ymm9, yword [rdi + 8*rax + 32] + LONG $0xd0ef5dc5 // vpxor ymm10, ymm4, ymm0 + LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0 + LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10 + LONG $0x4b35e3c4; WORD $0xa0e4 // vblendvpd ymm4, ymm9, ymm4, ymm10 + LONG $0xd0ef3dc5 // vpxor ymm10, ymm8, ymm0 + LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11 + LONG $0x5c6f7ec5; WORD $0x40c7 // vmovdqu ymm11, yword [rdi + 8*rax + 64] + LONG $0x4b3543c4; WORD $0xa0c0 // vblendvpd ymm8, ymm9, 
ymm8, ymm10 + LONG $0xc8ef65c5 // vpxor ymm9, ymm3, ymm0 + LONG $0xd0ef25c5 // vpxor ymm10, ymm11, ymm0 + LONG $0x372d42c4; BYTE $0xc9 // vpcmpgtq ymm9, ymm10, ymm9 + LONG $0x4b25e3c4; WORD $0x90db // vblendvpd ymm3, ymm11, ymm3, ymm9 + LONG $0xc8ef45c5 // vpxor ymm9, ymm7, ymm0 + LONG $0x373542c4; BYTE $0xca // vpcmpgtq ymm9, ymm9, ymm10 + LONG $0x4b25e3c4; WORD $0x90ff // vblendvpd ymm7, ymm11, ymm7, ymm9 + LONG $0x4c6f7ec5; WORD $0x60c7 // vmovdqu ymm9, yword [rdi + 8*rax + 96] + LONG $0xd0ef6dc5 // vpxor ymm10, ymm2, ymm0 + LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0 + LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10 + LONG $0x4b35e3c4; WORD $0xa0d2 // vblendvpd ymm2, ymm9, ymm2, ymm10 + LONG $0xd0ef4dc5 // vpxor ymm10, ymm6, ymm0 + LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11 + LONG $0x4b35e3c4; WORD $0xa0f6 // vblendvpd ymm6, ymm9, ymm6, ymm10 + LONG $0x10c08348 // add rax, 16 + WORD $0x3949; BYTE $0xc1 // cmp r9, rax JNE LBB3_5 + LONG $0xc8ef3dc5 // vpxor ymm9, ymm8, ymm0 + LONG $0xd0ef55c5 // vpxor ymm10, ymm5, ymm0 + LONG $0x372d42c4; BYTE $0xc9 // vpcmpgtq ymm9, ymm10, ymm9 + LONG $0x4b3de3c4; WORD $0x90ed // vblendvpd ymm5, ymm8, ymm5, ymm9 + LONG $0xc05755c5 // vxorpd ymm8, ymm5, ymm0 + LONG $0xc8ef45c5 // vpxor ymm9, ymm7, ymm0 + LONG $0x373d42c4; BYTE $0xc1 // vpcmpgtq ymm8, ymm8, ymm9 + LONG $0x4b45e3c4; WORD $0x80ed // vblendvpd ymm5, ymm7, ymm5, ymm8 + LONG $0xf857d5c5 // vxorpd ymm7, ymm5, ymm0 + LONG $0xc0ef4dc5 // vpxor ymm8, ymm6, ymm0 + LONG $0x3745c2c4; BYTE $0xf8 // vpcmpgtq ymm7, ymm7, ymm8 + LONG $0x4b4de3c4; WORD $0x70ed // vblendvpd ymm5, ymm6, ymm5, ymm7 + LONG $0x197de3c4; WORD $0x01ee // vextractf128 xmm6, ymm5, 1 + LONG $0xc05749c5 // vxorpd xmm8, xmm6, xmm0 + LONG $0xf857d1c5 // vxorpd xmm7, xmm5, xmm0 + LONG $0x3741c2c4; BYTE $0xf8 // vpcmpgtq xmm7, xmm7, xmm8 + LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd xmm5, xmm6, xmm5, xmm7 + LONG $0x0479e3c4; WORD $0x4ef5 // vpermilps xmm6, xmm5, 78 + LONG 
$0xc05751c5 // vxorpd xmm8, xmm5, xmm0 + LONG $0xf857c9c5 // vxorpd xmm7, xmm6, xmm0 + LONG $0x3739e2c4; BYTE $0xff // vpcmpgtq xmm7, xmm8, xmm7 + LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd xmm5, xmm6, xmm5, xmm7 + LONG $0xf0eff5c5 // vpxor ymm6, ymm1, ymm0 + LONG $0xf8efddc5 // vpxor ymm7, ymm4, ymm0 + LONG $0x3745e2c4; BYTE $0xf6 // vpcmpgtq ymm6, ymm7, ymm6 + LONG $0x4b5de3c4; WORD $0x60c9 // vblendvpd ymm1, ymm4, ymm1, ymm6 + LONG $0xe057f5c5 // vxorpd ymm4, ymm1, ymm0 + LONG $0xf0efe5c5 // vpxor ymm6, ymm3, ymm0 + LONG $0x374de2c4; BYTE $0xe4 // vpcmpgtq ymm4, ymm6, ymm4 + LONG $0x4b65e3c4; WORD $0x40c9 // vblendvpd ymm1, ymm3, ymm1, ymm4 + LONG $0x7ef9c1c4; BYTE $0xea // vmovq r10, xmm5 + LONG $0xd857f5c5 // vxorpd ymm3, ymm1, ymm0 + LONG $0xe0efedc5 // vpxor ymm4, ymm2, ymm0 + LONG $0x375de2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm4, ymm3 + LONG $0x4b6de3c4; WORD $0x30c9 // vblendvpd ymm1, ymm2, ymm1, ymm3 + LONG $0x197de3c4; WORD $0x01ca // vextractf128 xmm2, ymm1, 1 + LONG $0xd857f1c5 // vxorpd xmm3, xmm1, xmm0 + LONG $0xe057e9c5 // vxorpd xmm4, xmm2, xmm0 + LONG $0x3759e2c4; BYTE $0xdb // vpcmpgtq xmm3, xmm4, xmm3 + LONG $0x4b69e3c4; WORD $0x30c9 // vblendvpd xmm1, xmm2, xmm1, xmm3 + LONG $0x0479e3c4; WORD $0x4ed1 // vpermilps xmm2, xmm1, 78 + LONG $0xd857f1c5 // vxorpd xmm3, xmm1, xmm0 + LONG $0xc057e9c5 // vxorpd xmm0, xmm2, xmm0 + LONG $0x3779e2c4; BYTE $0xc3 // vpcmpgtq xmm0, xmm0, xmm3 + LONG $0x4b69e3c4; WORD $0x00c1 // vblendvpd xmm0, xmm2, xmm1, xmm0 + LONG $0x7ef9e1c4; BYTE $0xc0 // vmovq rax, xmm0 + WORD $0x894c; BYTE $0xd6 // mov rsi, r10 + WORD $0x394d; BYTE $0xc1 // cmp r9, r8 + JE LBB3_8 + +LBB3_7: + LONG $0xcf348b4a // mov rsi, qword [rdi + 8*r9] + WORD $0x3948; BYTE $0xf0 // cmp rax, rsi + LONG $0xc6430f48 // cmovae rax, rsi + WORD $0x3949; BYTE $0xf2 // cmp r10, rsi + LONG $0xf2470f49 // cmova rsi, r10 + LONG $0x01c18349 // add r9, 1 + WORD $0x8949; BYTE $0xf2 // mov r10, rsi + WORD $0x394d; BYTE $0xc8 // cmp r8, r9 + JNE LBB3_7 -LBB3_14: 
+LBB3_8: WORD $0x8948; BYTE $0x31 // mov qword [rcx], rsi - WORD $0x894c; BYTE $0x0a // mov qword [rdx], r9 - SUBQ $8, SP + WORD $0x8948; BYTE $0x02 // mov qword [rdx], rax VZEROUPPER RET diff --git a/go/parquet/internal/utils/unpack_bool_avx2_amd64.s b/go/parquet/internal/utils/unpack_bool_avx2_amd64.s index 99c2cc88265..459ff78675d 100644 --- a/go/parquet/internal/utils/unpack_bool_avx2_amd64.s +++ b/go/parquet/internal/utils/unpack_bool_avx2_amd64.s @@ -1,6961 +1,88 @@ //+build !noasm !appengine // AUTO-GENERATED BY C2GOASM -- DO NOT EDIT -DATA LCDATA1<>+0x000(SB)/8, $0x0000001900000018 -DATA LCDATA1<>+0x008(SB)/8, $0x0000001b0000001a -DATA LCDATA1<>+0x010(SB)/8, $0x0000001d0000001c -DATA LCDATA1<>+0x018(SB)/8, $0x0000001f0000001e -DATA LCDATA1<>+0x020(SB)/8, $0x0000001100000010 -DATA LCDATA1<>+0x028(SB)/8, $0x0000001300000012 -DATA LCDATA1<>+0x030(SB)/8, $0x0000001500000014 -DATA LCDATA1<>+0x038(SB)/8, $0x0000001700000016 -DATA LCDATA1<>+0x040(SB)/8, $0x0000000900000008 -DATA LCDATA1<>+0x048(SB)/8, $0x0000000b0000000a -DATA LCDATA1<>+0x050(SB)/8, $0x0000000d0000000c -DATA LCDATA1<>+0x058(SB)/8, $0x0000000f0000000e -DATA LCDATA1<>+0x060(SB)/8, $0x0000000100000000 -DATA LCDATA1<>+0x068(SB)/8, $0x0000000300000002 -DATA LCDATA1<>+0x070(SB)/8, $0x0000000500000004 -DATA LCDATA1<>+0x078(SB)/8, $0x0000000700000006 -DATA LCDATA1<>+0x080(SB)/8, $0x0101010101010101 -DATA LCDATA1<>+0x088(SB)/8, $0x0101010101010101 -DATA LCDATA1<>+0x090(SB)/8, $0x0101010101010101 -DATA LCDATA1<>+0x098(SB)/8, $0x0101010101010101 -DATA LCDATA1<>+0x0a0(SB)/8, $0x0000000000000001 -DATA LCDATA1<>+0x0a8(SB)/8, $0x0000000000000002 -DATA LCDATA1<>+0x0b0(SB)/8, $0x0000000000000003 -DATA LCDATA1<>+0x0b8(SB)/8, $0x0000000000000004 -DATA LCDATA1<>+0x0c0(SB)/8, $0x0000000000000005 -DATA LCDATA1<>+0x0c8(SB)/8, $0x0000000000000006 -DATA LCDATA1<>+0x0d0(SB)/8, $0x0000000000000007 -DATA LCDATA1<>+0x0d8(SB)/8, $0x0000000000000020 -GLOBL LCDATA1<>(SB), 8, $224 - -TEXT ·_bytes_to_bools_avx2(SB), $1000-32 +TEXT 
·_bytes_to_bools_avx2(SB), $0-32 MOVQ in+0(FP), DI MOVQ len+8(FP), SI MOVQ out+16(FP), DX MOVQ outlen+24(FP), CX - MOVQ SP, BP - ADDQ $32, SP - ANDQ $-32, SP - MOVQ BP, 960(SP) - LEAQ LCDATA1<>(SB), BP WORD $0xf685 // test esi, esi - JLE LBB0_1051 - WORD $0x8941; BYTE $0xc9 // mov r9d, ecx - WORD $0x8949; BYTE $0xd0 // mov r8, rdx - WORD $0x8941; BYTE $0xf2 // mov r10d, esi - WORD $0xfe83; BYTE $0x20 // cmp esi, 32 - JAE LBB0_3 + JLE LBB0_5 + WORD $0x8941; BYTE $0xf0 // mov r8d, esi + LONG $0x03e0c149 // shl r8, 3 + WORD $0x3145; BYTE $0xd2 // xor r10d, r10d + JMP LBB0_2 + +LBB0_4: + LONG $0x08c28349 // add r10, 8 + LONG $0x01c78348 // add rdi, 1 + WORD $0x394d; BYTE $0xd0 // cmp r8, r10 + JE LBB0_5 LBB0_2: - WORD $0x3145; BYTE $0xe4 // xor r12d, r12d - -LBB0_1055: - QUAD $0x00000000e50c8d42 // lea ecx, [8*r12] - JMP LBB0_1057 - -LBB0_1056: - LONG $0x01c48349 // add r12, 1 - WORD $0xc183; BYTE $0x08 // add ecx, 8 - WORD $0x394d; BYTE $0xe2 // cmp r10, r12 - JE LBB0_1051 - -LBB0_1057: - WORD $0xca89 // mov edx, ecx - WORD $0xc989 // mov ecx, ecx - WORD $0x3944; BYTE $0xca // cmp edx, r9d - JGE LBB0_1056 - LONG $0x14b60f42; BYTE $0x27 // movzx edx, byte [rdi + r12] - WORD $0xe280; BYTE $0x01 // and dl, 1 - LONG $0x08148841 // mov byte [r8 + rcx], dl - WORD $0x8948; BYTE $0xca // mov rdx, rcx - LONG $0x01ca8348 // or rdx, 1 - WORD $0x3944; BYTE $0xca // cmp edx, r9d - JGE LBB0_1056 - LONG $0x1cb60f42; BYTE $0x27 // movzx ebx, byte [rdi + r12] - WORD $0xebd0 // shr bl, 1 - WORD $0xe380; BYTE $0x01 // and bl, 1 - LONG $0x101c8841 // mov byte [r8 + rdx], bl - WORD $0x8948; BYTE $0xca // mov rdx, rcx - LONG $0x02ca8348 // or rdx, 2 - WORD $0x3944; BYTE $0xca // cmp edx, r9d - JGE LBB0_1056 - LONG $0x1cb60f42; BYTE $0x27 // movzx ebx, byte [rdi + r12] - WORD $0xebc0; BYTE $0x02 // shr bl, 2 - WORD $0xe380; BYTE $0x01 // and bl, 1 - LONG $0x101c8841 // mov byte [r8 + rdx], bl - WORD $0x8948; BYTE $0xca // mov rdx, rcx - LONG $0x03ca8348 // or rdx, 3 - WORD $0x3944; BYTE 
$0xca // cmp edx, r9d - JGE LBB0_1056 - LONG $0x1cb60f42; BYTE $0x27 // movzx ebx, byte [rdi + r12] - WORD $0xebc0; BYTE $0x03 // shr bl, 3 - WORD $0xe380; BYTE $0x01 // and bl, 1 - LONG $0x101c8841 // mov byte [r8 + rdx], bl - WORD $0x8948; BYTE $0xca // mov rdx, rcx - LONG $0x04ca8348 // or rdx, 4 - WORD $0x3944; BYTE $0xca // cmp edx, r9d - JGE LBB0_1056 - LONG $0x1cb60f42; BYTE $0x27 // movzx ebx, byte [rdi + r12] - WORD $0xebc0; BYTE $0x04 // shr bl, 4 - WORD $0xe380; BYTE $0x01 // and bl, 1 - LONG $0x101c8841 // mov byte [r8 + rdx], bl - WORD $0x8948; BYTE $0xca // mov rdx, rcx - LONG $0x05ca8348 // or rdx, 5 - WORD $0x3944; BYTE $0xca // cmp edx, r9d - JGE LBB0_1056 - LONG $0x1cb60f42; BYTE $0x27 // movzx ebx, byte [rdi + r12] - WORD $0xebc0; BYTE $0x05 // shr bl, 5 - WORD $0xe380; BYTE $0x01 // and bl, 1 - LONG $0x101c8841 // mov byte [r8 + rdx], bl - WORD $0x8948; BYTE $0xca // mov rdx, rcx - LONG $0x06ca8348 // or rdx, 6 - WORD $0x3944; BYTE $0xca // cmp edx, r9d - JGE LBB0_1056 - LONG $0x1cb60f42; BYTE $0x27 // movzx ebx, byte [rdi + r12] - WORD $0xebc0; BYTE $0x06 // shr bl, 6 - WORD $0xe380; BYTE $0x01 // and bl, 1 - LONG $0x101c8841 // mov byte [r8 + rdx], bl - WORD $0x8948; BYTE $0xca // mov rdx, rcx - LONG $0x07ca8348 // or rdx, 7 - WORD $0x3944; BYTE $0xca // cmp edx, r9d - JGE LBB0_1056 - LONG $0x1cb60f42; BYTE $0x27 // movzx ebx, byte [rdi + r12] - WORD $0xebc0; BYTE $0x07 // shr bl, 7 - LONG $0x101c8841 // mov byte [r8 + rdx], bl - JMP LBB0_1056 - -LBB0_3: - LONG $0x244c8944; BYTE $0x10 // mov dword [rsp + 16], r9d - LONG $0x2454894c; BYTE $0x30 // mov qword [rsp + 48], r10 - LONG $0xff728d49 // lea rsi, [r10 - 1] - LONG $0x000008b9; BYTE $0x00 // mov ecx, 8 - WORD $0xf089 // mov eax, esi - WORD $0xe1f7 // mul ecx - LONG $0xd6900f41 // seto r14b - WORD $0x8948; BYTE $0xf3 // mov rbx, rsi - LONG $0x20ebc148 // shr rbx, 32 - LONG $0x06488d49 // lea rcx, [r8 + 6] - LONG $0x000008ba; BYTE $0x00 // mov edx, 8 - WORD $0x8948; BYTE $0xf0 // mov rax, 
rsi - WORD $0xf748; BYTE $0xe2 // mul rdx - LONG $0xd6900f40 // seto sil - WORD $0x0148; BYTE $0xc1 // add rcx, rax - WORD $0x920f; BYTE $0xd2 // setb dl - LONG $0x07488d49 // lea rcx, [r8 + 7] - WORD $0x0148; BYTE $0xc1 // add rcx, rax - LONG $0xd5920f41 // setb r13b - LONG $0x05488d49 // lea rcx, [r8 + 5] - WORD $0x0148; BYTE $0xc1 // add rcx, rax - LONG $0xd1920f41 // setb r9b - LONG $0x04488d49 // lea rcx, [r8 + 4] - WORD $0x0148; BYTE $0xc1 // add rcx, rax - LONG $0xd7920f41 // setb r15b - LONG $0x03488d49 // lea rcx, [r8 + 3] - WORD $0x0148; BYTE $0xc1 // add rcx, rax - LONG $0xd3920f41 // setb r11b - LONG $0x02488d49 // lea rcx, [r8 + 2] - WORD $0x0148; BYTE $0xc1 // add rcx, rax - LONG $0xd2920f41 // setb r10b - LONG $0x01488d49 // lea rcx, [r8 + 1] - WORD $0x0148; BYTE $0xc1 // add rcx, rax - WORD $0x920f; BYTE $0xd1 // setb cl - WORD $0x014c; BYTE $0xc0 // add rax, r8 - WORD $0x920f; BYTE $0xd0 // setb al - WORD $0x3145; BYTE $0xe4 // xor r12d, r12d - WORD $0x8548; BYTE $0xdb // test rbx, rbx - JNE LBB0_1052 - WORD $0x8445; BYTE $0xf6 // test r14b, r14b - JNE LBB0_1052 - WORD $0xd284 // test dl, dl - JNE LBB0_1052 - WORD $0x8440; BYTE $0xf6 // test sil, sil - JNE LBB0_1052 - WORD $0x8445; BYTE $0xed // test r13b, r13b - JNE LBB0_1052 - WORD $0x8440; BYTE $0xf6 // test sil, sil - JNE LBB0_1052 - WORD $0x8445; BYTE $0xc9 // test r9b, r9b - JNE LBB0_1052 - WORD $0x8440; BYTE $0xf6 // test sil, sil - JNE LBB0_1052 - WORD $0x8445; BYTE $0xff // test r15b, r15b - JNE LBB0_1052 - WORD $0x8440; BYTE $0xf6 // test sil, sil - JNE LBB0_1052 - WORD $0x8445; BYTE $0xdb // test r11b, r11b - JNE LBB0_1052 - WORD $0x8440; BYTE $0xf6 // test sil, sil - JNE LBB0_1052 - WORD $0x8445; BYTE $0xd2 // test r10b, r10b - JNE LBB0_1052 - WORD $0x8440; BYTE $0xf6 // test sil, sil - LONG $0x24548b4c; BYTE $0x30 // mov r10, qword [rsp + 48] - JNE LBB0_1054 - WORD $0xc984 // test cl, cl - JNE LBB0_1054 - WORD $0x8440; BYTE $0xf6 // test sil, sil - LONG $0x244c8b44; BYTE $0x10 // mov 
r9d, dword [rsp + 16] - JNE LBB0_1055 - WORD $0xc084 // test al, al - JNE LBB0_1055 - WORD $0x8440; BYTE $0xf6 // test sil, sil - JNE LBB0_1055 - LONG $0xd0048d4b // lea rax, [r8 + 8*r10] - WORD $0x3948; BYTE $0xf8 // cmp rax, rdi - JBE LBB0_24 - LONG $0x17048d4a // lea rax, [rdi + r10] - WORD $0x394c; BYTE $0xc0 // cmp rax, r8 - JA LBB0_2 - -LBB0_24: - WORD $0x8945; BYTE $0xd4 // mov r12d, r10d - LONG $0xe0e48341 // and r12d, -32 - LONG $0x6e79c1c4; BYTE $0xc1 // vmovd xmm0, r9d - LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd ymm0, xmm0 - LONG $0x4d6f7dc5; BYTE $0x00 // vmovdqa ymm9, yword 0[rbp] /* [rip + .LCPI0_0] */ - LONG $0x456f7dc5; BYTE $0x20 // vmovdqa ymm8, yword 32[rbp] /* [rip + .LCPI0_1] */ - LONG $0x5d6ffdc5; BYTE $0x40 // vmovdqa ymm3, yword 64[rbp] /* [rip + .LCPI0_2] */ - LONG $0x556ffdc5; BYTE $0x60 // vmovdqa ymm2, yword 96[rbp] /* [rip + .LCPI0_3] */ - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - QUAD $0x0000a08d197de2c4; BYTE $0x00 // vbroadcastsd ymm1, qword 160[rbp] /* [rip + .LCPI0_5] */ - QUAD $0x000300248c29fcc5; BYTE $0x00 // vmovaps yword [rsp + 768], ymm1 - QUAD $0x0000a88d197de2c4; BYTE $0x00 // vbroadcastsd ymm1, qword 168[rbp] /* [rip + .LCPI0_6] */ - QUAD $0x0002e0248c29fcc5; BYTE $0x00 // vmovaps yword [rsp + 736], ymm1 - QUAD $0x0000b08d197de2c4; BYTE $0x00 // vbroadcastsd ymm1, qword 176[rbp] /* [rip + .LCPI0_7] */ - QUAD $0x0002c0248c29fcc5; BYTE $0x00 // vmovaps yword [rsp + 704], ymm1 - QUAD $0x0000b88d197de2c4; BYTE $0x00 // vbroadcastsd ymm1, qword 184[rbp] /* [rip + .LCPI0_8] */ - QUAD $0x0002a0248c29fcc5; BYTE $0x00 // vmovaps yword [rsp + 672], ymm1 - QUAD $0x0000c08d197de2c4; BYTE $0x00 // vbroadcastsd ymm1, qword 192[rbp] /* [rip + .LCPI0_9] */ - QUAD $0x000280248c29fcc5; BYTE $0x00 // vmovaps yword [rsp + 640], ymm1 - QUAD $0x0000c88d197de2c4; BYTE $0x00 // vbroadcastsd ymm1, qword 200[rbp] /* [rip + .LCPI0_10] */ - QUAD $0x000260248c29fcc5; BYTE $0x00 // vmovaps yword [rsp + 608], ymm1 - QUAD $0x0000d08d197de2c4; BYTE 
$0x00 // vbroadcastsd ymm1, qword 208[rbp] /* [rip + .LCPI0_11] */ - QUAD $0x000240248c29fcc5; BYTE $0x00 // vmovaps yword [rsp + 576], ymm1 - QUAD $0x0000d88d587de2c4; BYTE $0x00 // vpbroadcastd ymm1, dword 216[rbp] /* [rip + .LCPI0_12] */ - QUAD $0x000220248c7ffdc5; BYTE $0x00 // vmovdqa yword [rsp + 544], ymm1 - JMP LBB0_26 - -LBB0_25: - LONG $0x20c38349 // add r11, 32 - QUAD $0x000220248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 544] - LONG $0xd1feedc5 // vpaddd ymm2, ymm2, ymm1 - LONG $0xd9fee5c5 // vpaddd ymm3, ymm3, ymm1 - LONG $0xc1fe3dc5 // vpaddd ymm8, ymm8, ymm1 - LONG $0xc9fe35c5 // vpaddd ymm9, ymm9, ymm1 - WORD $0x394d; BYTE $0xe3 // cmp r11, r12 - JE LBB0_1050 - -LBB0_26: - QUAD $0x00032024947ffdc5; BYTE $0x00 // vmovdqa yword [rsp + 800], ymm2 - LONG $0xf272f5c5; BYTE $0x03 // vpslld ymm1, ymm2, 3 - LONG $0xd166f9c5 // vpcmpgtd xmm2, xmm0, xmm1 - LONG $0xd17ef9c5 // vmovd ecx, xmm2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_28 - LONG $0x787da2c4; WORD $0x1f24 // vpbroadcastb ymm4, byte [rdi + r11] - -LBB0_28: - WORD $0x894d; BYTE $0xda // mov r10, r11 - LONG $0x01ca8349 // or r10, 1 - LONG $0xd166f9c5 // vpcmpgtd xmm2, xmm0, xmm1 - LONG $0xd26be9c5 // vpackssdw xmm2, xmm2, xmm2 - LONG $0xd263e9c5 // vpacksswb xmm2, xmm2, xmm2 - LONG $0x1479e3c4; WORD $0x01d1 // vpextrb ecx, xmm2, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_30 - LONG $0x2059a3c4; WORD $0x1714; BYTE $0x01 // vpinsrb xmm2, xmm4, byte [rdi + r10], 1 - LONG $0x025de3c4; WORD $0x0fe2 // vpblendd ymm4, ymm4, ymm2, 15 - -LBB0_30: - WORD $0x894d; BYTE $0xde // mov r14, r11 - LONG $0x02ce8349 // or r14, 2 - LONG $0xd166f9c5 // vpcmpgtd xmm2, xmm0, xmm1 - LONG $0xd26be9c5 // vpackssdw xmm2, xmm2, xmm2 - LONG $0xd263e9c5 // vpacksswb xmm2, xmm2, xmm2 - LONG $0x1479e3c4; WORD $0x02d1 // vpextrb ecx, xmm2, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_32 - LONG $0x2059a3c4; WORD $0x3714; BYTE $0x02 // vpinsrb xmm2, xmm4, byte [rdi + r14], 2 - LONG $0x025de3c4; WORD 
$0x0fe2 // vpblendd ymm4, ymm4, ymm2, 15 - -LBB0_32: - LONG $0x397dc3c4; WORD $0x01cd // vextracti128 xmm13, ymm1, 1 - WORD $0x894c; BYTE $0xda // mov rdx, r11 - LONG $0x03ca8348 // or rdx, 3 - LONG $0xd166f9c5 // vpcmpgtd xmm2, xmm0, xmm1 - LONG $0xd26be9c5 // vpackssdw xmm2, xmm2, xmm2 - LONG $0xd263e9c5 // vpacksswb xmm2, xmm2, xmm2 - LONG $0x1479e3c4; WORD $0x03d1 // vpextrb ecx, xmm2, 3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_34 - LONG $0x2059e3c4; WORD $0x1714; BYTE $0x03 // vpinsrb xmm2, xmm4, byte [rdi + rdx], 3 - LONG $0x025de3c4; WORD $0x0fe2 // vpblendd ymm4, ymm4, ymm2, 15 - -LBB0_34: - WORD $0x894c; BYTE $0xd9 // mov rcx, r11 - LONG $0x04c98348 // or rcx, 4 - LONG $0x397de3c4; WORD $0x01c7 // vextracti128 xmm7, ymm0, 1 - LONG $0x6641c1c4; BYTE $0xd5 // vpcmpgtd xmm2, xmm7, xmm13 - LONG $0x1479c3c4; WORD $0x00d1 // vpextrb r9d, xmm2, 0 - LONG $0x01c1f641 // test r9b, 1 - QUAD $0x0000011024948948 // mov qword [rsp + 272], rdx - QUAD $0x00000108248c8948 // mov qword [rsp + 264], rcx - JE LBB0_36 - LONG $0x2059e3c4; WORD $0x0f14; BYTE $0x04 // vpinsrb xmm2, xmm4, byte [rdi + rcx], 4 - LONG $0x025de3c4; WORD $0x0fe2 // vpblendd ymm4, ymm4, ymm2, 15 - -LBB0_36: - WORD $0x894d; BYTE $0xdf // mov r15, r11 - LONG $0x05cf8349 // or r15, 5 - LONG $0xf166fdc5 // vpcmpgtd ymm6, ymm0, ymm1 - LONG $0xd06bcdc5 // vpackssdw ymm2, ymm6, ymm0 - LONG $0x397de3c4; WORD $0x01d2 // vextracti128 xmm2, ymm2, 1 - LONG $0x5879e2c4; BYTE $0xd2 // vpbroadcastd xmm2, xmm2 - LONG $0xd263e9c5 // vpacksswb xmm2, xmm2, xmm2 - LONG $0x1479e3c4; WORD $0x05d1 // vpextrb ecx, xmm2, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_38 - LONG $0x2059a3c4; WORD $0x3f14; BYTE $0x05 // vpinsrb xmm2, xmm4, byte [rdi + r15], 5 - LONG $0x025de3c4; WORD $0x0fe2 // vpblendd ymm4, ymm4, ymm2, 15 - -LBB0_38: - WORD $0x894c; BYTE $0xdb // mov rbx, r11 - LONG $0x06cb8348 // or rbx, 6 - LONG $0xd06bcdc5 // vpackssdw ymm2, ymm6, ymm0 - LONG $0x00fde3c4; WORD $0xe8d2 // vpermq ymm2, ymm2, 232 
- LONG $0xd263e9c5 // vpacksswb xmm2, xmm2, xmm2 - LONG $0x1479e3c4; WORD $0x06d1 // vpextrb ecx, xmm2, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_40 - LONG $0x2059e3c4; WORD $0x1f14; BYTE $0x06 // vpinsrb xmm2, xmm4, byte [rdi + rbx], 6 - LONG $0x025de3c4; WORD $0x0fe2 // vpblendd ymm4, ymm4, ymm2, 15 - -LBB0_40: - LONG $0xf372edc5; BYTE $0x03 // vpslld ymm2, ymm3, 3 - WORD $0x894c; BYTE $0xd8 // mov rax, r11 - LONG $0x07c88348 // or rax, 7 - LONG $0xe86bcdc5 // vpackssdw ymm5, ymm6, ymm0 - LONG $0x00fde3c4; WORD $0xe8ed // vpermq ymm5, ymm5, 232 - LONG $0xed63d1c5 // vpacksswb xmm5, xmm5, xmm5 - LONG $0x1479e3c4; WORD $0x07e9 // vpextrb ecx, xmm5, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_42 - LONG $0x2059e3c4; WORD $0x072c; BYTE $0x07 // vpinsrb xmm5, xmm4, byte [rdi + rax], 7 - LONG $0x025de3c4; WORD $0x0fe5 // vpblendd ymm4, ymm4, ymm5, 15 - -LBB0_42: - WORD $0x894c; BYTE $0xde // mov rsi, r11 - LONG $0x08ce8348 // or rsi, 8 - LONG $0xea66f9c5 // vpcmpgtd xmm5, xmm0, xmm2 - LONG $0x1479e3c4; WORD $0x00e9 // vpextrb ecx, xmm5, 0 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_44 - LONG $0x2059e3c4; WORD $0x372c; BYTE $0x08 // vpinsrb xmm5, xmm4, byte [rdi + rsi], 8 - LONG $0x025de3c4; WORD $0x0fe5 // vpblendd ymm4, ymm4, ymm5, 15 - -LBB0_44: - WORD $0x894c; BYTE $0xda // mov rdx, r11 - LONG $0x09ca8348 // or rdx, 9 - LONG $0xea66f9c5 // vpcmpgtd xmm5, xmm0, xmm2 - LONG $0xed6bd1c5 // vpackssdw xmm5, xmm5, xmm5 - LONG $0xed63d1c5 // vpacksswb xmm5, xmm5, xmm5 - LONG $0x1479e3c4; WORD $0x09e9 // vpextrb ecx, xmm5, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - QUAD $0x000000e024948948 // mov qword [rsp + 224], rdx - JE LBB0_46 - LONG $0x2059e3c4; WORD $0x172c; BYTE $0x09 // vpinsrb xmm5, xmm4, byte [rdi + rdx], 9 - LONG $0x025de3c4; WORD $0x0fe5 // vpblendd ymm4, ymm4, ymm5, 15 - -LBB0_46: - WORD $0x894c; BYTE $0xda // mov rdx, r11 - LONG $0x0aca8348 // or rdx, 10 - LONG $0xea66f9c5 // vpcmpgtd xmm5, xmm0, xmm2 - LONG $0xed6bd1c5 // 
vpackssdw xmm5, xmm5, xmm5 - LONG $0xed63d1c5 // vpacksswb xmm5, xmm5, xmm5 - LONG $0x1479e3c4; WORD $0x0ae9 // vpextrb ecx, xmm5, 10 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - QUAD $0x000340249c7ffdc5; BYTE $0x00 // vmovdqa yword [rsp + 832], ymm3 - LONG $0x24748948; BYTE $0x60 // mov qword [rsp + 96], rsi - JE LBB0_48 - LONG $0x2059e3c4; WORD $0x172c; BYTE $0x0a // vpinsrb xmm5, xmm4, byte [rdi + rdx], 10 - LONG $0x025de3c4; WORD $0x0fe5 // vpblendd ymm4, ymm4, ymm5, 15 - -LBB0_48: - LONG $0x397de3c4; WORD $0x01d5 // vextracti128 xmm5, ymm2, 1 - WORD $0x894c; BYTE $0xde // mov rsi, r11 - LONG $0x0bce8348 // or rsi, 11 - LONG $0xda66f9c5 // vpcmpgtd xmm3, xmm0, xmm2 - LONG $0xdb6be1c5 // vpackssdw xmm3, xmm3, xmm3 - LONG $0xdb63e1c5 // vpacksswb xmm3, xmm3, xmm3 - LONG $0x1479e3c4; WORD $0x0bd9 // vpextrb ecx, xmm3, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - QUAD $0x000000982494894c // mov qword [rsp + 152], r10 - QUAD $0x0000012824b4894c // mov qword [rsp + 296], r14 - LONG $0x247c894c; BYTE $0x68 // mov qword [rsp + 104], r15 - QUAD $0x00000120249c8948 // mov qword [rsp + 288], rbx - QUAD $0x000000e824848948 // mov qword [rsp + 232], rax - JE LBB0_50 - LONG $0x2059e3c4; WORD $0x371c; BYTE $0x0b // vpinsrb xmm3, xmm4, byte [rdi + rsi], 11 - LONG $0x025de3c4; WORD $0x0fe3 // vpblendd ymm4, ymm4, ymm3, 15 - -LBB0_50: - WORD $0x894c; BYTE $0xd9 // mov rcx, r11 - LONG $0x0cc98348 // or rcx, 12 - LONG $0xdd66c1c5 // vpcmpgtd xmm3, xmm7, xmm5 - LONG $0x1479c3c4; WORD $0x00de // vpextrb r14d, xmm3, 0 - LONG $0x01c6f641 // test r14b, 1 - QUAD $0x0000010024b48948 // mov qword [rsp + 256], rsi - QUAD $0x000000f8248c8948 // mov qword [rsp + 248], rcx - JE LBB0_52 - LONG $0x2059e3c4; WORD $0x0f1c; BYTE $0x0c // vpinsrb xmm3, xmm4, byte [rdi + rcx], 12 - LONG $0x025de3c4; WORD $0x0fe3 // vpblendd ymm4, ymm4, ymm3, 15 - -LBB0_52: - WORD $0x894c; BYTE $0xd8 // mov rax, r11 - LONG $0x0dc88348 // or rax, 13 - LONG $0xfa66fdc5 // vpcmpgtd ymm7, ymm0, ymm2 - LONG $0xd86bc5c5 // 
vpackssdw ymm3, ymm7, ymm0 - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG $0x5879e2c4; BYTE $0xdb // vpbroadcastd xmm3, xmm3 - LONG $0xdb63e1c5 // vpacksswb xmm3, xmm3, xmm3 - LONG $0x1479e3c4; WORD $0x0dd9 // vpextrb ecx, xmm3, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_54 - LONG $0x2059e3c4; WORD $0x071c; BYTE $0x0d // vpinsrb xmm3, xmm4, byte [rdi + rax], 13 - LONG $0x025de3c4; WORD $0x0fe3 // vpblendd ymm4, ymm4, ymm3, 15 - -LBB0_54: - WORD $0x894c; BYTE $0xdb // mov rbx, r11 - LONG $0x0ecb8348 // or rbx, 14 - LONG $0xd86bc5c5 // vpackssdw ymm3, ymm7, ymm0 - LONG $0x00fde3c4; WORD $0xe8db // vpermq ymm3, ymm3, 232 - LONG $0xdb63e1c5 // vpacksswb xmm3, xmm3, xmm3 - LONG $0x1479e3c4; WORD $0x0ed9 // vpextrb ecx, xmm3, 14 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - LONG $0x245c8948; BYTE $0x50 // mov qword [rsp + 80], rbx - JE LBB0_56 - LONG $0x2059e3c4; WORD $0x1f1c; BYTE $0x0e // vpinsrb xmm3, xmm4, byte [rdi + rbx], 14 - LONG $0x025de3c4; WORD $0x0fe3 // vpblendd ymm4, ymm4, ymm3, 15 - -LBB0_56: - LONG $0x722dc1c4; WORD $0x03f0 // vpslld ymm10, ymm8, 3 - WORD $0x894c; BYTE $0xde // mov rsi, r11 - LONG $0x0fce8348 // or rsi, 15 - LONG $0xd86bc5c5 // vpackssdw ymm3, ymm7, ymm0 - LONG $0x00fde3c4; WORD $0xe8db // vpermq ymm3, ymm3, 232 - LONG $0xdb63e1c5 // vpacksswb xmm3, xmm3, xmm3 - LONG $0x1479e3c4; WORD $0x0fd9 // vpextrb ecx, xmm3, 15 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_58 - LONG $0x2059e3c4; WORD $0x371c; BYTE $0x0f // vpinsrb xmm3, xmm4, byte [rdi + rsi], 15 - LONG $0x025de3c4; WORD $0x0fe3 // vpblendd ymm4, ymm4, ymm3, 15 - -LBB0_58: - WORD $0x894d; BYTE $0xdf // mov r15, r11 - LONG $0x10cf8349 // or r15, 16 - LONG $0x6679c1c4; BYTE $0xda // vpcmpgtd xmm3, xmm0, xmm10 - LONG $0xd97ef9c5 // vmovd ecx, xmm3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - LONG $0x247c894c; BYTE $0x40 // mov qword [rsp + 64], r15 - LONG $0x24748948; BYTE $0x48 // mov qword [rsp + 72], rsi - JE LBB0_60 - LONG $0x397de3c4; WORD $0x01e3 
// vextracti128 xmm3, ymm4, 1 - LONG $0x2061a3c4; WORD $0x3f1c; BYTE $0x00 // vpinsrb xmm3, xmm3, byte [rdi + r15], 0 - LONG $0x385de3c4; WORD $0x01e3 // vinserti128 ymm4, ymm4, xmm3, 1 - -LBB0_60: - WORD $0x894c; BYTE $0xde // mov rsi, r11 - LONG $0x11ce8348 // or rsi, 17 - LONG $0x6679c1c4; BYTE $0xda // vpcmpgtd xmm3, xmm0, xmm10 - LONG $0xdb6be1c5 // vpackssdw xmm3, xmm3, xmm3 - LONG $0x00fde3c4; WORD $0xd4db // vpermq ymm3, ymm3, 212 - LONG $0xd863e5c5 // vpacksswb ymm3, ymm3, ymm0 - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG $0x1479e3c4; WORD $0x01d9 // vpextrb ecx, xmm3, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_62 - LONG $0x397de3c4; WORD $0x01e3 // vextracti128 xmm3, ymm4, 1 - LONG $0x2061e3c4; WORD $0x371c; BYTE $0x01 // vpinsrb xmm3, xmm3, byte [rdi + rsi], 1 - LONG $0x385de3c4; WORD $0x01e3 // vinserti128 ymm4, ymm4, xmm3, 1 - -LBB0_62: - WORD $0x894c; BYTE $0xdb // mov rbx, r11 - LONG $0x12cb8348 // or rbx, 18 - LONG $0x6679c1c4; BYTE $0xda // vpcmpgtd xmm3, xmm0, xmm10 - LONG $0xdb6be1c5 // vpackssdw xmm3, xmm3, xmm3 - LONG $0x00fde3c4; WORD $0xd4db // vpermq ymm3, ymm3, 212 - LONG $0xd863e5c5 // vpacksswb ymm3, ymm3, ymm0 - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG $0x1479e3c4; WORD $0x02d9 // vpextrb ecx, xmm3, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_64 - LONG $0x397de3c4; WORD $0x01e3 // vextracti128 xmm3, ymm4, 1 - LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x02 // vpinsrb xmm3, xmm3, byte [rdi + rbx], 2 - LONG $0x385de3c4; WORD $0x01e3 // vinserti128 ymm4, ymm4, xmm3, 1 - -LBB0_64: - WORD $0x894d; BYTE $0xdf // mov r15, r11 - LONG $0x13cf8349 // or r15, 19 - LONG $0x6679c1c4; BYTE $0xda // vpcmpgtd xmm3, xmm0, xmm10 - LONG $0xdb6be1c5 // vpackssdw xmm3, xmm3, xmm3 - LONG $0x00fde3c4; WORD $0xd4db // vpermq ymm3, ymm3, 212 - LONG $0xd863e5c5 // vpacksswb ymm3, ymm3, ymm0 - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG $0x1479e3c4; WORD $0x03d9 // vpextrb 
ecx, xmm3, 3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - QUAD $0x00036024847f7dc5; BYTE $0x00 // vmovdqa yword [rsp + 864], ymm8 - JE LBB0_66 - LONG $0x397de3c4; WORD $0x01e3 // vextracti128 xmm3, ymm4, 1 - LONG $0x2061a3c4; WORD $0x3f1c; BYTE $0x03 // vpinsrb xmm3, xmm3, byte [rdi + r15], 3 - LONG $0x385de3c4; WORD $0x01e3 // vinserti128 ymm4, ymm4, xmm3, 1 - -LBB0_66: - WORD $0x894d; BYTE $0xdd // mov r13, r11 - LONG $0x14cd8349 // or r13, 20 - LONG $0x667d41c4; BYTE $0xc2 // vpcmpgtd ymm8, ymm0, ymm10 - LONG $0x6b7dc1c4; BYTE $0xd8 // vpackssdw ymm3, ymm0, ymm8 - LONG $0xd863e5c5 // vpacksswb ymm3, ymm3, ymm0 - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG $0x1479e3c4; WORD $0x04d9 // vpextrb ecx, xmm3, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - LONG $0x246c894c; BYTE $0x38 // mov qword [rsp + 56], r13 - JE LBB0_68 - LONG $0x397de3c4; WORD $0x01e3 // vextracti128 xmm3, ymm4, 1 - LONG $0x2061a3c4; WORD $0x2f1c; BYTE $0x04 // vpinsrb xmm3, xmm3, byte [rdi + r13], 4 - LONG $0x385de3c4; WORD $0x01e3 // vinserti128 ymm4, ymm4, xmm3, 1 - -LBB0_68: - WORD $0x894d; BYTE $0xdd // mov r13, r11 - LONG $0x15cd8349 // or r13, 21 - LONG $0x6b7dc1c4; BYTE $0xd8 // vpackssdw ymm3, ymm0, ymm8 - LONG $0xd863e5c5 // vpacksswb ymm3, ymm3, ymm0 - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG $0x1479e3c4; WORD $0x05d9 // vpextrb ecx, xmm3, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - QUAD $0x00000080249c8948 // mov qword [rsp + 128], rbx - JE LBB0_70 - LONG $0x397de3c4; WORD $0x01e3 // vextracti128 xmm3, ymm4, 1 - LONG $0x2061a3c4; WORD $0x2f1c; BYTE $0x05 // vpinsrb xmm3, xmm3, byte [rdi + r13], 5 - LONG $0x385de3c4; WORD $0x01e3 // vinserti128 ymm4, ymm4, xmm3, 1 - -LBB0_70: - WORD $0x894d; BYTE $0xda // mov r10, r11 - LONG $0x16ca8349 // or r10, 22 - LONG $0x6b7dc1c4; BYTE $0xd8 // vpackssdw ymm3, ymm0, ymm8 - LONG $0xd863e5c5 // vpacksswb ymm3, ymm3, ymm0 - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG 
$0x1479e3c4; WORD $0x06d9 // vpextrb ecx, xmm3, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_72 - LONG $0x397de3c4; WORD $0x01e3 // vextracti128 xmm3, ymm4, 1 - LONG $0x2061a3c4; WORD $0x171c; BYTE $0x06 // vpinsrb xmm3, xmm3, byte [rdi + r10], 6 - LONG $0x385de3c4; WORD $0x01e3 // vinserti128 ymm4, ymm4, xmm3, 1 - -LBB0_72: - LONG $0x7225c1c4; WORD $0x03f1 // vpslld ymm11, ymm9, 3 - WORD $0x894c; BYTE $0xdb // mov rbx, r11 - LONG $0x17cb8348 // or rbx, 23 - LONG $0x6b7dc1c4; BYTE $0xd8 // vpackssdw ymm3, ymm0, ymm8 - LONG $0xd863e5c5 // vpacksswb ymm3, ymm3, ymm0 - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG $0x1479e3c4; WORD $0x07d9 // vpextrb ecx, xmm3, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - QUAD $0x000000f0249c8948 // mov qword [rsp + 240], rbx - QUAD $0x000380248c7f7dc5; BYTE $0x00 // vmovdqa yword [rsp + 896], ymm9 - JE LBB0_74 - LONG $0x397de3c4; WORD $0x01e3 // vextracti128 xmm3, ymm4, 1 - LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x07 // vpinsrb xmm3, xmm3, byte [rdi + rbx], 7 - LONG $0x385de3c4; WORD $0x01e3 // vinserti128 ymm4, ymm4, xmm3, 1 - -LBB0_74: - WORD $0x894c; BYTE $0xdb // mov rbx, r11 - LONG $0x18cb8348 // or rbx, 24 - LONG $0x667d41c4; BYTE $0xcb // vpcmpgtd ymm9, ymm0, ymm11 - LONG $0x00fd43c4; WORD $0x44e1 // vpermq ymm12, ymm9, 68 - LONG $0x637dc1c4; BYTE $0xdc // vpacksswb ymm3, ymm0, ymm12 - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG $0x1479e3c4; WORD $0x08d9 // vpextrb ecx, xmm3, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - QUAD $0x000000d8249c8948 // mov qword [rsp + 216], rbx - JE LBB0_76 - LONG $0x397de3c4; WORD $0x01e3 // vextracti128 xmm3, ymm4, 1 - LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x08 // vpinsrb xmm3, xmm3, byte [rdi + rbx], 8 - LONG $0x385de3c4; WORD $0x01e3 // vinserti128 ymm4, ymm4, xmm3, 1 - -LBB0_76: - WORD $0x894c; BYTE $0xdb // mov rbx, r11 - LONG $0x19cb8348 // or rbx, 25 - LONG $0x6679c1c4; BYTE $0xdb // vpcmpgtd xmm3, xmm0, xmm11 - LONG $0xdb6be1c5 // 
vpackssdw xmm3, xmm3, xmm3 - LONG $0x00fde3c4; WORD $0xd4db // vpermq ymm3, ymm3, 212 - LONG $0xdb63fdc5 // vpacksswb ymm3, ymm0, ymm3 - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG $0x1479e3c4; WORD $0x09d9 // vpextrb ecx, xmm3, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - QUAD $0x000000d0249c8948 // mov qword [rsp + 208], rbx - JE LBB0_78 - LONG $0x397de3c4; WORD $0x01e3 // vextracti128 xmm3, ymm4, 1 - LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x09 // vpinsrb xmm3, xmm3, byte [rdi + rbx], 9 - LONG $0x385de3c4; WORD $0x01e3 // vinserti128 ymm4, ymm4, xmm3, 1 - -LBB0_78: - WORD $0x894c; BYTE $0xdb // mov rbx, r11 - LONG $0x1acb8348 // or rbx, 26 - LONG $0x6679c1c4; BYTE $0xdb // vpcmpgtd xmm3, xmm0, xmm11 - LONG $0xdb6be1c5 // vpackssdw xmm3, xmm3, xmm3 - LONG $0x00fde3c4; WORD $0xd4db // vpermq ymm3, ymm3, 212 - LONG $0xdb63fdc5 // vpacksswb ymm3, ymm0, ymm3 - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG $0x1479e3c4; WORD $0x0ad9 // vpextrb ecx, xmm3, 10 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - QUAD $0x000000c8249c8948 // mov qword [rsp + 200], rbx - JE LBB0_80 - LONG $0x397de3c4; WORD $0x01e3 // vextracti128 xmm3, ymm4, 1 - LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x0a // vpinsrb xmm3, xmm3, byte [rdi + rbx], 10 - LONG $0x385de3c4; WORD $0x01e3 // vinserti128 ymm4, ymm4, xmm3, 1 - -LBB0_80: - WORD $0x894c; BYTE $0xdb // mov rbx, r11 - LONG $0x1bcb8348 // or rbx, 27 - LONG $0x6679c1c4; BYTE $0xdb // vpcmpgtd xmm3, xmm0, xmm11 - LONG $0xdb6be1c5 // vpackssdw xmm3, xmm3, xmm3 - LONG $0x00fde3c4; WORD $0xd4db // vpermq ymm3, ymm3, 212 - LONG $0xdb63fdc5 // vpacksswb ymm3, ymm0, ymm3 - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG $0x1479e3c4; WORD $0x0bd9 // vpextrb ecx, xmm3, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - QUAD $0x000000c0249c8948 // mov qword [rsp + 192], rbx - QUAD $0x0000009024948948 // mov qword [rsp + 144], rdx - LONG $0x24448948; BYTE $0x58 // mov qword [rsp + 88], rax - JE LBB0_82 - 
LONG $0x397de3c4; WORD $0x01e3 // vextracti128 xmm3, ymm4, 1 - LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x0b // vpinsrb xmm3, xmm3, byte [rdi + rbx], 11 - LONG $0x385de3c4; WORD $0x01e3 // vinserti128 ymm4, ymm4, xmm3, 1 - -LBB0_82: - WORD $0x894c; BYTE $0xda // mov rdx, r11 - LONG $0x1cca8348 // or rdx, 28 - LONG $0x6b7dc1c4; BYTE $0xd9 // vpackssdw ymm3, ymm0, ymm9 - LONG $0xdb63fdc5 // vpacksswb ymm3, ymm0, ymm3 - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG $0x1479e3c4; WORD $0x0cd9 // vpextrb ecx, xmm3, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_84 - LONG $0x397de3c4; WORD $0x01e3 // vextracti128 xmm3, ymm4, 1 - LONG $0x2061e3c4; WORD $0x171c; BYTE $0x0c // vpinsrb xmm3, xmm3, byte [rdi + rdx], 12 - LONG $0x385de3c4; WORD $0x01e3 // vinserti128 ymm4, ymm4, xmm3, 1 - -LBB0_84: - WORD $0x894c; BYTE $0xdb // mov rbx, r11 - LONG $0x1dcb8348 // or rbx, 29 - LONG $0x6b7dc1c4; BYTE $0xd9 // vpackssdw ymm3, ymm0, ymm9 - LONG $0xdb63fdc5 // vpacksswb ymm3, ymm0, ymm3 - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG $0x1479e3c4; WORD $0x0dd9 // vpextrb ecx, xmm3, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - QUAD $0x000000b0249c8948 // mov qword [rsp + 176], rbx - JE LBB0_86 - LONG $0x397de3c4; WORD $0x01e3 // vextracti128 xmm3, ymm4, 1 - LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x0d // vpinsrb xmm3, xmm3, byte [rdi + rbx], 13 - LONG $0x385de3c4; WORD $0x01e3 // vinserti128 ymm4, ymm4, xmm3, 1 - -LBB0_86: - WORD $0x894c; BYTE $0xdb // mov rbx, r11 - LONG $0x1ecb8348 // or rbx, 30 - LONG $0x6b7dc1c4; BYTE $0xd9 // vpackssdw ymm3, ymm0, ymm9 - LONG $0xdb63fdc5 // vpacksswb ymm3, ymm0, ymm3 - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG $0x1479e3c4; WORD $0x0ed9 // vpextrb ecx, xmm3, 14 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - QUAD $0x000000a8249c8948 // mov qword [rsp + 168], rbx - JE LBB0_88 - LONG $0x397de3c4; WORD $0x01e3 // vextracti128 xmm3, ymm4, 1 - LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x0e 
// vpinsrb xmm3, xmm3, byte [rdi + rbx], 14 - LONG $0x385de3c4; WORD $0x01e3 // vinserti128 ymm4, ymm4, xmm3, 1 - -LBB0_88: - WORD $0x894c; BYTE $0xdb // mov rbx, r11 - LONG $0x1fcb8348 // or rbx, 31 - LONG $0x6b7dc1c4; BYTE $0xd9 // vpackssdw ymm3, ymm0, ymm9 - LONG $0xdb63fdc5 // vpacksswb ymm3, ymm0, ymm3 - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG $0x1479e3c4; WORD $0x0fd9 // vpextrb ecx, xmm3, 15 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - QUAD $0x000000a0249c8948 // mov qword [rsp + 160], rbx - JE LBB0_90 - LONG $0x397de3c4; WORD $0x01e3 // vextracti128 xmm3, ymm4, 1 - LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x0f // vpinsrb xmm3, xmm3, byte [rdi + rbx], 15 - LONG $0x385de3c4; WORD $0x01e3 // vinserti128 ymm4, ymm4, xmm3, 1 - -LBB0_90: - LONG $0x357de2c4; BYTE $0xd9 // vpmovzxdq ymm3, xmm1 - QUAD $0x000200249c7ffdc5; BYTE $0x00 // vmovdqa yword [rsp + 512], ymm3 - QUAD $0x00000080bddb5dc5 // vpand ymm15, ymm4, yword 128[rbp] /* [rip + .LCPI0_4] */ - LONG $0xd966f9c5 // vpcmpgtd xmm3, xmm0, xmm1 - LONG $0xd97ef9c5 // vmovd ecx, xmm3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_92 - QUAD $0x000200249c6ffdc5; BYTE $0x00 // vmovdqa ymm3, yword [rsp + 512] - LONG $0x7ef9e1c4; BYTE $0xd9 // vmovq rcx, xmm3 - LONG $0x147943c4; WORD $0x083c; BYTE $0x00 // vpextrb byte [r8 + rcx], xmm15, 0 - -LBB0_92: - LONG $0xd966f9c5 // vpcmpgtd xmm3, xmm0, xmm1 - LONG $0xdb6be1c5 // vpackssdw xmm3, xmm3, xmm3 - LONG $0xdb63e1c5 // vpacksswb xmm3, xmm3, xmm3 - LONG $0x1479e3c4; WORD $0x01d9 // vpextrb ecx, xmm3, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_94 - QUAD $0x000200249c6ffdc5; BYTE $0x00 // vmovdqa ymm3, yword [rsp + 512] - LONG $0x16f9e3c4; WORD $0x01d9 // vpextrq rcx, xmm3, 1 - LONG $0x147943c4; WORD $0x083c; BYTE $0x01 // vpextrb byte [r8 + rcx], xmm15, 1 - -LBB0_94: - LONG $0xd966f9c5 // vpcmpgtd xmm3, xmm0, xmm1 - LONG $0xdb6be1c5 // vpackssdw xmm3, xmm3, xmm3 - LONG $0xdb63e1c5 // vpacksswb xmm3, xmm3, xmm3 - LONG $0x1479e3c4; 
WORD $0x02d9 // vpextrb ecx, xmm3, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_96 - QUAD $0x000200249c6ffdc5; BYTE $0x00 // vmovdqa ymm3, yword [rsp + 512] - LONG $0x397de3c4; WORD $0x01db // vextracti128 xmm3, ymm3, 1 - LONG $0x7ef9e1c4; BYTE $0xd9 // vmovq rcx, xmm3 - LONG $0x147943c4; WORD $0x083c; BYTE $0x02 // vpextrb byte [r8 + rcx], xmm15, 2 - -LBB0_96: - LONG $0xc966f9c5 // vpcmpgtd xmm1, xmm0, xmm1 - LONG $0xc96bf1c5 // vpackssdw xmm1, xmm1, xmm1 - LONG $0xc963f1c5 // vpacksswb xmm1, xmm1, xmm1 - LONG $0x1479e3c4; WORD $0x03c9 // vpextrb ecx, xmm1, 3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_98 - QUAD $0x000200248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 512] - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x147943c4; WORD $0x083c; BYTE $0x03 // vpextrb byte [r8 + rcx], xmm15, 3 - -LBB0_98: - LONG $0x357dc2c4; BYTE $0xcd // vpmovzxdq ymm1, xmm13 - QUAD $0x0001e0248c7ffdc5; BYTE $0x00 // vmovdqa yword [rsp + 480], ymm1 - LONG $0x01c1f641 // test r9b, 1 - JE LBB0_100 - QUAD $0x0001e0248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 480] - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x147943c4; WORD $0x083c; BYTE $0x04 // vpextrb byte [r8 + rcx], xmm15, 4 - -LBB0_100: - LONG $0xc86bcdc5 // vpackssdw ymm1, ymm6, ymm0 - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x5879e2c4; BYTE $0xc9 // vpbroadcastd xmm1, xmm1 - LONG $0xc963f1c5 // vpacksswb xmm1, xmm1, xmm1 - LONG $0x1479e3c4; WORD $0x05c9 // vpextrb ecx, xmm1, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_102 - QUAD $0x0001e0248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 480] - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x147943c4; WORD $0x083c; BYTE $0x05 // vpextrb byte [r8 + rcx], xmm15, 5 - -LBB0_102: - LONG $0xc86bcdc5 // vpackssdw ymm1, ymm6, ymm0 - LONG $0x00fde3c4; WORD $0xe8c9 // vpermq ymm1, ymm1, 232 - LONG $0xc963f1c5 
// vpacksswb xmm1, xmm1, xmm1 - LONG $0x1479e3c4; WORD $0x06c9 // vpextrb ecx, xmm1, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_104 - QUAD $0x0001e0248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 480] - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x147943c4; WORD $0x083c; BYTE $0x06 // vpextrb byte [r8 + rcx], xmm15, 6 - -LBB0_104: - LONG $0xc86bcdc5 // vpackssdw ymm1, ymm6, ymm0 - LONG $0x00fde3c4; WORD $0xe8c9 // vpermq ymm1, ymm1, 232 - LONG $0xc963f1c5 // vpacksswb xmm1, xmm1, xmm1 - LONG $0x1479e3c4; WORD $0x07c9 // vpextrb ecx, xmm1, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_106 - QUAD $0x0001e0248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 480] - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x147943c4; WORD $0x083c; BYTE $0x07 // vpextrb byte [r8 + rcx], xmm15, 7 - -LBB0_106: - LONG $0x357de2c4; BYTE $0xca // vpmovzxdq ymm1, xmm2 - QUAD $0x0001c0248c7ffdc5; BYTE $0x00 // vmovdqa yword [rsp + 448], ymm1 - LONG $0xca66f9c5 // vpcmpgtd xmm1, xmm0, xmm2 - LONG $0x1479e3c4; WORD $0x00c9 // vpextrb ecx, xmm1, 0 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_108 - QUAD $0x0001c0248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 448] - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x147943c4; WORD $0x083c; BYTE $0x08 // vpextrb byte [r8 + rcx], xmm15, 8 - -LBB0_108: - LONG $0xca66f9c5 // vpcmpgtd xmm1, xmm0, xmm2 - LONG $0xc96bf1c5 // vpackssdw xmm1, xmm1, xmm1 - LONG $0xc963f1c5 // vpacksswb xmm1, xmm1, xmm1 - LONG $0x1479e3c4; WORD $0x09c9 // vpextrb ecx, xmm1, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_110 - QUAD $0x0001c0248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 448] - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x147943c4; WORD $0x083c; BYTE $0x09 // vpextrb byte [r8 + rcx], xmm15, 9 - -LBB0_110: - LONG $0xca66f9c5 // 
vpcmpgtd xmm1, xmm0, xmm2 - LONG $0xc96bf1c5 // vpackssdw xmm1, xmm1, xmm1 - LONG $0xc963f1c5 // vpacksswb xmm1, xmm1, xmm1 - LONG $0x1479e3c4; WORD $0x0ac9 // vpextrb ecx, xmm1, 10 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_112 - QUAD $0x0001c0248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 448] - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x147943c4; WORD $0x083c; BYTE $0x0a // vpextrb byte [r8 + rcx], xmm15, 10 - -LBB0_112: - LONG $0xca66f9c5 // vpcmpgtd xmm1, xmm0, xmm2 - LONG $0xc96bf1c5 // vpackssdw xmm1, xmm1, xmm1 - LONG $0xc963f1c5 // vpacksswb xmm1, xmm1, xmm1 - LONG $0x1479e3c4; WORD $0x0bc9 // vpextrb ecx, xmm1, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_114 - QUAD $0x0001c0248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 448] - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x147943c4; WORD $0x083c; BYTE $0x0b // vpextrb byte [r8 + rcx], xmm15, 11 - -LBB0_114: - QUAD $0x0000008824b48948 // mov qword [rsp + 136], rsi - LONG $0x357de2c4; BYTE $0xcd // vpmovzxdq ymm1, xmm5 - QUAD $0x0001a0248c7ffdc5; BYTE $0x00 // vmovdqa yword [rsp + 416], ymm1 - LONG $0x01c6f641 // test r14b, 1 - JE LBB0_116 - QUAD $0x0001a0248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 416] - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x147943c4; WORD $0x083c; BYTE $0x0c // vpextrb byte [r8 + rcx], xmm15, 12 - -LBB0_116: - LONG $0xc86bc5c5 // vpackssdw ymm1, ymm7, ymm0 - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x5879e2c4; BYTE $0xc9 // vpbroadcastd xmm1, xmm1 - LONG $0xc963f1c5 // vpacksswb xmm1, xmm1, xmm1 - LONG $0x1479e3c4; WORD $0x0dc9 // vpextrb ecx, xmm1, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - QUAD $0x00000098248c8b4c // mov r9, qword [rsp + 152] - QUAD $0x0000012824b48b48 // mov rsi, qword [rsp + 296] - LONG $0x24748b4c; BYTE $0x68 // mov r14, 
qword [rsp + 104] - QUAD $0x0000012024848b48 // mov rax, qword [rsp + 288] - JE LBB0_118 - QUAD $0x0001a0248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 416] - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x147943c4; WORD $0x083c; BYTE $0x0d // vpextrb byte [r8 + rcx], xmm15, 13 - -LBB0_118: - LONG $0xc86bc5c5 // vpackssdw ymm1, ymm7, ymm0 - LONG $0x00fde3c4; WORD $0xe8c9 // vpermq ymm1, ymm1, 232 - LONG $0xc963f1c5 // vpacksswb xmm1, xmm1, xmm1 - LONG $0x1479e3c4; WORD $0x0ec9 // vpextrb ecx, xmm1, 14 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_120 - QUAD $0x0001a0248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 416] - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x147943c4; WORD $0x083c; BYTE $0x0e // vpextrb byte [r8 + rcx], xmm15, 14 - -LBB0_120: - LONG $0xc86bc5c5 // vpackssdw ymm1, ymm7, ymm0 - LONG $0x00fde3c4; WORD $0xe8c9 // vpermq ymm1, ymm1, 232 - LONG $0xc963f1c5 // vpacksswb xmm1, xmm1, xmm1 - LONG $0x1479e3c4; WORD $0x0fc9 // vpextrb ecx, xmm1, 15 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_122 - QUAD $0x0001a0248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 416] - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x147943c4; WORD $0x083c; BYTE $0x0f // vpextrb byte [r8 + rcx], xmm15, 15 - -LBB0_122: - LONG $0x357dc2c4; BYTE $0xca // vpmovzxdq ymm1, xmm10 - QUAD $0x000180248c7ffdc5; BYTE $0x00 // vmovdqa yword [rsp + 384], ymm1 - LONG $0x6679c1c4; BYTE $0xca // vpcmpgtd xmm1, xmm0, xmm10 - LONG $0xc97ef9c5 // vmovd ecx, xmm1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_124 - QUAD $0x000180248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 384] - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x00 // vpextrb byte [r8 + rcx], xmm1, 0 - -LBB0_124: - 
LONG $0x6679c1c4; BYTE $0xca // vpcmpgtd xmm1, xmm0, xmm10 - LONG $0xc96bf1c5 // vpackssdw xmm1, xmm1, xmm1 - LONG $0x00fde3c4; WORD $0xd4c9 // vpermq ymm1, ymm1, 212 - LONG $0xc863f5c5 // vpacksswb ymm1, ymm1, ymm0 - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x1479e3c4; WORD $0x01c9 // vpextrb ecx, xmm1, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_126 - QUAD $0x000180248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 384] - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x01 // vpextrb byte [r8 + rcx], xmm1, 1 - -LBB0_126: - LONG $0x6679c1c4; BYTE $0xca // vpcmpgtd xmm1, xmm0, xmm10 - LONG $0xc96bf1c5 // vpackssdw xmm1, xmm1, xmm1 - LONG $0x00fde3c4; WORD $0xd4c9 // vpermq ymm1, ymm1, 212 - LONG $0xc863f5c5 // vpacksswb ymm1, ymm1, ymm0 - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x1479e3c4; WORD $0x02c9 // vpextrb ecx, xmm1, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_128 - QUAD $0x000180248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 384] - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x02 // vpextrb byte [r8 + rcx], xmm1, 2 - -LBB0_128: - LONG $0x397d63c4; WORD $0x01d1 // vextracti128 xmm1, ymm10, 1 - LONG $0x6679c1c4; BYTE $0xd2 // vpcmpgtd xmm2, xmm0, xmm10 - LONG $0xd26be9c5 // vpackssdw xmm2, xmm2, xmm2 - LONG $0x00fde3c4; WORD $0xd4d2 // vpermq ymm2, ymm2, 212 - LONG $0xd063edc5 // vpacksswb ymm2, ymm2, ymm0 - LONG $0x397de3c4; WORD $0x01d2 // vextracti128 xmm2, ymm2, 1 - LONG $0x1479e3c4; WORD $0x03d1 // vpextrb ecx, xmm2, 3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_130 - QUAD $0x00018024946ffdc5; BYTE $0x00 // vmovdqa ymm2, yword [rsp + 384] - LONG $0x397de3c4; WORD $0x01d2 // vextracti128 
xmm2, ymm2, 1 - LONG $0x16f9e3c4; WORD $0x01d1 // vpextrq rcx, xmm2, 1 - LONG $0x397d63c4; WORD $0x01fa // vextracti128 xmm2, ymm15, 1 - LONG $0x1479c3c4; WORD $0x0814; BYTE $0x03 // vpextrb byte [r8 + rcx], xmm2, 3 - -LBB0_130: - LONG $0x357de2c4; BYTE $0xc9 // vpmovzxdq ymm1, xmm1 - QUAD $0x000160248c7ffdc5; BYTE $0x00 // vmovdqa yword [rsp + 352], ymm1 - LONG $0x6b7dc1c4; BYTE $0xc8 // vpackssdw ymm1, ymm0, ymm8 - LONG $0xc863f5c5 // vpacksswb ymm1, ymm1, ymm0 - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x1479e3c4; WORD $0x04c9 // vpextrb ecx, xmm1, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_132 - QUAD $0x000160248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 352] - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x04 // vpextrb byte [r8 + rcx], xmm1, 4 - -LBB0_132: - LONG $0x6b7dc1c4; BYTE $0xc8 // vpackssdw ymm1, ymm0, ymm8 - LONG $0xc863f5c5 // vpacksswb ymm1, ymm1, ymm0 - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x1479e3c4; WORD $0x05c9 // vpextrb ecx, xmm1, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_134 - QUAD $0x000160248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 352] - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x05 // vpextrb byte [r8 + rcx], xmm1, 5 - -LBB0_134: - LONG $0x6b7dc1c4; BYTE $0xc8 // vpackssdw ymm1, ymm0, ymm8 - LONG $0xc863f5c5 // vpacksswb ymm1, ymm1, ymm0 - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x1479e3c4; WORD $0x06c9 // vpextrb ecx, xmm1, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_136 - QUAD $0x000160248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 352] - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD 
$0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x06 // vpextrb byte [r8 + rcx], xmm1, 6 - -LBB0_136: - LONG $0x6b7dc1c4; BYTE $0xc8 // vpackssdw ymm1, ymm0, ymm8 - LONG $0xc863f5c5 // vpacksswb ymm1, ymm1, ymm0 - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x1479e3c4; WORD $0x07c9 // vpextrb ecx, xmm1, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_138 - QUAD $0x000160248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 352] - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x07 // vpextrb byte [r8 + rcx], xmm1, 7 - -LBB0_138: - LONG $0x357dc2c4; BYTE $0xcb // vpmovzxdq ymm1, xmm11 - QUAD $0x000140248c7ffdc5; BYTE $0x00 // vmovdqa yword [rsp + 320], ymm1 - LONG $0x637dc1c4; BYTE $0xcc // vpacksswb ymm1, ymm0, ymm12 - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x1479e3c4; WORD $0x08c9 // vpextrb ecx, xmm1, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_140 - QUAD $0x000140248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 320] - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x08 // vpextrb byte [r8 + rcx], xmm1, 8 - -LBB0_140: - LONG $0x6679c1c4; BYTE $0xcb // vpcmpgtd xmm1, xmm0, xmm11 - LONG $0xc96bf1c5 // vpackssdw xmm1, xmm1, xmm1 - LONG $0x00fde3c4; WORD $0xd4c9 // vpermq ymm1, ymm1, 212 - LONG $0xc963fdc5 // vpacksswb ymm1, ymm0, ymm1 - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x1479e3c4; WORD $0x09c9 // vpextrb ecx, xmm1, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_142 - QUAD $0x000140248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 320] - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, 
ymm15, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x09 // vpextrb byte [r8 + rcx], xmm1, 9 - -LBB0_142: - LONG $0x6679c1c4; BYTE $0xcb // vpcmpgtd xmm1, xmm0, xmm11 - LONG $0xc96bf1c5 // vpackssdw xmm1, xmm1, xmm1 - LONG $0x00fde3c4; WORD $0xd4c9 // vpermq ymm1, ymm1, 212 - LONG $0xc963fdc5 // vpacksswb ymm1, ymm0, ymm1 - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x1479e3c4; WORD $0x0ac9 // vpextrb ecx, xmm1, 10 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_144 - QUAD $0x000140248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 320] - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0a // vpextrb byte [r8 + rcx], xmm1, 10 - -LBB0_144: - LONG $0x397d63c4; WORD $0x01d9 // vextracti128 xmm1, ymm11, 1 - LONG $0x6679c1c4; BYTE $0xe3 // vpcmpgtd xmm4, xmm0, xmm11 - LONG $0xe46bd9c5 // vpackssdw xmm4, xmm4, xmm4 - LONG $0x00fde3c4; WORD $0xd4e4 // vpermq ymm4, ymm4, 212 - LONG $0xe463fdc5 // vpacksswb ymm4, ymm0, ymm4 - LONG $0x397de3c4; WORD $0x01e4 // vextracti128 xmm4, ymm4, 1 - LONG $0x1479e3c4; WORD $0x0be1 // vpextrb ecx, xmm4, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_146 - QUAD $0x00014024946ffdc5; BYTE $0x00 // vmovdqa ymm2, yword [rsp + 320] - LONG $0x397de3c4; WORD $0x01d4 // vextracti128 xmm4, ymm2, 1 - LONG $0x16f9e3c4; WORD $0x01e1 // vpextrq rcx, xmm4, 1 - LONG $0x397d63c4; WORD $0x01fc // vextracti128 xmm4, ymm15, 1 - LONG $0x1479c3c4; WORD $0x0824; BYTE $0x0b // vpextrb byte [r8 + rcx], xmm4, 11 - -LBB0_146: - LONG $0x357de2c4; BYTE $0xe1 // vpmovzxdq ymm4, xmm1 - LONG $0x6b7dc1c4; BYTE $0xc9 // vpackssdw ymm1, ymm0, ymm9 - LONG $0xc963fdc5 // vpacksswb ymm1, ymm0, ymm1 - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x1479e3c4; WORD $0x0cc9 // vpextrb ecx, xmm1, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_148 - 
LONG $0x7ef9e1c4; BYTE $0xe1 // vmovq rcx, xmm4 - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0c // vpextrb byte [r8 + rcx], xmm1, 12 - -LBB0_148: - LONG $0x6b7dc1c4; BYTE $0xc9 // vpackssdw ymm1, ymm0, ymm9 - LONG $0xc963fdc5 // vpacksswb ymm1, ymm0, ymm1 - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x1479e3c4; WORD $0x0dc9 // vpextrb ecx, xmm1, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_150 - LONG $0x16f9e3c4; WORD $0x01e1 // vpextrq rcx, xmm4, 1 - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0d // vpextrb byte [r8 + rcx], xmm1, 13 - -LBB0_150: - LONG $0x6b7dc1c4; BYTE $0xc9 // vpackssdw ymm1, ymm0, ymm9 - LONG $0xc963fdc5 // vpacksswb ymm1, ymm0, ymm1 - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x1479e3c4; WORD $0x0ec9 // vpextrb ecx, xmm1, 14 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_152 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0e // vpextrb byte [r8 + rcx], xmm1, 14 - -LBB0_152: - LONG $0x6b7dc1c4; BYTE $0xc9 // vpackssdw ymm1, ymm0, ymm9 - LONG $0xc963fdc5 // vpacksswb ymm1, ymm0, ymm1 - LONG $0x397de3c4; WORD $0x01c9 // vextracti128 xmm1, ymm1, 1 - LONG $0x1479e3c4; WORD $0x0fc9 // vpextrb ecx, xmm1, 15 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_154 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0f // vpextrb byte [r8 + rcx], xmm1, 15 - -LBB0_154: - LONG $0x6b4dc1c4; BYTE $0xc8 // vpackssdw ymm1, ymm6, ymm8 - LONG $0x00fde3c4; WORD $0xd8c9 // vpermq ymm1, ymm1, 216 - LONG $0x6b45c1c4; BYTE $0xe9 // vpackssdw 
ymm5, ymm7, ymm9 - LONG $0x00fde3c4; WORD $0xd8ed // vpermq ymm5, ymm5, 216 - LONG $0xcd63f5c5 // vpacksswb ymm1, ymm1, ymm5 - QUAD $0x00030024946ffdc5; BYTE $0x00 // vmovdqa ymm2, yword [rsp + 768] - QUAD $0x00020024bceb6dc5; BYTE $0x00 // vpor ymm15, ymm2, yword [rsp + 512] - QUAD $0x0001e024acebedc5; BYTE $0x00 // vpor ymm5, ymm2, yword [rsp + 480] - QUAD $0x0001802494eb6dc5; BYTE $0x00 // vpor ymm10, ymm2, yword [rsp + 384] - QUAD $0x000160248ceb6dc5; BYTE $0x00 // vpor ymm9, ymm2, yword [rsp + 352] - QUAD $0x0001c024a4eb6dc5; BYTE $0x00 // vpor ymm12, ymm2, yword [rsp + 448] - QUAD $0x0001a0249ceb6dc5; BYTE $0x00 // vpor ymm11, ymm2, yword [rsp + 416] - QUAD $0x0001402484eb6dc5; BYTE $0x00 // vpor ymm8, ymm2, yword [rsp + 320] - LONG $0xfaebddc5 // vpor ymm7, ymm4, ymm2 - LONG $0x463de3c4; WORD $0x31f7 // vperm2i128 ymm6, ymm8, ymm7, 49 - LONG $0x383d63c4; WORD $0x01ef // vinserti128 ymm13, ymm8, xmm7, 1 - LONG $0xf6c694c5; BYTE $0x88 // vshufps ymm6, ymm13, ymm6, 136 - LONG $0x461d43c4; WORD $0x31eb // vperm2i128 ymm13, ymm12, ymm11, 49 - LONG $0x381d43c4; WORD $0x01f3 // vinserti128 ymm14, ymm12, xmm11, 1 - LONG $0xc60c41c4; WORD $0x88ed // vshufps ymm13, ymm14, ymm13, 136 - LONG $0x462d43c4; WORD $0x31f1 // vperm2i128 ymm14, ymm10, ymm9, 49 - LONG $0x382dc3c4; WORD $0x01d1 // vinserti128 ymm2, ymm10, xmm9, 1 - LONG $0xc66cc1c4; WORD $0x88d6 // vshufps ymm2, ymm2, ymm14, 136 - LONG $0x460563c4; WORD $0x31f5 // vperm2i128 ymm14, ymm15, ymm5, 49 - LONG $0x3805e3c4; WORD $0x01dd // vinserti128 ymm3, ymm15, xmm5, 1 - LONG $0xc664c1c4; WORD $0x88de // vshufps ymm3, ymm3, ymm14, 136 - LONG $0xdb66fdc5 // vpcmpgtd ymm3, ymm0, ymm3 - LONG $0xd266fdc5 // vpcmpgtd ymm2, ymm0, ymm2 - LONG $0xd26be5c5 // vpackssdw ymm2, ymm3, ymm2 - LONG $0x667dc1c4; BYTE $0xdd // vpcmpgtd ymm3, ymm0, ymm13 - LONG $0xf666fdc5 // vpcmpgtd ymm6, ymm0, ymm6 - LONG $0xde6be5c5 // vpackssdw ymm3, ymm3, ymm6 - LONG $0x00fde3c4; WORD $0xd8d2 // vpermq ymm2, ymm2, 216 - LONG $0x00fde3c4; WORD 
$0xd8db // vpermq ymm3, ymm3, 216 - LONG $0xd363edc5 // vpacksswb ymm2, ymm2, ymm3 - LONG $0xf1dbedc5 // vpand ymm6, ymm2, ymm1 - LONG $0xf17ef9c5 // vmovd ecx, xmm6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_155 - LONG $0x787d22c4; WORD $0x1f34 // vpbroadcastb ymm14, byte [rdi + r11] - LONG $0x1479e3c4; WORD $0x01f1 // vpextrb ecx, xmm6, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_661 - -LBB0_156: - QUAD $0x000000e0249c8b48 // mov rbx, qword [rsp + 224] - LONG $0x1479e3c4; WORD $0x02f1 // vpextrb ecx, xmm6, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_157 - -LBB0_662: - LONG $0x2009e3c4; WORD $0x370c; BYTE $0x02 // vpinsrb xmm1, xmm14, byte [rdi + rsi], 2 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x03f1 // vpextrb ecx, xmm6, 3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_663 - -LBB0_158: - LONG $0x1479e3c4; WORD $0x04f1 // vpextrb ecx, xmm6, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_159 - -LBB0_664: - QUAD $0x00000108248c8b48 // mov rcx, qword [rsp + 264] - LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x04 // vpinsrb xmm1, xmm14, byte [rdi + rcx], 4 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x05f1 // vpextrb ecx, xmm6, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_665 - -LBB0_160: - QUAD $0x000000e824b48b48 // mov rsi, qword [rsp + 232] - LONG $0x1479e3c4; WORD $0x06f1 // vpextrb ecx, xmm6, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_161 - -LBB0_666: - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x06 // vpinsrb xmm1, xmm14, byte [rdi + rax], 6 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x07f1 // vpextrb ecx, xmm6, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_667 - -LBB0_162: - LONG $0x1479e3c4; WORD $0x08f1 // vpextrb ecx, xmm6, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_163 - -LBB0_668: - LONG $0x24448b48; BYTE $0x60 // mov rax, qword [rsp + 96] 
- LONG $0x2009e3c4; WORD $0x070c; BYTE $0x08 // vpinsrb xmm1, xmm14, byte [rdi + rax], 8 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x09f1 // vpextrb ecx, xmm6, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_669 - -LBB0_164: - LONG $0x1479e3c4; WORD $0x0af1 // vpextrb ecx, xmm6, 10 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_165 - -LBB0_670: - QUAD $0x0000009024848b48 // mov rax, qword [rsp + 144] - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0a // vpinsrb xmm1, xmm14, byte [rdi + rax], 10 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb ecx, xmm6, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_671 - -LBB0_166: - LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb ecx, xmm6, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_167 - -LBB0_672: - QUAD $0x000000f824848b48 // mov rax, qword [rsp + 248] - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0c // vpinsrb xmm1, xmm14, byte [rdi + rax], 12 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0df1 // vpextrb ecx, xmm6, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_673 - -LBB0_168: - LONG $0x1479e3c4; WORD $0x0ef1 // vpextrb ecx, xmm6, 14 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_169 - -LBB0_674: - LONG $0x24448b48; BYTE $0x50 // mov rax, qword [rsp + 80] - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0e // vpinsrb xmm1, xmm14, byte [rdi + rax], 14 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0ff1 // vpextrb ecx, xmm6, 15 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_170 - JMP LBB0_171 - -LBB0_155: - LONG $0x1479e3c4; WORD $0x01f1 // vpextrb ecx, xmm6, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_156 - -LBB0_661: - LONG $0x2009a3c4; WORD $0x0f0c; BYTE $0x01 // vpinsrb xmm1, xmm14, byte [rdi + r9], 1 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - QUAD 
$0x000000e0249c8b48 // mov rbx, qword [rsp + 224] - LONG $0x1479e3c4; WORD $0x02f1 // vpextrb ecx, xmm6, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_662 - -LBB0_157: - LONG $0x1479e3c4; WORD $0x03f1 // vpextrb ecx, xmm6, 3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_158 - -LBB0_663: - QUAD $0x00000110248c8b48 // mov rcx, qword [rsp + 272] - LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x03 // vpinsrb xmm1, xmm14, byte [rdi + rcx], 3 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x04f1 // vpextrb ecx, xmm6, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_664 - -LBB0_159: - LONG $0x1479e3c4; WORD $0x05f1 // vpextrb ecx, xmm6, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_160 - -LBB0_665: - LONG $0x2009a3c4; WORD $0x370c; BYTE $0x05 // vpinsrb xmm1, xmm14, byte [rdi + r14], 5 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - QUAD $0x000000e824b48b48 // mov rsi, qword [rsp + 232] - LONG $0x1479e3c4; WORD $0x06f1 // vpextrb ecx, xmm6, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_666 - -LBB0_161: - LONG $0x1479e3c4; WORD $0x07f1 // vpextrb ecx, xmm6, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_162 - -LBB0_667: - LONG $0x2009e3c4; WORD $0x370c; BYTE $0x07 // vpinsrb xmm1, xmm14, byte [rdi + rsi], 7 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x08f1 // vpextrb ecx, xmm6, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_668 - -LBB0_163: - LONG $0x1479e3c4; WORD $0x09f1 // vpextrb ecx, xmm6, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_164 - -LBB0_669: - LONG $0x2009e3c4; WORD $0x1f0c; BYTE $0x09 // vpinsrb xmm1, xmm14, byte [rdi + rbx], 9 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0af1 // vpextrb ecx, xmm6, 10 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_670 - -LBB0_165: - LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb ecx, xmm6, 11 - WORD $0xc1f6; 
BYTE $0x01 // test cl, 1 - JE LBB0_166 - -LBB0_671: - QUAD $0x0000010024848b48 // mov rax, qword [rsp + 256] - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0b // vpinsrb xmm1, xmm14, byte [rdi + rax], 11 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb ecx, xmm6, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_672 - -LBB0_167: - LONG $0x1479e3c4; WORD $0x0df1 // vpextrb ecx, xmm6, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_168 - -LBB0_673: - LONG $0x24448b48; BYTE $0x58 // mov rax, qword [rsp + 88] - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0d // vpinsrb xmm1, xmm14, byte [rdi + rax], 13 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0ef1 // vpextrb ecx, xmm6, 14 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_674 - -LBB0_169: - LONG $0x1479e3c4; WORD $0x0ff1 // vpextrb ecx, xmm6, 15 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_171 - -LBB0_170: - LONG $0x24448b48; BYTE $0x48 // mov rax, qword [rsp + 72] - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0f // vpinsrb xmm1, xmm14, byte [rdi + rax], 15 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_171: - QUAD $0x000000d0248c8b48 // mov rcx, qword [rsp + 208] - LONG $0x397dc3c4; WORD $0x01f5 // vextracti128 xmm13, ymm6, 1 - LONG $0xe87e79c5 // vmovd eax, xmm13 - LONG $0x2c244489 // mov dword [rsp + 44], eax - WORD $0x01a8 // test al, 1 - JE LBB0_172 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x24448b48; BYTE $0x40 // mov rax, qword [rsp + 64] - LONG $0x2071e3c4; WORD $0x070c; BYTE $0x00 // vpinsrb xmm1, xmm1, byte [rdi + rax], 0 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - LONG $0x147963c4; WORD $0x01e8 // vpextrb eax, xmm13, 1 - LONG $0x28244489 // mov dword [rsp + 40], eax - WORD $0x01a8 // test al, 1 - JNE LBB0_676 - -LBB0_173: - LONG $0x147963c4; WORD $0x02e8 // vpextrb eax, xmm13, 2 - LONG $0x24244489 // 
mov dword [rsp + 36], eax - WORD $0x01a8 // test al, 1 - JE LBB0_174 - -LBB0_677: - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - QUAD $0x0000008024848b48 // mov rax, qword [rsp + 128] - LONG $0x2071e3c4; WORD $0x070c; BYTE $0x02 // vpinsrb xmm1, xmm1, byte [rdi + rax], 2 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - LONG $0x147963c4; WORD $0x03e8 // vpextrb eax, xmm13, 3 - LONG $0x20244489 // mov dword [rsp + 32], eax - WORD $0x01a8 // test al, 1 - JNE LBB0_678 - -LBB0_175: - LONG $0x147963c4; WORD $0x04e8 // vpextrb eax, xmm13, 4 - LONG $0x1c244489 // mov dword [rsp + 28], eax - WORD $0x01a8 // test al, 1 - JE LBB0_176 - -LBB0_679: - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x24448b48; BYTE $0x38 // mov rax, qword [rsp + 56] - LONG $0x2071e3c4; WORD $0x070c; BYTE $0x04 // vpinsrb xmm1, xmm1, byte [rdi + rax], 4 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - LONG $0x147963c4; WORD $0x05e8 // vpextrb eax, xmm13, 5 - LONG $0x18244489 // mov dword [rsp + 24], eax - WORD $0x01a8 // test al, 1 - JNE LBB0_680 - -LBB0_177: - LONG $0x147963c4; WORD $0x06e8 // vpextrb eax, xmm13, 6 - LONG $0x14244489 // mov dword [rsp + 20], eax - WORD $0x01a8 // test al, 1 - JE LBB0_178 - -LBB0_681: - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x2071a3c4; WORD $0x170c; BYTE $0x06 // vpinsrb xmm1, xmm1, byte [rdi + r10], 6 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - LONG $0x147963c4; WORD $0x07e8 // vpextrb eax, xmm13, 7 - LONG $0x3c248489; WORD $0x0001; BYTE $0x00 // mov dword [rsp + 316], eax - WORD $0x01a8 // test al, 1 - JNE LBB0_682 - -LBB0_179: - QUAD $0x000000d824848b48 // mov rax, qword [rsp + 216] - LONG $0x147963c4; WORD $0x08eb // vpextrb ebx, xmm13, 8 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_181 - -LBB0_180: - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x2071e3c4; WORD $0x070c; BYTE $0x08 // 
vpinsrb xmm1, xmm1, byte [rdi + rax], 8 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - -LBB0_181: - LONG $0x147943c4; WORD $0x09e9 // vpextrb r9d, xmm13, 9 - LONG $0x01c1f641 // test r9b, 1 - QUAD $0x0000011824ac894c // mov qword [rsp + 280], r13 - LONG $0x2454894c; BYTE $0x70 // mov qword [rsp + 112], r10 - QUAD $0x000000b824948948 // mov qword [rsp + 184], rdx - JE LBB0_183 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x2071e3c4; WORD $0x0f0c; BYTE $0x09 // vpinsrb xmm1, xmm1, byte [rdi + rcx], 9 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - -LBB0_183: - QUAD $0x000000c824848b48 // mov rax, qword [rsp + 200] - QUAD $0x000000c0248c8b48 // mov rcx, qword [rsp + 192] - LONG $0x147943c4; WORD $0x0aed // vpextrb r13d, xmm13, 10 - LONG $0x01c5f641 // test r13b, 1 - JE LBB0_184 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x2071e3c4; WORD $0x070c; BYTE $0x0a // vpinsrb xmm1, xmm1, byte [rdi + rax], 10 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - LONG $0x147963c4; WORD $0x0be8 // vpextrb eax, xmm13, 11 - WORD $0x01a8 // test al, 1 - LONG $0x247c894c; BYTE $0x78 // mov qword [rsp + 120], r15 - JNE LBB0_684 - -LBB0_185: - LONG $0x147943c4; WORD $0x0cef // vpextrb r15d, xmm13, 12 - LONG $0x01c7f641 // test r15b, 1 - QUAD $0x00000130249c894c // mov qword [rsp + 304], r11 - JE LBB0_186 - -LBB0_685: - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - QUAD $0x000000b8248c8b48 // mov rcx, qword [rsp + 184] - LONG $0x2071e3c4; WORD $0x0f0c; BYTE $0x0c // vpinsrb xmm1, xmm1, byte [rdi + rcx], 12 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - LONG $0x147963c4; WORD $0x0dea // vpextrb edx, xmm13, 13 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - JNE LBB0_686 - -LBB0_187: - LONG $0x147963c4; WORD $0x0eee // vpextrb esi, xmm13, 14 - LONG $0x01c6f640 // test sil, 1 - JE LBB0_188 - -LBB0_687: - LONG $0x397d63c4; WORD 
$0x01f1 // vextracti128 xmm1, ymm14, 1 - QUAD $0x000000a8248c8b48 // mov rcx, qword [rsp + 168] - LONG $0x2071e3c4; WORD $0x0f0c; BYTE $0x0e // vpinsrb xmm1, xmm1, byte [rdi + rcx], 14 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - LONG $0x147943c4; WORD $0x0fee // vpextrb r14d, xmm13, 15 - LONG $0x01c6f641 // test r14b, 1 - JNE LBB0_189 - JMP LBB0_190 - -LBB0_172: - LONG $0x147963c4; WORD $0x01e8 // vpextrb eax, xmm13, 1 - LONG $0x28244489 // mov dword [rsp + 40], eax - WORD $0x01a8 // test al, 1 - JE LBB0_173 - -LBB0_676: - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - QUAD $0x0000008824848b48 // mov rax, qword [rsp + 136] - LONG $0x2071e3c4; WORD $0x070c; BYTE $0x01 // vpinsrb xmm1, xmm1, byte [rdi + rax], 1 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - LONG $0x147963c4; WORD $0x02e8 // vpextrb eax, xmm13, 2 - LONG $0x24244489 // mov dword [rsp + 36], eax - WORD $0x01a8 // test al, 1 - JNE LBB0_677 - -LBB0_174: - LONG $0x147963c4; WORD $0x03e8 // vpextrb eax, xmm13, 3 - LONG $0x20244489 // mov dword [rsp + 32], eax - WORD $0x01a8 // test al, 1 - JE LBB0_175 - -LBB0_678: - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x2071a3c4; WORD $0x3f0c; BYTE $0x03 // vpinsrb xmm1, xmm1, byte [rdi + r15], 3 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - LONG $0x147963c4; WORD $0x04e8 // vpextrb eax, xmm13, 4 - LONG $0x1c244489 // mov dword [rsp + 28], eax - WORD $0x01a8 // test al, 1 - JNE LBB0_679 - -LBB0_176: - LONG $0x147963c4; WORD $0x05e8 // vpextrb eax, xmm13, 5 - LONG $0x18244489 // mov dword [rsp + 24], eax - WORD $0x01a8 // test al, 1 - JE LBB0_177 - -LBB0_680: - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x2071a3c4; WORD $0x2f0c; BYTE $0x05 // vpinsrb xmm1, xmm1, byte [rdi + r13], 5 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - LONG $0x147963c4; WORD $0x06e8 // vpextrb eax, xmm13, 6 - LONG 
$0x14244489 // mov dword [rsp + 20], eax - WORD $0x01a8 // test al, 1 - JNE LBB0_681 - -LBB0_178: - LONG $0x147963c4; WORD $0x07e8 // vpextrb eax, xmm13, 7 - LONG $0x3c248489; WORD $0x0001; BYTE $0x00 // mov dword [rsp + 316], eax - WORD $0x01a8 // test al, 1 - JE LBB0_179 - -LBB0_682: - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - QUAD $0x000000f024848b48 // mov rax, qword [rsp + 240] - LONG $0x2071e3c4; WORD $0x070c; BYTE $0x07 // vpinsrb xmm1, xmm1, byte [rdi + rax], 7 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - QUAD $0x000000d824848b48 // mov rax, qword [rsp + 216] - LONG $0x147963c4; WORD $0x08eb // vpextrb ebx, xmm13, 8 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_180 - JMP LBB0_181 - -LBB0_184: - LONG $0x147963c4; WORD $0x0be8 // vpextrb eax, xmm13, 11 - WORD $0x01a8 // test al, 1 - LONG $0x247c894c; BYTE $0x78 // mov qword [rsp + 120], r15 - JE LBB0_185 - -LBB0_684: - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x2071e3c4; WORD $0x0f0c; BYTE $0x0b // vpinsrb xmm1, xmm1, byte [rdi + rcx], 11 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - LONG $0x147943c4; WORD $0x0cef // vpextrb r15d, xmm13, 12 - LONG $0x01c7f641 // test r15b, 1 - QUAD $0x00000130249c894c // mov qword [rsp + 304], r11 - JNE LBB0_685 - -LBB0_186: - LONG $0x147963c4; WORD $0x0dea // vpextrb edx, xmm13, 13 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - JE LBB0_187 - -LBB0_686: - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - QUAD $0x000000b0248c8b48 // mov rcx, qword [rsp + 176] - LONG $0x2071e3c4; WORD $0x0f0c; BYTE $0x0d // vpinsrb xmm1, xmm1, byte [rdi + rcx], 13 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - LONG $0x147963c4; WORD $0x0eee // vpextrb esi, xmm13, 14 - LONG $0x01c6f640 // test sil, 1 - JNE LBB0_687 - -LBB0_188: - LONG $0x147943c4; WORD $0x0fee // vpextrb r14d, xmm13, 15 - LONG $0x01c6f641 // test r14b, 1 - JE LBB0_190 - -LBB0_189: - 
LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - QUAD $0x000000a0248c8b48 // mov rcx, qword [rsp + 160] - LONG $0x2071e3c4; WORD $0x0f0c; BYTE $0x0f // vpinsrb xmm1, xmm1, byte [rdi + rcx], 15 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - -LBB0_190: - LONG $0x7175c1c4; WORD $0x01d6 // vpsrlw ymm1, ymm14, 1 - QUAD $0x00000080b5db75c5 // vpand ymm14, ymm1, yword 128[rbp] /* [rip + .LCPI0_4] */ - LONG $0x7e79c1c4; BYTE $0xf2 // vmovd r10d, xmm6 - LONG $0x01c2f641 // test r10b, 1 - JE LBB0_191 - LONG $0x7ef961c4; BYTE $0xf9 // vmovq rcx, xmm15 - LONG $0x147943c4; WORD $0x0834; BYTE $0x00 // vpextrb byte [r8 + rcx], xmm14, 0 - LONG $0x1479e3c4; WORD $0x01f1 // vpextrb ecx, xmm6, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_689 - -LBB0_192: - LONG $0x1479e3c4; WORD $0x02f1 // vpextrb ecx, xmm6, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_193 - -LBB0_690: - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x147943c4; WORD $0x0834; BYTE $0x02 // vpextrb byte [r8 + rcx], xmm14, 2 - LONG $0x1479e3c4; WORD $0x03f1 // vpextrb ecx, xmm6, 3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_691 - -LBB0_194: - LONG $0x1479e3c4; WORD $0x04f1 // vpextrb ecx, xmm6, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_195 - -LBB0_692: - LONG $0x7ef9e1c4; BYTE $0xe9 // vmovq rcx, xmm5 - LONG $0x147943c4; WORD $0x0834; BYTE $0x04 // vpextrb byte [r8 + rcx], xmm14, 4 - LONG $0x1479e3c4; WORD $0x05f1 // vpextrb ecx, xmm6, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_693 - -LBB0_196: - LONG $0x1479e3c4; WORD $0x06f1 // vpextrb ecx, xmm6, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_197 - -LBB0_694: - LONG $0x397de3c4; WORD $0x01e9 // vextracti128 xmm1, ymm5, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x147943c4; WORD $0x0834; BYTE $0x06 // vpextrb byte [r8 + rcx], xmm14, 6 - LONG $0x1479e3c4; WORD $0x07f1 // vpextrb ecx, xmm6, 
7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_695 - -LBB0_198: - LONG $0x1479e3c4; WORD $0x08f1 // vpextrb ecx, xmm6, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_199 - -LBB0_696: - LONG $0x7ef961c4; BYTE $0xe1 // vmovq rcx, xmm12 - LONG $0x147943c4; WORD $0x0834; BYTE $0x08 // vpextrb byte [r8 + rcx], xmm14, 8 - LONG $0x1479e3c4; WORD $0x09f1 // vpextrb ecx, xmm6, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_697 - -LBB0_200: - LONG $0x1479e3c4; WORD $0x0af1 // vpextrb ecx, xmm6, 10 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_201 - -LBB0_698: - LONG $0x397d63c4; WORD $0x01e1 // vextracti128 xmm1, ymm12, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x147943c4; WORD $0x0834; BYTE $0x0a // vpextrb byte [r8 + rcx], xmm14, 10 - LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb ecx, xmm6, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_699 - -LBB0_202: - LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb ecx, xmm6, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_203 - -LBB0_700: - LONG $0x7ef961c4; BYTE $0xd9 // vmovq rcx, xmm11 - LONG $0x147943c4; WORD $0x0834; BYTE $0x0c // vpextrb byte [r8 + rcx], xmm14, 12 - LONG $0x1479e3c4; WORD $0x0df1 // vpextrb ecx, xmm6, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_701 - -LBB0_204: - LONG $0x1479e3c4; WORD $0x0ef1 // vpextrb ecx, xmm6, 14 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_205 - -LBB0_702: - LONG $0x397d63c4; WORD $0x01d9 // vextracti128 xmm1, ymm11, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x147943c4; WORD $0x0834; BYTE $0x0e // vpextrb byte [r8 + rcx], xmm14, 14 - LONG $0x1479e3c4; WORD $0x0ff1 // vpextrb ecx, xmm6, 15 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_703 - -LBB0_206: - LONG $0x2c2444f6; BYTE $0x01 // test byte [rsp + 44], 1 - JE LBB0_207 - -LBB0_704: - LONG $0x7ef961c4; BYTE $0xd1 // vmovq rcx, xmm10 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x00 // vpextrb 
byte [r8 + rcx], xmm1, 0 - LONG $0x282444f6; BYTE $0x01 // test byte [rsp + 40], 1 - JNE LBB0_705 - -LBB0_208: - LONG $0x242444f6; BYTE $0x01 // test byte [rsp + 36], 1 - JE LBB0_209 - -LBB0_706: - LONG $0x397d63c4; WORD $0x01d1 // vextracti128 xmm1, ymm10, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x02 // vpextrb byte [r8 + rcx], xmm1, 2 - LONG $0x202444f6; BYTE $0x01 // test byte [rsp + 32], 1 - JNE LBB0_707 - -LBB0_210: - LONG $0x1c2444f6; BYTE $0x01 // test byte [rsp + 28], 1 - JE LBB0_211 - -LBB0_708: - LONG $0x7ef961c4; BYTE $0xc9 // vmovq rcx, xmm9 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x04 // vpextrb byte [r8 + rcx], xmm1, 4 - LONG $0x182444f6; BYTE $0x01 // test byte [rsp + 24], 1 - JNE LBB0_709 - -LBB0_212: - LONG $0x142444f6; BYTE $0x01 // test byte [rsp + 20], 1 - JE LBB0_213 - -LBB0_710: - LONG $0x397d63c4; WORD $0x01c9 // vextracti128 xmm1, ymm9, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x06 // vpextrb byte [r8 + rcx], xmm1, 6 - QUAD $0x010000013c2484f6 // test byte [rsp + 316], 1 - JNE LBB0_711 - -LBB0_214: - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_215 - -LBB0_712: - LONG $0x7ef961c4; BYTE $0xc1 // vmovq rcx, xmm8 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x08 // vpextrb byte [r8 + rcx], xmm1, 8 - LONG $0x01c1f641 // test r9b, 1 - QUAD $0x000000e024948b4c // mov r10, qword [rsp + 224] - QUAD $0x00000090249c8b4c // mov r11, qword [rsp + 144] - JNE LBB0_713 - -LBB0_216: - LONG $0x01c5f641 // test r13b, 1 - QUAD $0x00000128249c8b48 // mov rbx, qword [rsp + 296] - JE LBB0_217 - -LBB0_714: - LONG $0x397d63c4; WORD $0x01c1 // vextracti128 xmm1, ymm8, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, 
xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0a // vpextrb byte [r8 + rcx], xmm1, 10 - WORD $0x01a8 // test al, 1 - QUAD $0x00000120248c8b4c // mov r9, qword [rsp + 288] - QUAD $0x000000e824848b48 // mov rax, qword [rsp + 232] - JNE LBB0_715 - -LBB0_218: - LONG $0x01c7f641 // test r15b, 1 - JE LBB0_219 - -LBB0_716: - LONG $0x7ef9e1c4; BYTE $0xf9 // vmovq rcx, xmm7 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0c // vpextrb byte [r8 + rcx], xmm1, 12 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - QUAD $0x0000008824ac8b4c // mov r13, qword [rsp + 136] - QUAD $0x0000008024bc8b4c // mov r15, qword [rsp + 128] - JNE LBB0_717 - -LBB0_220: - LONG $0x01c6f640 // test sil, 1 - QUAD $0x0000013024948b48 // mov rdx, qword [rsp + 304] - JE LBB0_221 - -LBB0_718: - LONG $0x397de3c4; WORD $0x01f9 // vextracti128 xmm1, ymm7, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0e // vpextrb byte [r8 + rcx], xmm1, 14 - LONG $0x01c6f641 // test r14b, 1 - QUAD $0x0000009824b48b48 // mov rsi, qword [rsp + 152] - JNE LBB0_222 - JMP LBB0_223 - -LBB0_191: - LONG $0x1479e3c4; WORD $0x01f1 // vpextrb ecx, xmm6, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_192 - -LBB0_689: - LONG $0x16f963c4; WORD $0x01f9 // vpextrq rcx, xmm15, 1 - LONG $0x147943c4; WORD $0x0834; BYTE $0x01 // vpextrb byte [r8 + rcx], xmm14, 1 - LONG $0x1479e3c4; WORD $0x02f1 // vpextrb ecx, xmm6, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_690 - -LBB0_193: - LONG $0x1479e3c4; WORD $0x03f1 // vpextrb ecx, xmm6, 3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_194 - -LBB0_691: - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x147943c4; WORD $0x0834; BYTE $0x03 // vpextrb byte [r8 + rcx], xmm14, 3 - LONG 
$0x1479e3c4; WORD $0x04f1 // vpextrb ecx, xmm6, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_692 - -LBB0_195: - LONG $0x1479e3c4; WORD $0x05f1 // vpextrb ecx, xmm6, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_196 - -LBB0_693: - LONG $0x16f9e3c4; WORD $0x01e9 // vpextrq rcx, xmm5, 1 - LONG $0x147943c4; WORD $0x0834; BYTE $0x05 // vpextrb byte [r8 + rcx], xmm14, 5 - LONG $0x1479e3c4; WORD $0x06f1 // vpextrb ecx, xmm6, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_694 - -LBB0_197: - LONG $0x1479e3c4; WORD $0x07f1 // vpextrb ecx, xmm6, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_198 - -LBB0_695: - LONG $0x397de3c4; WORD $0x01e9 // vextracti128 xmm1, ymm5, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x147943c4; WORD $0x0834; BYTE $0x07 // vpextrb byte [r8 + rcx], xmm14, 7 - LONG $0x1479e3c4; WORD $0x08f1 // vpextrb ecx, xmm6, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_696 - -LBB0_199: - LONG $0x1479e3c4; WORD $0x09f1 // vpextrb ecx, xmm6, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_200 - -LBB0_697: - LONG $0x16f963c4; WORD $0x01e1 // vpextrq rcx, xmm12, 1 - LONG $0x147943c4; WORD $0x0834; BYTE $0x09 // vpextrb byte [r8 + rcx], xmm14, 9 - LONG $0x1479e3c4; WORD $0x0af1 // vpextrb ecx, xmm6, 10 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_698 - -LBB0_201: - LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb ecx, xmm6, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_202 - -LBB0_699: - LONG $0x397d63c4; WORD $0x01e1 // vextracti128 xmm1, ymm12, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x147943c4; WORD $0x0834; BYTE $0x0b // vpextrb byte [r8 + rcx], xmm14, 11 - LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb ecx, xmm6, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_700 - -LBB0_203: - LONG $0x1479e3c4; WORD $0x0df1 // vpextrb ecx, xmm6, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_204 - -LBB0_701: - LONG $0x16f963c4; WORD $0x01d9 // vpextrq rcx, xmm11, 1 - 
LONG $0x147943c4; WORD $0x0834; BYTE $0x0d // vpextrb byte [r8 + rcx], xmm14, 13 - LONG $0x1479e3c4; WORD $0x0ef1 // vpextrb ecx, xmm6, 14 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_702 - -LBB0_205: - LONG $0x1479e3c4; WORD $0x0ff1 // vpextrb ecx, xmm6, 15 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_206 - -LBB0_703: - LONG $0x397d63c4; WORD $0x01d9 // vextracti128 xmm1, ymm11, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x147943c4; WORD $0x0834; BYTE $0x0f // vpextrb byte [r8 + rcx], xmm14, 15 - LONG $0x2c2444f6; BYTE $0x01 // test byte [rsp + 44], 1 - JNE LBB0_704 - -LBB0_207: - LONG $0x282444f6; BYTE $0x01 // test byte [rsp + 40], 1 - JE LBB0_208 - -LBB0_705: - LONG $0x16f963c4; WORD $0x01d1 // vpextrq rcx, xmm10, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x01 // vpextrb byte [r8 + rcx], xmm1, 1 - LONG $0x242444f6; BYTE $0x01 // test byte [rsp + 36], 1 - JNE LBB0_706 - -LBB0_209: - LONG $0x202444f6; BYTE $0x01 // test byte [rsp + 32], 1 - JE LBB0_210 - -LBB0_707: - LONG $0x397d63c4; WORD $0x01d1 // vextracti128 xmm1, ymm10, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x03 // vpextrb byte [r8 + rcx], xmm1, 3 - LONG $0x1c2444f6; BYTE $0x01 // test byte [rsp + 28], 1 - JNE LBB0_708 - -LBB0_211: - LONG $0x182444f6; BYTE $0x01 // test byte [rsp + 24], 1 - JE LBB0_212 - -LBB0_709: - LONG $0x16f963c4; WORD $0x01c9 // vpextrq rcx, xmm9, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x05 // vpextrb byte [r8 + rcx], xmm1, 5 - LONG $0x142444f6; BYTE $0x01 // test byte [rsp + 20], 1 - JNE LBB0_710 - -LBB0_213: - QUAD $0x010000013c2484f6 // test byte [rsp + 316], 1 - JE LBB0_214 - -LBB0_711: - LONG $0x397d63c4; WORD $0x01c9 // vextracti128 xmm1, ymm9, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq 
rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x07 // vpextrb byte [r8 + rcx], xmm1, 7 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_712 - -LBB0_215: - LONG $0x01c1f641 // test r9b, 1 - QUAD $0x000000e024948b4c // mov r10, qword [rsp + 224] - QUAD $0x00000090249c8b4c // mov r11, qword [rsp + 144] - JE LBB0_216 - -LBB0_713: - LONG $0x16f963c4; WORD $0x01c1 // vpextrq rcx, xmm8, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x09 // vpextrb byte [r8 + rcx], xmm1, 9 - LONG $0x01c5f641 // test r13b, 1 - QUAD $0x00000128249c8b48 // mov rbx, qword [rsp + 296] - JNE LBB0_714 - -LBB0_217: - WORD $0x01a8 // test al, 1 - QUAD $0x00000120248c8b4c // mov r9, qword [rsp + 288] - QUAD $0x000000e824848b48 // mov rax, qword [rsp + 232] - JE LBB0_218 - -LBB0_715: - LONG $0x397d63c4; WORD $0x01c1 // vextracti128 xmm1, ymm8, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0b // vpextrb byte [r8 + rcx], xmm1, 11 - LONG $0x01c7f641 // test r15b, 1 - JNE LBB0_716 - -LBB0_219: - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - QUAD $0x0000008824ac8b4c // mov r13, qword [rsp + 136] - QUAD $0x0000008024bc8b4c // mov r15, qword [rsp + 128] - JE LBB0_220 - -LBB0_717: - LONG $0x16f9e3c4; WORD $0x01f9 // vpextrq rcx, xmm7, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0d // vpextrb byte [r8 + rcx], xmm1, 13 - LONG $0x01c6f640 // test sil, 1 - QUAD $0x0000013024948b48 // mov rdx, qword [rsp + 304] - JNE LBB0_718 - -LBB0_221: - LONG $0x01c6f641 // test r14b, 1 - QUAD $0x0000009824b48b48 // mov rsi, qword [rsp + 152] - JE LBB0_223 - -LBB0_222: - LONG $0x397de3c4; WORD $0x01f9 // vextracti128 xmm1, ymm7, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 
// vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0f // vpextrb byte [r8 + rcx], xmm1, 15 - -LBB0_223: - QUAD $0x0002e0248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 736] - QUAD $0x00020024bceb75c5; BYTE $0x00 // vpor ymm15, ymm1, yword [rsp + 512] - QUAD $0x0001e024acebf5c5; BYTE $0x00 // vpor ymm5, ymm1, yword [rsp + 480] - QUAD $0x0001802494eb75c5; BYTE $0x00 // vpor ymm10, ymm1, yword [rsp + 384] - QUAD $0x000160248ceb75c5; BYTE $0x00 // vpor ymm9, ymm1, yword [rsp + 352] - QUAD $0x0001c024a4eb75c5; BYTE $0x00 // vpor ymm12, ymm1, yword [rsp + 448] - QUAD $0x0001a0249ceb75c5; BYTE $0x00 // vpor ymm11, ymm1, yword [rsp + 416] - QUAD $0x0001402484eb75c5; BYTE $0x00 // vpor ymm8, ymm1, yword [rsp + 320] - LONG $0xf9ebddc5 // vpor ymm7, ymm4, ymm1 - LONG $0x463de3c4; WORD $0x31cf // vperm2i128 ymm1, ymm8, ymm7, 49 - LONG $0x383de3c4; WORD $0x01d7 // vinserti128 ymm2, ymm8, xmm7, 1 - LONG $0xc9c6ecc5; BYTE $0x88 // vshufps ymm1, ymm2, ymm1, 136 - LONG $0x461dc3c4; WORD $0x31d3 // vperm2i128 ymm2, ymm12, ymm11, 49 - LONG $0x381dc3c4; WORD $0x01db // vinserti128 ymm3, ymm12, xmm11, 1 - LONG $0xd2c6e4c5; BYTE $0x88 // vshufps ymm2, ymm3, ymm2, 136 - LONG $0x462dc3c4; WORD $0x31d9 // vperm2i128 ymm3, ymm10, ymm9, 49 - LONG $0x382d43c4; WORD $0x01e9 // vinserti128 ymm13, ymm10, xmm9, 1 - LONG $0xdbc694c5; BYTE $0x88 // vshufps ymm3, ymm13, ymm3, 136 - LONG $0x460563c4; WORD $0x31ed // vperm2i128 ymm13, ymm15, ymm5, 49 - LONG $0x380563c4; WORD $0x01f5 // vinserti128 ymm14, ymm15, xmm5, 1 - LONG $0xc60c41c4; WORD $0x88ed // vshufps ymm13, ymm14, ymm13, 136 - LONG $0x667d41c4; BYTE $0xed // vpcmpgtd ymm13, ymm0, ymm13 - LONG $0xdb66fdc5 // vpcmpgtd ymm3, ymm0, ymm3 - LONG $0xdb6b95c5 // vpackssdw ymm3, ymm13, ymm3 - LONG $0xd266fdc5 // vpcmpgtd ymm2, ymm0, ymm2 - LONG $0xc966fdc5 // vpcmpgtd ymm1, ymm0, ymm1 - LONG $0xc96bedc5 // vpackssdw ymm1, ymm2, ymm1 - LONG $0x00fde3c4; WORD $0xd8d3 // vpermq ymm2, ymm3, 216 - LONG $0x00fde3c4; WORD 
$0xd8c9 // vpermq ymm1, ymm1, 216 - LONG $0xc963edc5 // vpacksswb ymm1, ymm2, ymm1 - LONG $0xf6dbf5c5 // vpand ymm6, ymm1, ymm6 - LONG $0xf17ef9c5 // vmovd ecx, xmm6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_224 - LONG $0x787d62c4; WORD $0x1734 // vpbroadcastb ymm14, byte [rdi + rdx] - LONG $0x1479e3c4; WORD $0x01f1 // vpextrb ecx, xmm6, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_720 - -LBB0_225: - LONG $0x24548b48; BYTE $0x68 // mov rdx, qword [rsp + 104] - LONG $0x1479e3c4; WORD $0x02f1 // vpextrb ecx, xmm6, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_227 - -LBB0_226: - LONG $0x2009e3c4; WORD $0x1f0c; BYTE $0x02 // vpinsrb xmm1, xmm14, byte [rdi + rbx], 2 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_227: - LONG $0x24748b48; BYTE $0x60 // mov rsi, qword [rsp + 96] - LONG $0x245c8b48; BYTE $0x48 // mov rbx, qword [rsp + 72] - LONG $0x1479e3c4; WORD $0x03f1 // vpextrb ecx, xmm6, 3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_228 - QUAD $0x00000110248c8b48 // mov rcx, qword [rsp + 272] - LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x03 // vpinsrb xmm1, xmm14, byte [rdi + rcx], 3 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x04f1 // vpextrb ecx, xmm6, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_722 - -LBB0_229: - LONG $0x1479e3c4; WORD $0x05f1 // vpextrb ecx, xmm6, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_230 - -LBB0_723: - LONG $0x2009e3c4; WORD $0x170c; BYTE $0x05 // vpinsrb xmm1, xmm14, byte [rdi + rdx], 5 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x06f1 // vpextrb ecx, xmm6, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_724 - -LBB0_231: - LONG $0x1479e3c4; WORD $0x07f1 // vpextrb ecx, xmm6, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_232 - -LBB0_725: - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x07 // vpinsrb xmm1, xmm14, byte [rdi + rax], 7 - LONG $0x020d63c4; WORD 
$0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x08f1 // vpextrb ecx, xmm6, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_726 - -LBB0_233: - LONG $0x24548b48; BYTE $0x58 // mov rdx, qword [rsp + 88] - LONG $0x1479e3c4; WORD $0x09f1 // vpextrb ecx, xmm6, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_234 - -LBB0_727: - LONG $0x2009a3c4; WORD $0x170c; BYTE $0x09 // vpinsrb xmm1, xmm14, byte [rdi + r10], 9 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0af1 // vpextrb ecx, xmm6, 10 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_728 - -LBB0_235: - LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb ecx, xmm6, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_236 - -LBB0_729: - QUAD $0x0000010024848b48 // mov rax, qword [rsp + 256] - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0b // vpinsrb xmm1, xmm14, byte [rdi + rax], 11 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb ecx, xmm6, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_730 - -LBB0_237: - LONG $0x1479e3c4; WORD $0x0df1 // vpextrb ecx, xmm6, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_239 - -LBB0_238: - LONG $0x2009e3c4; WORD $0x170c; BYTE $0x0d // vpinsrb xmm1, xmm14, byte [rdi + rdx], 13 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_239: - LONG $0x24448b48; BYTE $0x50 // mov rax, qword [rsp + 80] - LONG $0x24548b48; BYTE $0x40 // mov rdx, qword [rsp + 64] - LONG $0x1479e3c4; WORD $0x0ef1 // vpextrb ecx, xmm6, 14 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_241 - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0e // vpinsrb xmm1, xmm14, byte [rdi + rax], 14 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_241: - LONG $0x1479e3c4; WORD $0x0ff1 // vpextrb ecx, xmm6, 15 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_243 - LONG $0x2009e3c4; WORD $0x1f0c; BYTE $0x0f // vpinsrb xmm1, xmm14, byte [rdi 
+ rbx], 15 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_243: - LONG $0x397de3c4; WORD $0x01f1 // vextracti128 xmm1, ymm6, 1 - LONG $0xc87ef9c5 // vmovd eax, xmm1 - LONG $0x2c244489 // mov dword [rsp + 44], eax - WORD $0x01a8 // test al, 1 - JE LBB0_245 - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x1714; BYTE $0x00 // vpinsrb xmm2, xmm2, byte [rdi + rdx], 0 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - -LBB0_245: - LONG $0x244c8b48; BYTE $0x38 // mov rcx, qword [rsp + 56] - LONG $0x1479e3c4; WORD $0x01c8 // vpextrb eax, xmm1, 1 - LONG $0x28244489 // mov dword [rsp + 40], eax - WORD $0x01a8 // test al, 1 - JE LBB0_247 - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069a3c4; WORD $0x2f14; BYTE $0x01 // vpinsrb xmm2, xmm2, byte [rdi + r13], 1 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - -LBB0_247: - QUAD $0x0000011824948b48 // mov rdx, qword [rsp + 280] - LONG $0x24748b48; BYTE $0x70 // mov rsi, qword [rsp + 112] - LONG $0x1479e3c4; WORD $0x02c8 // vpextrb eax, xmm1, 2 - LONG $0x24244489 // mov dword [rsp + 36], eax - WORD $0x01a8 // test al, 1 - JE LBB0_249 - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069a3c4; WORD $0x3f14; BYTE $0x02 // vpinsrb xmm2, xmm2, byte [rdi + r15], 2 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - -LBB0_249: - LONG $0x24448b48; BYTE $0x78 // mov rax, qword [rsp + 120] - LONG $0x1479e3c4; WORD $0x03cb // vpextrb ebx, xmm1, 3 - LONG $0x20245c89 // mov dword [rsp + 32], ebx - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_250 - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x03 // vpinsrb xmm2, xmm2, byte [rdi + rax], 3 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x04c8 // vpextrb eax, xmm1, 4 - LONG $0x1c244489 // mov dword [rsp 
+ 28], eax - WORD $0x01a8 // test al, 1 - JNE LBB0_732 - -LBB0_251: - LONG $0x1479e3c4; WORD $0x05c8 // vpextrb eax, xmm1, 5 - LONG $0x18244489 // mov dword [rsp + 24], eax - WORD $0x01a8 // test al, 1 - JE LBB0_252 - -LBB0_733: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x1714; BYTE $0x05 // vpinsrb xmm2, xmm2, byte [rdi + rdx], 5 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x06c8 // vpextrb eax, xmm1, 6 - LONG $0x14244489 // mov dword [rsp + 20], eax - WORD $0x01a8 // test al, 1 - JNE LBB0_734 - -LBB0_253: - LONG $0x1479c3c4; WORD $0x07c9 // vpextrb r9d, xmm1, 7 - LONG $0x01c1f641 // test r9b, 1 - JE LBB0_254 - -LBB0_735: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000f024848b48 // mov rax, qword [rsp + 240] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x07 // vpinsrb xmm2, xmm2, byte [rdi + rax], 7 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x08ca // vpextrb edx, xmm1, 8 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - JNE LBB0_736 - -LBB0_255: - LONG $0x1479e3c4; WORD $0x09c9 // vpextrb ecx, xmm1, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_256 - -LBB0_737: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000d024848b48 // mov rax, qword [rsp + 208] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x09 // vpinsrb xmm2, xmm2, byte [rdi + rax], 9 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x0ace // vpextrb esi, xmm1, 10 - LONG $0x01c6f640 // test sil, 1 - JNE LBB0_738 - -LBB0_257: - LONG $0x1479e3c4; WORD $0x0bc8 // vpextrb eax, xmm1, 11 - WORD $0x01a8 // test al, 1 - JE LBB0_258 - -LBB0_739: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000c0249c8b48 // mov rbx, qword [rsp + 192] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0b // vpinsrb xmm2, xmm2, byte [rdi + rbx], 11 - LONG 
$0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0ccd // vpextrb r13d, xmm1, 12 - LONG $0x01c5f641 // test r13b, 1 - JNE LBB0_740 - -LBB0_259: - LONG $0x1479c3c4; WORD $0x0dca // vpextrb r10d, xmm1, 13 - LONG $0x01c2f641 // test r10b, 1 - JE LBB0_260 - -LBB0_741: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000b0249c8b48 // mov rbx, qword [rsp + 176] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0d // vpinsrb xmm2, xmm2, byte [rdi + rbx], 13 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0ecb // vpextrb r11d, xmm1, 14 - LONG $0x01c3f641 // test r11b, 1 - JNE LBB0_742 - -LBB0_261: - LONG $0x1479c3c4; WORD $0x0fce // vpextrb r14d, xmm1, 15 - LONG $0x01c6f641 // test r14b, 1 - JE LBB0_263 - -LBB0_262: - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - QUAD $0x000000a0249c8b48 // mov rbx, qword [rsp + 160] - LONG $0x2071e3c4; WORD $0x1f0c; BYTE $0x0f // vpinsrb xmm1, xmm1, byte [rdi + rbx], 15 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - -LBB0_263: - LONG $0x7175c1c4; WORD $0x02d6 // vpsrlw ymm1, ymm14, 2 - QUAD $0x00000080b5db75c5 // vpand ymm14, ymm1, yword 128[rbp] /* [rip + .LCPI0_4] */ - LONG $0x7e79c1c4; BYTE $0xf7 // vmovd r15d, xmm6 - LONG $0x01c7f641 // test r15b, 1 - JE LBB0_264 - LONG $0x7ef961c4; BYTE $0xfb // vmovq rbx, xmm15 - LONG $0x147943c4; WORD $0x1834; BYTE $0x00 // vpextrb byte [r8 + rbx], xmm14, 0 - LONG $0x1479e3c4; WORD $0x01f3 // vpextrb ebx, xmm6, 1 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_744 - -LBB0_265: - LONG $0x1479e3c4; WORD $0x02f3 // vpextrb ebx, xmm6, 2 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - QUAD $0x000000e024bc8b4c // mov r15, qword [rsp + 224] - JE LBB0_266 - -LBB0_745: - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x02 // vpextrb byte [r8 + rbx], 
xmm14, 2 - LONG $0x1479e3c4; WORD $0x03f3 // vpextrb ebx, xmm6, 3 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_746 - -LBB0_267: - LONG $0x1479e3c4; WORD $0x04f3 // vpextrb ebx, xmm6, 4 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_268 - -LBB0_747: - LONG $0x7ef9e1c4; BYTE $0xeb // vmovq rbx, xmm5 - LONG $0x147943c4; WORD $0x1834; BYTE $0x04 // vpextrb byte [r8 + rbx], xmm14, 4 - LONG $0x1479e3c4; WORD $0x05f3 // vpextrb ebx, xmm6, 5 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_748 - -LBB0_269: - LONG $0x1479e3c4; WORD $0x06f3 // vpextrb ebx, xmm6, 6 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_270 - -LBB0_749: - LONG $0x397de3c4; WORD $0x01e9 // vextracti128 xmm1, ymm5, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x06 // vpextrb byte [r8 + rbx], xmm14, 6 - LONG $0x1479e3c4; WORD $0x07f3 // vpextrb ebx, xmm6, 7 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_750 - -LBB0_271: - LONG $0x1479e3c4; WORD $0x08f3 // vpextrb ebx, xmm6, 8 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_272 - -LBB0_751: - LONG $0x7ef961c4; BYTE $0xe3 // vmovq rbx, xmm12 - LONG $0x147943c4; WORD $0x1834; BYTE $0x08 // vpextrb byte [r8 + rbx], xmm14, 8 - LONG $0x1479e3c4; WORD $0x09f3 // vpextrb ebx, xmm6, 9 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_752 - -LBB0_273: - LONG $0x1479e3c4; WORD $0x0af3 // vpextrb ebx, xmm6, 10 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_274 - -LBB0_753: - LONG $0x397d63c4; WORD $0x01e1 // vextracti128 xmm1, ymm12, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0a // vpextrb byte [r8 + rbx], xmm14, 10 - LONG $0x1479e3c4; WORD $0x0bf3 // vpextrb ebx, xmm6, 11 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_754 - -LBB0_275: - LONG $0x1479e3c4; WORD $0x0cf3 // vpextrb ebx, xmm6, 12 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_276 - -LBB0_755: - LONG $0x7ef961c4; BYTE $0xdb // vmovq rbx, xmm11 - LONG $0x147943c4; WORD 
$0x1834; BYTE $0x0c // vpextrb byte [r8 + rbx], xmm14, 12 - LONG $0x1479e3c4; WORD $0x0df3 // vpextrb ebx, xmm6, 13 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_756 - -LBB0_277: - LONG $0x1479e3c4; WORD $0x0ef3 // vpextrb ebx, xmm6, 14 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_278 - -LBB0_757: - LONG $0x397d63c4; WORD $0x01d9 // vextracti128 xmm1, ymm11, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0e // vpextrb byte [r8 + rbx], xmm14, 14 - LONG $0x1479e3c4; WORD $0x0ff3 // vpextrb ebx, xmm6, 15 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_758 - -LBB0_279: - LONG $0x2c2444f6; BYTE $0x01 // test byte [rsp + 44], 1 - JE LBB0_280 - -LBB0_759: - LONG $0x7ef961c4; BYTE $0xd3 // vmovq rbx, xmm10 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x00 // vpextrb byte [r8 + rbx], xmm1, 0 - LONG $0x282444f6; BYTE $0x01 // test byte [rsp + 40], 1 - JNE LBB0_760 - -LBB0_281: - LONG $0x242444f6; BYTE $0x01 // test byte [rsp + 36], 1 - JE LBB0_282 - -LBB0_761: - LONG $0x397d63c4; WORD $0x01d1 // vextracti128 xmm1, ymm10, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x02 // vpextrb byte [r8 + rbx], xmm1, 2 - LONG $0x202444f6; BYTE $0x01 // test byte [rsp + 32], 1 - JNE LBB0_762 - -LBB0_283: - LONG $0x1c2444f6; BYTE $0x01 // test byte [rsp + 28], 1 - JE LBB0_284 - -LBB0_763: - LONG $0x7ef961c4; BYTE $0xcb // vmovq rbx, xmm9 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x04 // vpextrb byte [r8 + rbx], xmm1, 4 - LONG $0x182444f6; BYTE $0x01 // test byte [rsp + 24], 1 - JNE LBB0_764 - -LBB0_285: - LONG $0x142444f6; BYTE $0x01 // test byte [rsp + 20], 1 - JE LBB0_286 - -LBB0_765: - LONG $0x397d63c4; WORD $0x01c9 // vextracti128 xmm1, ymm9, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - 
LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x06 // vpextrb byte [r8 + rbx], xmm1, 6 - LONG $0x01c1f641 // test r9b, 1 - JNE LBB0_766 - -LBB0_287: - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - QUAD $0x00000128249c8b48 // mov rbx, qword [rsp + 296] - JE LBB0_288 - -LBB0_767: - LONG $0x7ef961c4; BYTE $0xc2 // vmovq rdx, xmm8 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x100c; BYTE $0x08 // vpextrb byte [r8 + rdx], xmm1, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_768 - -LBB0_289: - LONG $0x01c6f640 // test sil, 1 - QUAD $0x0000013024948b48 // mov rdx, qword [rsp + 304] - JE LBB0_290 - -LBB0_769: - LONG $0x397d63c4; WORD $0x01c1 // vextracti128 xmm1, ymm8, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0a // vpextrb byte [r8 + rcx], xmm1, 10 - WORD $0x01a8 // test al, 1 - QUAD $0x0000009824b48b48 // mov rsi, qword [rsp + 152] - JNE LBB0_770 - -LBB0_291: - LONG $0x01c5f641 // test r13b, 1 - JE LBB0_292 - -LBB0_771: - LONG $0x7ef9e1c4; BYTE $0xf9 // vmovq rcx, xmm7 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0c // vpextrb byte [r8 + rcx], xmm1, 12 - LONG $0x01c2f641 // test r10b, 1 - QUAD $0x0000011824ac8b4c // mov r13, qword [rsp + 280] - JNE LBB0_772 - -LBB0_293: - LONG $0x01c3f641 // test r11b, 1 - JE LBB0_294 - -LBB0_773: - LONG $0x397de3c4; WORD $0x01f9 // vextracti128 xmm1, ymm7, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0e // vpextrb byte [r8 + rcx], xmm1, 14 - LONG $0x01c6f641 // test r14b, 1 - QUAD $0x0000012024848b48 // mov rax, qword [rsp + 288] - QUAD $0x000000e8248c8b4c // mov r9, qword [rsp + 232] - JNE LBB0_295 - JMP LBB0_296 - -LBB0_224: - LONG $0x1479e3c4; WORD 
$0x01f1 // vpextrb ecx, xmm6, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_225 - -LBB0_720: - LONG $0x2009e3c4; WORD $0x370c; BYTE $0x01 // vpinsrb xmm1, xmm14, byte [rdi + rsi], 1 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x24548b48; BYTE $0x68 // mov rdx, qword [rsp + 104] - LONG $0x1479e3c4; WORD $0x02f1 // vpextrb ecx, xmm6, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_226 - JMP LBB0_227 - -LBB0_228: - LONG $0x1479e3c4; WORD $0x04f1 // vpextrb ecx, xmm6, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_229 - -LBB0_722: - QUAD $0x00000108248c8b48 // mov rcx, qword [rsp + 264] - LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x04 // vpinsrb xmm1, xmm14, byte [rdi + rcx], 4 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x05f1 // vpextrb ecx, xmm6, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_723 - -LBB0_230: - LONG $0x1479e3c4; WORD $0x06f1 // vpextrb ecx, xmm6, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_231 - -LBB0_724: - LONG $0x2009a3c4; WORD $0x0f0c; BYTE $0x06 // vpinsrb xmm1, xmm14, byte [rdi + r9], 6 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x07f1 // vpextrb ecx, xmm6, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_725 - -LBB0_232: - LONG $0x1479e3c4; WORD $0x08f1 // vpextrb ecx, xmm6, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_233 - -LBB0_726: - LONG $0x2009e3c4; WORD $0x370c; BYTE $0x08 // vpinsrb xmm1, xmm14, byte [rdi + rsi], 8 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x24548b48; BYTE $0x58 // mov rdx, qword [rsp + 88] - LONG $0x1479e3c4; WORD $0x09f1 // vpextrb ecx, xmm6, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_727 - -LBB0_234: - LONG $0x1479e3c4; WORD $0x0af1 // vpextrb ecx, xmm6, 10 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_235 - -LBB0_728: - LONG $0x2009a3c4; WORD $0x1f0c; BYTE $0x0a // vpinsrb xmm1, xmm14, byte 
[rdi + r11], 10 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb ecx, xmm6, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_729 - -LBB0_236: - LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb ecx, xmm6, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_237 - -LBB0_730: - QUAD $0x000000f824848b48 // mov rax, qword [rsp + 248] - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0c // vpinsrb xmm1, xmm14, byte [rdi + rax], 12 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0df1 // vpextrb ecx, xmm6, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_238 - JMP LBB0_239 - -LBB0_250: - LONG $0x1479e3c4; WORD $0x04c8 // vpextrb eax, xmm1, 4 - LONG $0x1c244489 // mov dword [rsp + 28], eax - WORD $0x01a8 // test al, 1 - JE LBB0_251 - -LBB0_732: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x0f14; BYTE $0x04 // vpinsrb xmm2, xmm2, byte [rdi + rcx], 4 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x05c8 // vpextrb eax, xmm1, 5 - LONG $0x18244489 // mov dword [rsp + 24], eax - WORD $0x01a8 // test al, 1 - JNE LBB0_733 - -LBB0_252: - LONG $0x1479e3c4; WORD $0x06c8 // vpextrb eax, xmm1, 6 - LONG $0x14244489 // mov dword [rsp + 20], eax - WORD $0x01a8 // test al, 1 - JE LBB0_253 - -LBB0_734: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x3714; BYTE $0x06 // vpinsrb xmm2, xmm2, byte [rdi + rsi], 6 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x07c9 // vpextrb r9d, xmm1, 7 - LONG $0x01c1f641 // test r9b, 1 - JNE LBB0_735 - -LBB0_254: - LONG $0x1479e3c4; WORD $0x08ca // vpextrb edx, xmm1, 8 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - JE LBB0_255 - -LBB0_736: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000d824848b48 // mov rax, qword [rsp + 216] - LONG 
$0x2069e3c4; WORD $0x0714; BYTE $0x08 // vpinsrb xmm2, xmm2, byte [rdi + rax], 8 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x09c9 // vpextrb ecx, xmm1, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_737 - -LBB0_256: - LONG $0x1479e3c4; WORD $0x0ace // vpextrb esi, xmm1, 10 - LONG $0x01c6f640 // test sil, 1 - JE LBB0_257 - -LBB0_738: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000c824848b48 // mov rax, qword [rsp + 200] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x0a // vpinsrb xmm2, xmm2, byte [rdi + rax], 10 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x0bc8 // vpextrb eax, xmm1, 11 - WORD $0x01a8 // test al, 1 - JNE LBB0_739 - -LBB0_258: - LONG $0x1479c3c4; WORD $0x0ccd // vpextrb r13d, xmm1, 12 - LONG $0x01c5f641 // test r13b, 1 - JE LBB0_259 - -LBB0_740: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000b8249c8b48 // mov rbx, qword [rsp + 184] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0c // vpinsrb xmm2, xmm2, byte [rdi + rbx], 12 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0dca // vpextrb r10d, xmm1, 13 - LONG $0x01c2f641 // test r10b, 1 - JNE LBB0_741 - -LBB0_260: - LONG $0x1479c3c4; WORD $0x0ecb // vpextrb r11d, xmm1, 14 - LONG $0x01c3f641 // test r11b, 1 - JE LBB0_261 - -LBB0_742: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000a8249c8b48 // mov rbx, qword [rsp + 168] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0e // vpinsrb xmm2, xmm2, byte [rdi + rbx], 14 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0fce // vpextrb r14d, xmm1, 15 - LONG $0x01c6f641 // test r14b, 1 - JNE LBB0_262 - JMP LBB0_263 - -LBB0_264: - LONG $0x1479e3c4; WORD $0x01f3 // vpextrb ebx, xmm6, 1 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_265 - -LBB0_744: - LONG $0x16f963c4; 
WORD $0x01fb // vpextrq rbx, xmm15, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x01 // vpextrb byte [r8 + rbx], xmm14, 1 - LONG $0x1479e3c4; WORD $0x02f3 // vpextrb ebx, xmm6, 2 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - QUAD $0x000000e024bc8b4c // mov r15, qword [rsp + 224] - JNE LBB0_745 - -LBB0_266: - LONG $0x1479e3c4; WORD $0x03f3 // vpextrb ebx, xmm6, 3 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_267 - -LBB0_746: - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x03 // vpextrb byte [r8 + rbx], xmm14, 3 - LONG $0x1479e3c4; WORD $0x04f3 // vpextrb ebx, xmm6, 4 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_747 - -LBB0_268: - LONG $0x1479e3c4; WORD $0x05f3 // vpextrb ebx, xmm6, 5 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_269 - -LBB0_748: - LONG $0x16f9e3c4; WORD $0x01eb // vpextrq rbx, xmm5, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x05 // vpextrb byte [r8 + rbx], xmm14, 5 - LONG $0x1479e3c4; WORD $0x06f3 // vpextrb ebx, xmm6, 6 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_749 - -LBB0_270: - LONG $0x1479e3c4; WORD $0x07f3 // vpextrb ebx, xmm6, 7 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_271 - -LBB0_750: - LONG $0x397de3c4; WORD $0x01e9 // vextracti128 xmm1, ymm5, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x07 // vpextrb byte [r8 + rbx], xmm14, 7 - LONG $0x1479e3c4; WORD $0x08f3 // vpextrb ebx, xmm6, 8 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_751 - -LBB0_272: - LONG $0x1479e3c4; WORD $0x09f3 // vpextrb ebx, xmm6, 9 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_273 - -LBB0_752: - LONG $0x16f963c4; WORD $0x01e3 // vpextrq rbx, xmm12, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x09 // vpextrb byte [r8 + rbx], xmm14, 9 - LONG $0x1479e3c4; WORD $0x0af3 // vpextrb ebx, xmm6, 10 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_753 - -LBB0_274: - LONG 
$0x1479e3c4; WORD $0x0bf3 // vpextrb ebx, xmm6, 11 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_275 - -LBB0_754: - LONG $0x397d63c4; WORD $0x01e1 // vextracti128 xmm1, ymm12, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0b // vpextrb byte [r8 + rbx], xmm14, 11 - LONG $0x1479e3c4; WORD $0x0cf3 // vpextrb ebx, xmm6, 12 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_755 - -LBB0_276: - LONG $0x1479e3c4; WORD $0x0df3 // vpextrb ebx, xmm6, 13 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_277 - -LBB0_756: - LONG $0x16f963c4; WORD $0x01db // vpextrq rbx, xmm11, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0d // vpextrb byte [r8 + rbx], xmm14, 13 - LONG $0x1479e3c4; WORD $0x0ef3 // vpextrb ebx, xmm6, 14 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_757 - -LBB0_278: - LONG $0x1479e3c4; WORD $0x0ff3 // vpextrb ebx, xmm6, 15 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_279 - -LBB0_758: - LONG $0x397d63c4; WORD $0x01d9 // vextracti128 xmm1, ymm11, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0f // vpextrb byte [r8 + rbx], xmm14, 15 - LONG $0x2c2444f6; BYTE $0x01 // test byte [rsp + 44], 1 - JNE LBB0_759 - -LBB0_280: - LONG $0x282444f6; BYTE $0x01 // test byte [rsp + 40], 1 - JE LBB0_281 - -LBB0_760: - LONG $0x16f963c4; WORD $0x01d3 // vpextrq rbx, xmm10, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x01 // vpextrb byte [r8 + rbx], xmm1, 1 - LONG $0x242444f6; BYTE $0x01 // test byte [rsp + 36], 1 - JNE LBB0_761 - -LBB0_282: - LONG $0x202444f6; BYTE $0x01 // test byte [rsp + 32], 1 - JE LBB0_283 - -LBB0_762: - LONG $0x397d63c4; WORD $0x01d1 // vextracti128 xmm1, ymm10, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x03 // vpextrb byte [r8 + rbx], xmm1, 3 - LONG 
$0x1c2444f6; BYTE $0x01 // test byte [rsp + 28], 1 - JNE LBB0_763 - -LBB0_284: - LONG $0x182444f6; BYTE $0x01 // test byte [rsp + 24], 1 - JE LBB0_285 - -LBB0_764: - LONG $0x16f963c4; WORD $0x01cb // vpextrq rbx, xmm9, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x05 // vpextrb byte [r8 + rbx], xmm1, 5 - LONG $0x142444f6; BYTE $0x01 // test byte [rsp + 20], 1 - JNE LBB0_765 - -LBB0_286: - LONG $0x01c1f641 // test r9b, 1 - JE LBB0_287 - -LBB0_766: - LONG $0x397d63c4; WORD $0x01c9 // vextracti128 xmm1, ymm9, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x07 // vpextrb byte [r8 + rbx], xmm1, 7 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - QUAD $0x00000128249c8b48 // mov rbx, qword [rsp + 296] - JNE LBB0_767 - -LBB0_288: - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_289 - -LBB0_768: - LONG $0x16f963c4; WORD $0x01c1 // vpextrq rcx, xmm8, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x09 // vpextrb byte [r8 + rcx], xmm1, 9 - LONG $0x01c6f640 // test sil, 1 - QUAD $0x0000013024948b48 // mov rdx, qword [rsp + 304] - JNE LBB0_769 - -LBB0_290: - WORD $0x01a8 // test al, 1 - QUAD $0x0000009824b48b48 // mov rsi, qword [rsp + 152] - JE LBB0_291 - -LBB0_770: - LONG $0x397d63c4; WORD $0x01c1 // vextracti128 xmm1, ymm8, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0b // vpextrb byte [r8 + rcx], xmm1, 11 - LONG $0x01c5f641 // test r13b, 1 - JNE LBB0_771 - -LBB0_292: - LONG $0x01c2f641 // test r10b, 1 - QUAD $0x0000011824ac8b4c // mov r13, qword [rsp + 280] - JE LBB0_293 - -LBB0_772: - LONG $0x16f9e3c4; WORD $0x01f9 // vpextrq rcx, xmm7, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD 
$0x080c; BYTE $0x0d // vpextrb byte [r8 + rcx], xmm1, 13 - LONG $0x01c3f641 // test r11b, 1 - JNE LBB0_773 - -LBB0_294: - LONG $0x01c6f641 // test r14b, 1 - QUAD $0x0000012024848b48 // mov rax, qword [rsp + 288] - QUAD $0x000000e8248c8b4c // mov r9, qword [rsp + 232] - JE LBB0_296 - -LBB0_295: - LONG $0x397de3c4; WORD $0x01f9 // vextracti128 xmm1, ymm7, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0f // vpextrb byte [r8 + rcx], xmm1, 15 - -LBB0_296: - QUAD $0x0002c0248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 704] - QUAD $0x00020024bceb75c5; BYTE $0x00 // vpor ymm15, ymm1, yword [rsp + 512] - QUAD $0x0001e024acebf5c5; BYTE $0x00 // vpor ymm5, ymm1, yword [rsp + 480] - QUAD $0x0001802494eb75c5; BYTE $0x00 // vpor ymm10, ymm1, yword [rsp + 384] - QUAD $0x000160248ceb75c5; BYTE $0x00 // vpor ymm9, ymm1, yword [rsp + 352] - QUAD $0x0001c024a4eb75c5; BYTE $0x00 // vpor ymm12, ymm1, yword [rsp + 448] - QUAD $0x0001a0249ceb75c5; BYTE $0x00 // vpor ymm11, ymm1, yword [rsp + 416] - QUAD $0x0001402484eb75c5; BYTE $0x00 // vpor ymm8, ymm1, yword [rsp + 320] - LONG $0xf9ebddc5 // vpor ymm7, ymm4, ymm1 - LONG $0x463de3c4; WORD $0x31cf // vperm2i128 ymm1, ymm8, ymm7, 49 - LONG $0x383de3c4; WORD $0x01d7 // vinserti128 ymm2, ymm8, xmm7, 1 - LONG $0xc9c6ecc5; BYTE $0x88 // vshufps ymm1, ymm2, ymm1, 136 - LONG $0x461dc3c4; WORD $0x31d3 // vperm2i128 ymm2, ymm12, ymm11, 49 - LONG $0x381dc3c4; WORD $0x01db // vinserti128 ymm3, ymm12, xmm11, 1 - LONG $0xd2c6e4c5; BYTE $0x88 // vshufps ymm2, ymm3, ymm2, 136 - LONG $0x462dc3c4; WORD $0x31d9 // vperm2i128 ymm3, ymm10, ymm9, 49 - LONG $0x382d43c4; WORD $0x01e9 // vinserti128 ymm13, ymm10, xmm9, 1 - LONG $0xdbc694c5; BYTE $0x88 // vshufps ymm3, ymm13, ymm3, 136 - LONG $0x460563c4; WORD $0x31ed // vperm2i128 ymm13, ymm15, ymm5, 49 - LONG $0x380563c4; WORD $0x01f5 // vinserti128 ymm14, ymm15, xmm5, 1 - LONG 
$0xc60c41c4; WORD $0x88ed // vshufps ymm13, ymm14, ymm13, 136 - LONG $0x667d41c4; BYTE $0xed // vpcmpgtd ymm13, ymm0, ymm13 - LONG $0xdb66fdc5 // vpcmpgtd ymm3, ymm0, ymm3 - LONG $0xdb6b95c5 // vpackssdw ymm3, ymm13, ymm3 - LONG $0xd266fdc5 // vpcmpgtd ymm2, ymm0, ymm2 - LONG $0xc966fdc5 // vpcmpgtd ymm1, ymm0, ymm1 - LONG $0xc96bedc5 // vpackssdw ymm1, ymm2, ymm1 - LONG $0x00fde3c4; WORD $0xd8d3 // vpermq ymm2, ymm3, 216 - LONG $0x00fde3c4; WORD $0xd8c9 // vpermq ymm1, ymm1, 216 - LONG $0xc963edc5 // vpacksswb ymm1, ymm2, ymm1 - LONG $0xf6dbf5c5 // vpand ymm6, ymm1, ymm6 - LONG $0xf17ef9c5 // vmovd ecx, xmm6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_297 - LONG $0x787d62c4; WORD $0x1734 // vpbroadcastb ymm14, byte [rdi + rdx] - LONG $0x1479e3c4; WORD $0x01f1 // vpextrb ecx, xmm6, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_775 - -LBB0_298: - LONG $0x24548b48; BYTE $0x68 // mov rdx, qword [rsp + 104] - LONG $0x1479e3c4; WORD $0x02f1 // vpextrb ecx, xmm6, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_300 - -LBB0_299: - LONG $0x2009e3c4; WORD $0x1f0c; BYTE $0x02 // vpinsrb xmm1, xmm14, byte [rdi + rbx], 2 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_300: - LONG $0x24748b48; BYTE $0x60 // mov rsi, qword [rsp + 96] - LONG $0x24548b4c; BYTE $0x48 // mov r10, qword [rsp + 72] - LONG $0x1479e3c4; WORD $0x03f1 // vpextrb ecx, xmm6, 3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_301 - QUAD $0x00000110248c8b48 // mov rcx, qword [rsp + 272] - LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x03 // vpinsrb xmm1, xmm14, byte [rdi + rcx], 3 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x04f1 // vpextrb ecx, xmm6, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_777 - -LBB0_302: - LONG $0x1479e3c4; WORD $0x05f1 // vpextrb ecx, xmm6, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_303 - -LBB0_778: - LONG $0x2009e3c4; WORD $0x170c; BYTE $0x05 // vpinsrb xmm1, xmm14, byte 
[rdi + rdx], 5 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x06f1 // vpextrb ecx, xmm6, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_779 - -LBB0_304: - LONG $0x1479e3c4; WORD $0x07f1 // vpextrb ecx, xmm6, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_305 - -LBB0_780: - LONG $0x2009a3c4; WORD $0x0f0c; BYTE $0x07 // vpinsrb xmm1, xmm14, byte [rdi + r9], 7 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x08f1 // vpextrb ecx, xmm6, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_781 - -LBB0_306: - LONG $0x24548b48; BYTE $0x58 // mov rdx, qword [rsp + 88] - LONG $0x1479e3c4; WORD $0x09f1 // vpextrb ecx, xmm6, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_308 - -LBB0_307: - LONG $0x2009a3c4; WORD $0x3f0c; BYTE $0x09 // vpinsrb xmm1, xmm14, byte [rdi + r15], 9 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_308: - QUAD $0x0000009024848b48 // mov rax, qword [rsp + 144] - QUAD $0x0000008824b48b48 // mov rsi, qword [rsp + 136] - QUAD $0x00000080249c8b48 // mov rbx, qword [rsp + 128] - LONG $0x244c8b4c; BYTE $0x78 // mov r9, qword [rsp + 120] - LONG $0x1479e3c4; WORD $0x0af1 // vpextrb ecx, xmm6, 10 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_309 - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0a // vpinsrb xmm1, xmm14, byte [rdi + rax], 10 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb ecx, xmm6, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_783 - -LBB0_310: - LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb ecx, xmm6, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_311 - -LBB0_784: - QUAD $0x000000f824848b48 // mov rax, qword [rsp + 248] - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0c // vpinsrb xmm1, xmm14, byte [rdi + rax], 12 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0df1 // vpextrb ecx, xmm6, 13 
- WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_312 - JMP LBB0_313 - -LBB0_297: - LONG $0x1479e3c4; WORD $0x01f1 // vpextrb ecx, xmm6, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_298 - -LBB0_775: - LONG $0x2009e3c4; WORD $0x370c; BYTE $0x01 // vpinsrb xmm1, xmm14, byte [rdi + rsi], 1 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x24548b48; BYTE $0x68 // mov rdx, qword [rsp + 104] - LONG $0x1479e3c4; WORD $0x02f1 // vpextrb ecx, xmm6, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_299 - JMP LBB0_300 - -LBB0_301: - LONG $0x1479e3c4; WORD $0x04f1 // vpextrb ecx, xmm6, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_302 - -LBB0_777: - QUAD $0x00000108248c8b48 // mov rcx, qword [rsp + 264] - LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x04 // vpinsrb xmm1, xmm14, byte [rdi + rcx], 4 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x05f1 // vpextrb ecx, xmm6, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_778 - -LBB0_303: - LONG $0x1479e3c4; WORD $0x06f1 // vpextrb ecx, xmm6, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_304 - -LBB0_779: - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x06 // vpinsrb xmm1, xmm14, byte [rdi + rax], 6 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x07f1 // vpextrb ecx, xmm6, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_780 - -LBB0_305: - LONG $0x1479e3c4; WORD $0x08f1 // vpextrb ecx, xmm6, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_306 - -LBB0_781: - LONG $0x2009e3c4; WORD $0x370c; BYTE $0x08 // vpinsrb xmm1, xmm14, byte [rdi + rsi], 8 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x24548b48; BYTE $0x58 // mov rdx, qword [rsp + 88] - LONG $0x1479e3c4; WORD $0x09f1 // vpextrb ecx, xmm6, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_307 - JMP LBB0_308 - -LBB0_309: - LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb ecx, xmm6, 11 - WORD $0xc1f6; 
BYTE $0x01 // test cl, 1 - JE LBB0_310 - -LBB0_783: - QUAD $0x0000010024848b48 // mov rax, qword [rsp + 256] - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0b // vpinsrb xmm1, xmm14, byte [rdi + rax], 11 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb ecx, xmm6, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_784 - -LBB0_311: - LONG $0x1479e3c4; WORD $0x0df1 // vpextrb ecx, xmm6, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_313 - -LBB0_312: - LONG $0x2009e3c4; WORD $0x170c; BYTE $0x0d // vpinsrb xmm1, xmm14, byte [rdi + rdx], 13 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_313: - LONG $0x24448b48; BYTE $0x50 // mov rax, qword [rsp + 80] - LONG $0x24548b48; BYTE $0x40 // mov rdx, qword [rsp + 64] - LONG $0x1479e3c4; WORD $0x0ef1 // vpextrb ecx, xmm6, 14 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_315 - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0e // vpinsrb xmm1, xmm14, byte [rdi + rax], 14 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_315: - LONG $0x1479e3c4; WORD $0x0ff1 // vpextrb ecx, xmm6, 15 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_317 - LONG $0x2009a3c4; WORD $0x170c; BYTE $0x0f // vpinsrb xmm1, xmm14, byte [rdi + r10], 15 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_317: - LONG $0x397de3c4; WORD $0x01f1 // vextracti128 xmm1, ymm6, 1 - LONG $0xc87ef9c5 // vmovd eax, xmm1 - LONG $0x2c244489 // mov dword [rsp + 44], eax - WORD $0x01a8 // test al, 1 - JE LBB0_319 - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x1714; BYTE $0x00 // vpinsrb xmm2, xmm2, byte [rdi + rdx], 0 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - -LBB0_319: - LONG $0x24448b48; BYTE $0x38 // mov rax, qword [rsp + 56] - LONG $0x1479e3c4; WORD $0x01c9 // vpextrb ecx, xmm1, 1 - LONG $0x28244c89 // mov dword [rsp + 40], ecx - WORD $0xc1f6; BYTE $0x01 
// test cl, 1 - JE LBB0_320 - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x3714; BYTE $0x01 // vpinsrb xmm2, xmm2, byte [rdi + rsi], 1 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x02c9 // vpextrb ecx, xmm1, 2 - LONG $0x24244c89 // mov dword [rsp + 36], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_786 - -LBB0_321: - LONG $0x1479e3c4; WORD $0x03c9 // vpextrb ecx, xmm1, 3 - LONG $0x20244c89 // mov dword [rsp + 32], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_322 - -LBB0_787: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069a3c4; WORD $0x0f14; BYTE $0x03 // vpinsrb xmm2, xmm2, byte [rdi + r9], 3 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x04c9 // vpextrb ecx, xmm1, 4 - LONG $0x1c244c89 // mov dword [rsp + 28], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_788 - -LBB0_323: - LONG $0x1479e3c4; WORD $0x05c8 // vpextrb eax, xmm1, 5 - LONG $0x18244489 // mov dword [rsp + 24], eax - WORD $0x01a8 // test al, 1 - JE LBB0_325 - -LBB0_324: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069a3c4; WORD $0x2f14; BYTE $0x05 // vpinsrb xmm2, xmm2, byte [rdi + r13], 5 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - -LBB0_325: - LONG $0x24448b48; BYTE $0x70 // mov rax, qword [rsp + 112] - LONG $0x1479e3c4; WORD $0x06c9 // vpextrb ecx, xmm1, 6 - LONG $0x14244c89 // mov dword [rsp + 20], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_326 - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x06 // vpinsrb xmm2, xmm2, byte [rdi + rax], 6 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x07c9 // vpextrb r9d, xmm1, 7 - LONG $0x01c1f641 // test r9b, 1 - JNE LBB0_790 - -LBB0_327: - LONG $0x1479e3c4; WORD $0x08ca // vpextrb edx, 
xmm1, 8 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - JE LBB0_328 - -LBB0_791: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000d824848b48 // mov rax, qword [rsp + 216] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x08 // vpinsrb xmm2, xmm2, byte [rdi + rax], 8 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x09c9 // vpextrb ecx, xmm1, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_792 - -LBB0_329: - LONG $0x1479e3c4; WORD $0x0ace // vpextrb esi, xmm1, 10 - LONG $0x01c6f640 // test sil, 1 - JE LBB0_330 - -LBB0_793: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000c824848b48 // mov rax, qword [rsp + 200] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x0a // vpinsrb xmm2, xmm2, byte [rdi + rax], 10 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x0bc8 // vpextrb eax, xmm1, 11 - WORD $0x01a8 // test al, 1 - JNE LBB0_794 - -LBB0_331: - LONG $0x1479c3c4; WORD $0x0ccd // vpextrb r13d, xmm1, 12 - LONG $0x01c5f641 // test r13b, 1 - JE LBB0_332 - -LBB0_795: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000b8249c8b48 // mov rbx, qword [rsp + 184] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0c // vpinsrb xmm2, xmm2, byte [rdi + rbx], 12 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0dca // vpextrb r10d, xmm1, 13 - LONG $0x01c2f641 // test r10b, 1 - JNE LBB0_796 - -LBB0_333: - LONG $0x1479c3c4; WORD $0x0ecb // vpextrb r11d, xmm1, 14 - LONG $0x01c3f641 // test r11b, 1 - JE LBB0_334 - -LBB0_797: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000a8249c8b48 // mov rbx, qword [rsp + 168] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0e // vpinsrb xmm2, xmm2, byte [rdi + rbx], 14 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0fce // vpextrb r14d, xmm1, 15 - LONG $0x01c6f641 // 
test r14b, 1 - JNE LBB0_335 - JMP LBB0_336 - -LBB0_320: - LONG $0x1479e3c4; WORD $0x02c9 // vpextrb ecx, xmm1, 2 - LONG $0x24244c89 // mov dword [rsp + 36], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_321 - -LBB0_786: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x02 // vpinsrb xmm2, xmm2, byte [rdi + rbx], 2 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x03c9 // vpextrb ecx, xmm1, 3 - LONG $0x20244c89 // mov dword [rsp + 32], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_787 - -LBB0_322: - LONG $0x1479e3c4; WORD $0x04c9 // vpextrb ecx, xmm1, 4 - LONG $0x1c244c89 // mov dword [rsp + 28], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_323 - -LBB0_788: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x04 // vpinsrb xmm2, xmm2, byte [rdi + rax], 4 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x05c8 // vpextrb eax, xmm1, 5 - LONG $0x18244489 // mov dword [rsp + 24], eax - WORD $0x01a8 // test al, 1 - JNE LBB0_324 - JMP LBB0_325 - -LBB0_326: - LONG $0x1479c3c4; WORD $0x07c9 // vpextrb r9d, xmm1, 7 - LONG $0x01c1f641 // test r9b, 1 - JE LBB0_327 - -LBB0_790: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000f024848b48 // mov rax, qword [rsp + 240] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x07 // vpinsrb xmm2, xmm2, byte [rdi + rax], 7 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x08ca // vpextrb edx, xmm1, 8 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - JNE LBB0_791 - -LBB0_328: - LONG $0x1479e3c4; WORD $0x09c9 // vpextrb ecx, xmm1, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_329 - -LBB0_792: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000d024848b48 // mov rax, qword [rsp + 208] - LONG $0x2069e3c4; WORD $0x0714; 
BYTE $0x09 // vpinsrb xmm2, xmm2, byte [rdi + rax], 9 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x0ace // vpextrb esi, xmm1, 10 - LONG $0x01c6f640 // test sil, 1 - JNE LBB0_793 - -LBB0_330: - LONG $0x1479e3c4; WORD $0x0bc8 // vpextrb eax, xmm1, 11 - WORD $0x01a8 // test al, 1 - JE LBB0_331 - -LBB0_794: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000c0249c8b48 // mov rbx, qword [rsp + 192] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0b // vpinsrb xmm2, xmm2, byte [rdi + rbx], 11 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0ccd // vpextrb r13d, xmm1, 12 - LONG $0x01c5f641 // test r13b, 1 - JNE LBB0_795 - -LBB0_332: - LONG $0x1479c3c4; WORD $0x0dca // vpextrb r10d, xmm1, 13 - LONG $0x01c2f641 // test r10b, 1 - JE LBB0_333 - -LBB0_796: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000b0249c8b48 // mov rbx, qword [rsp + 176] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0d // vpinsrb xmm2, xmm2, byte [rdi + rbx], 13 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0ecb // vpextrb r11d, xmm1, 14 - LONG $0x01c3f641 // test r11b, 1 - JNE LBB0_797 - -LBB0_334: - LONG $0x1479c3c4; WORD $0x0fce // vpextrb r14d, xmm1, 15 - LONG $0x01c6f641 // test r14b, 1 - JE LBB0_336 - -LBB0_335: - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - QUAD $0x000000a0249c8b48 // mov rbx, qword [rsp + 160] - LONG $0x2071e3c4; WORD $0x1f0c; BYTE $0x0f // vpinsrb xmm1, xmm1, byte [rdi + rbx], 15 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - -LBB0_336: - LONG $0x7175c1c4; WORD $0x03d6 // vpsrlw ymm1, ymm14, 3 - QUAD $0x00000080b5db75c5 // vpand ymm14, ymm1, yword 128[rbp] /* [rip + .LCPI0_4] */ - LONG $0x7e79c1c4; BYTE $0xf7 // vmovd r15d, xmm6 - LONG $0x01c7f641 // test r15b, 1 - JE LBB0_337 - LONG $0x7ef961c4; BYTE $0xfb // vmovq rbx, xmm15 - LONG 
$0x147943c4; WORD $0x1834; BYTE $0x00 // vpextrb byte [r8 + rbx], xmm14, 0 - LONG $0x1479e3c4; WORD $0x01f3 // vpextrb ebx, xmm6, 1 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_799 - -LBB0_338: - LONG $0x1479e3c4; WORD $0x02f3 // vpextrb ebx, xmm6, 2 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - QUAD $0x000000e024bc8b4c // mov r15, qword [rsp + 224] - JE LBB0_339 - -LBB0_800: - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x02 // vpextrb byte [r8 + rbx], xmm14, 2 - LONG $0x1479e3c4; WORD $0x03f3 // vpextrb ebx, xmm6, 3 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_801 - -LBB0_340: - LONG $0x1479e3c4; WORD $0x04f3 // vpextrb ebx, xmm6, 4 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_341 - -LBB0_802: - LONG $0x7ef9e1c4; BYTE $0xeb // vmovq rbx, xmm5 - LONG $0x147943c4; WORD $0x1834; BYTE $0x04 // vpextrb byte [r8 + rbx], xmm14, 4 - LONG $0x1479e3c4; WORD $0x05f3 // vpextrb ebx, xmm6, 5 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_803 - -LBB0_342: - LONG $0x1479e3c4; WORD $0x06f3 // vpextrb ebx, xmm6, 6 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_343 - -LBB0_804: - LONG $0x397de3c4; WORD $0x01e9 // vextracti128 xmm1, ymm5, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x06 // vpextrb byte [r8 + rbx], xmm14, 6 - LONG $0x1479e3c4; WORD $0x07f3 // vpextrb ebx, xmm6, 7 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_805 - -LBB0_344: - LONG $0x1479e3c4; WORD $0x08f3 // vpextrb ebx, xmm6, 8 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_345 - -LBB0_806: - LONG $0x7ef961c4; BYTE $0xe3 // vmovq rbx, xmm12 - LONG $0x147943c4; WORD $0x1834; BYTE $0x08 // vpextrb byte [r8 + rbx], xmm14, 8 - LONG $0x1479e3c4; WORD $0x09f3 // vpextrb ebx, xmm6, 9 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_807 - -LBB0_346: - LONG $0x1479e3c4; WORD $0x0af3 // vpextrb ebx, xmm6, 10 - WORD $0xc3f6; BYTE 
$0x01 // test bl, 1 - JE LBB0_347 - -LBB0_808: - LONG $0x397d63c4; WORD $0x01e1 // vextracti128 xmm1, ymm12, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0a // vpextrb byte [r8 + rbx], xmm14, 10 - LONG $0x1479e3c4; WORD $0x0bf3 // vpextrb ebx, xmm6, 11 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_809 - -LBB0_348: - LONG $0x1479e3c4; WORD $0x0cf3 // vpextrb ebx, xmm6, 12 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_349 - -LBB0_810: - LONG $0x7ef961c4; BYTE $0xdb // vmovq rbx, xmm11 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0c // vpextrb byte [r8 + rbx], xmm14, 12 - LONG $0x1479e3c4; WORD $0x0df3 // vpextrb ebx, xmm6, 13 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_811 - -LBB0_350: - LONG $0x1479e3c4; WORD $0x0ef3 // vpextrb ebx, xmm6, 14 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_351 - -LBB0_812: - LONG $0x397d63c4; WORD $0x01d9 // vextracti128 xmm1, ymm11, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0e // vpextrb byte [r8 + rbx], xmm14, 14 - LONG $0x1479e3c4; WORD $0x0ff3 // vpextrb ebx, xmm6, 15 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_813 - -LBB0_352: - LONG $0x2c2444f6; BYTE $0x01 // test byte [rsp + 44], 1 - JE LBB0_353 - -LBB0_814: - LONG $0x7ef961c4; BYTE $0xd3 // vmovq rbx, xmm10 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x00 // vpextrb byte [r8 + rbx], xmm1, 0 - LONG $0x282444f6; BYTE $0x01 // test byte [rsp + 40], 1 - JNE LBB0_815 - -LBB0_354: - LONG $0x242444f6; BYTE $0x01 // test byte [rsp + 36], 1 - JE LBB0_355 - -LBB0_816: - LONG $0x397d63c4; WORD $0x01d1 // vextracti128 xmm1, ymm10, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x02 // vpextrb byte [r8 + rbx], xmm1, 2 - LONG $0x202444f6; BYTE $0x01 // test byte [rsp + 32], 1 - JNE LBB0_817 - 
-LBB0_356: - LONG $0x1c2444f6; BYTE $0x01 // test byte [rsp + 28], 1 - JE LBB0_357 - -LBB0_818: - LONG $0x7ef961c4; BYTE $0xcb // vmovq rbx, xmm9 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x04 // vpextrb byte [r8 + rbx], xmm1, 4 - LONG $0x182444f6; BYTE $0x01 // test byte [rsp + 24], 1 - JNE LBB0_819 - -LBB0_358: - LONG $0x142444f6; BYTE $0x01 // test byte [rsp + 20], 1 - JE LBB0_359 - -LBB0_820: - LONG $0x397d63c4; WORD $0x01c9 // vextracti128 xmm1, ymm9, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x06 // vpextrb byte [r8 + rbx], xmm1, 6 - LONG $0x01c1f641 // test r9b, 1 - JNE LBB0_821 - -LBB0_360: - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - QUAD $0x00000128249c8b48 // mov rbx, qword [rsp + 296] - JE LBB0_361 - -LBB0_822: - LONG $0x7ef961c4; BYTE $0xc2 // vmovq rdx, xmm8 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x100c; BYTE $0x08 // vpextrb byte [r8 + rdx], xmm1, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_823 - -LBB0_362: - LONG $0x01c6f640 // test sil, 1 - QUAD $0x0000013024948b48 // mov rdx, qword [rsp + 304] - JE LBB0_363 - -LBB0_824: - LONG $0x397d63c4; WORD $0x01c1 // vextracti128 xmm1, ymm8, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0a // vpextrb byte [r8 + rcx], xmm1, 10 - WORD $0x01a8 // test al, 1 - QUAD $0x0000009824b48b48 // mov rsi, qword [rsp + 152] - JNE LBB0_825 - -LBB0_364: - LONG $0x01c5f641 // test r13b, 1 - JE LBB0_365 - -LBB0_826: - LONG $0x7ef9e1c4; BYTE $0xf9 // vmovq rcx, xmm7 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0c // vpextrb byte [r8 + rcx], xmm1, 12 - LONG $0x01c2f641 // test r10b, 1 - QUAD $0x0000011824ac8b4c // mov r13, qword 
[rsp + 280] - JNE LBB0_827 - -LBB0_366: - LONG $0x01c3f641 // test r11b, 1 - JE LBB0_367 - -LBB0_828: - LONG $0x397de3c4; WORD $0x01f9 // vextracti128 xmm1, ymm7, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0e // vpextrb byte [r8 + rcx], xmm1, 14 - LONG $0x01c6f641 // test r14b, 1 - QUAD $0x0000012024848b48 // mov rax, qword [rsp + 288] - QUAD $0x000000e8248c8b4c // mov r9, qword [rsp + 232] - JNE LBB0_368 - JMP LBB0_369 - -LBB0_337: - LONG $0x1479e3c4; WORD $0x01f3 // vpextrb ebx, xmm6, 1 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_338 - -LBB0_799: - LONG $0x16f963c4; WORD $0x01fb // vpextrq rbx, xmm15, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x01 // vpextrb byte [r8 + rbx], xmm14, 1 - LONG $0x1479e3c4; WORD $0x02f3 // vpextrb ebx, xmm6, 2 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - QUAD $0x000000e024bc8b4c // mov r15, qword [rsp + 224] - JNE LBB0_800 - -LBB0_339: - LONG $0x1479e3c4; WORD $0x03f3 // vpextrb ebx, xmm6, 3 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_340 - -LBB0_801: - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x03 // vpextrb byte [r8 + rbx], xmm14, 3 - LONG $0x1479e3c4; WORD $0x04f3 // vpextrb ebx, xmm6, 4 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_802 - -LBB0_341: - LONG $0x1479e3c4; WORD $0x05f3 // vpextrb ebx, xmm6, 5 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_342 - -LBB0_803: - LONG $0x16f9e3c4; WORD $0x01eb // vpextrq rbx, xmm5, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x05 // vpextrb byte [r8 + rbx], xmm14, 5 - LONG $0x1479e3c4; WORD $0x06f3 // vpextrb ebx, xmm6, 6 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_804 - -LBB0_343: - LONG $0x1479e3c4; WORD $0x07f3 // vpextrb ebx, xmm6, 7 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_344 - -LBB0_805: - LONG $0x397de3c4; WORD 
$0x01e9 // vextracti128 xmm1, ymm5, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x07 // vpextrb byte [r8 + rbx], xmm14, 7 - LONG $0x1479e3c4; WORD $0x08f3 // vpextrb ebx, xmm6, 8 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_806 - -LBB0_345: - LONG $0x1479e3c4; WORD $0x09f3 // vpextrb ebx, xmm6, 9 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_346 - -LBB0_807: - LONG $0x16f963c4; WORD $0x01e3 // vpextrq rbx, xmm12, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x09 // vpextrb byte [r8 + rbx], xmm14, 9 - LONG $0x1479e3c4; WORD $0x0af3 // vpextrb ebx, xmm6, 10 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_808 - -LBB0_347: - LONG $0x1479e3c4; WORD $0x0bf3 // vpextrb ebx, xmm6, 11 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_348 - -LBB0_809: - LONG $0x397d63c4; WORD $0x01e1 // vextracti128 xmm1, ymm12, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0b // vpextrb byte [r8 + rbx], xmm14, 11 - LONG $0x1479e3c4; WORD $0x0cf3 // vpextrb ebx, xmm6, 12 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_810 - -LBB0_349: - LONG $0x1479e3c4; WORD $0x0df3 // vpextrb ebx, xmm6, 13 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_350 - -LBB0_811: - LONG $0x16f963c4; WORD $0x01db // vpextrq rbx, xmm11, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0d // vpextrb byte [r8 + rbx], xmm14, 13 - LONG $0x1479e3c4; WORD $0x0ef3 // vpextrb ebx, xmm6, 14 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_812 - -LBB0_351: - LONG $0x1479e3c4; WORD $0x0ff3 // vpextrb ebx, xmm6, 15 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_352 - -LBB0_813: - LONG $0x397d63c4; WORD $0x01d9 // vextracti128 xmm1, ymm11, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0f // vpextrb byte [r8 + rbx], xmm14, 15 - LONG $0x2c2444f6; BYTE $0x01 // test byte [rsp + 44], 1 - JNE LBB0_814 - -LBB0_353: - LONG $0x282444f6; BYTE $0x01 // 
test byte [rsp + 40], 1 - JE LBB0_354 - -LBB0_815: - LONG $0x16f963c4; WORD $0x01d3 // vpextrq rbx, xmm10, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x01 // vpextrb byte [r8 + rbx], xmm1, 1 - LONG $0x242444f6; BYTE $0x01 // test byte [rsp + 36], 1 - JNE LBB0_816 - -LBB0_355: - LONG $0x202444f6; BYTE $0x01 // test byte [rsp + 32], 1 - JE LBB0_356 - -LBB0_817: - LONG $0x397d63c4; WORD $0x01d1 // vextracti128 xmm1, ymm10, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x03 // vpextrb byte [r8 + rbx], xmm1, 3 - LONG $0x1c2444f6; BYTE $0x01 // test byte [rsp + 28], 1 - JNE LBB0_818 - -LBB0_357: - LONG $0x182444f6; BYTE $0x01 // test byte [rsp + 24], 1 - JE LBB0_358 - -LBB0_819: - LONG $0x16f963c4; WORD $0x01cb // vpextrq rbx, xmm9, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x05 // vpextrb byte [r8 + rbx], xmm1, 5 - LONG $0x142444f6; BYTE $0x01 // test byte [rsp + 20], 1 - JNE LBB0_820 - -LBB0_359: - LONG $0x01c1f641 // test r9b, 1 - JE LBB0_360 - -LBB0_821: - LONG $0x397d63c4; WORD $0x01c9 // vextracti128 xmm1, ymm9, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x07 // vpextrb byte [r8 + rbx], xmm1, 7 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - QUAD $0x00000128249c8b48 // mov rbx, qword [rsp + 296] - JNE LBB0_822 - -LBB0_361: - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_362 - -LBB0_823: - LONG $0x16f963c4; WORD $0x01c1 // vpextrq rcx, xmm8, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x09 // vpextrb byte [r8 + rcx], xmm1, 9 - LONG $0x01c6f640 // test sil, 1 - QUAD $0x0000013024948b48 // mov rdx, qword [rsp + 304] - JNE LBB0_824 - -LBB0_363: - WORD 
$0x01a8 // test al, 1 - QUAD $0x0000009824b48b48 // mov rsi, qword [rsp + 152] - JE LBB0_364 - -LBB0_825: - LONG $0x397d63c4; WORD $0x01c1 // vextracti128 xmm1, ymm8, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0b // vpextrb byte [r8 + rcx], xmm1, 11 - LONG $0x01c5f641 // test r13b, 1 - JNE LBB0_826 - -LBB0_365: - LONG $0x01c2f641 // test r10b, 1 - QUAD $0x0000011824ac8b4c // mov r13, qword [rsp + 280] - JE LBB0_366 - -LBB0_827: - LONG $0x16f9e3c4; WORD $0x01f9 // vpextrq rcx, xmm7, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0d // vpextrb byte [r8 + rcx], xmm1, 13 - LONG $0x01c3f641 // test r11b, 1 - JNE LBB0_828 - -LBB0_367: - LONG $0x01c6f641 // test r14b, 1 - QUAD $0x0000012024848b48 // mov rax, qword [rsp + 288] - QUAD $0x000000e8248c8b4c // mov r9, qword [rsp + 232] - JE LBB0_369 - -LBB0_368: - LONG $0x397de3c4; WORD $0x01f9 // vextracti128 xmm1, ymm7, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0f // vpextrb byte [r8 + rcx], xmm1, 15 - -LBB0_369: - QUAD $0x0002a0248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 672] - QUAD $0x00020024bceb75c5; BYTE $0x00 // vpor ymm15, ymm1, yword [rsp + 512] - QUAD $0x0001e024acebf5c5; BYTE $0x00 // vpor ymm5, ymm1, yword [rsp + 480] - QUAD $0x0001802494eb75c5; BYTE $0x00 // vpor ymm10, ymm1, yword [rsp + 384] - QUAD $0x000160248ceb75c5; BYTE $0x00 // vpor ymm9, ymm1, yword [rsp + 352] - QUAD $0x0001c024a4eb75c5; BYTE $0x00 // vpor ymm12, ymm1, yword [rsp + 448] - QUAD $0x0001a0249ceb75c5; BYTE $0x00 // vpor ymm11, ymm1, yword [rsp + 416] - QUAD $0x0001402484eb75c5; BYTE $0x00 // vpor ymm8, ymm1, yword [rsp + 320] - LONG $0xf9ebddc5 // vpor ymm7, ymm4, ymm1 - LONG $0x463de3c4; WORD $0x31cf // vperm2i128 ymm1, ymm8, ymm7, 
49 - LONG $0x383de3c4; WORD $0x01d7 // vinserti128 ymm2, ymm8, xmm7, 1 - LONG $0xc9c6ecc5; BYTE $0x88 // vshufps ymm1, ymm2, ymm1, 136 - LONG $0x461dc3c4; WORD $0x31d3 // vperm2i128 ymm2, ymm12, ymm11, 49 - LONG $0x381dc3c4; WORD $0x01db // vinserti128 ymm3, ymm12, xmm11, 1 - LONG $0xd2c6e4c5; BYTE $0x88 // vshufps ymm2, ymm3, ymm2, 136 - LONG $0x462dc3c4; WORD $0x31d9 // vperm2i128 ymm3, ymm10, ymm9, 49 - LONG $0x382d43c4; WORD $0x01e9 // vinserti128 ymm13, ymm10, xmm9, 1 - LONG $0xdbc694c5; BYTE $0x88 // vshufps ymm3, ymm13, ymm3, 136 - LONG $0x460563c4; WORD $0x31ed // vperm2i128 ymm13, ymm15, ymm5, 49 - LONG $0x380563c4; WORD $0x01f5 // vinserti128 ymm14, ymm15, xmm5, 1 - LONG $0xc60c41c4; WORD $0x88ed // vshufps ymm13, ymm14, ymm13, 136 - LONG $0x667d41c4; BYTE $0xed // vpcmpgtd ymm13, ymm0, ymm13 - LONG $0xdb66fdc5 // vpcmpgtd ymm3, ymm0, ymm3 - LONG $0xdb6b95c5 // vpackssdw ymm3, ymm13, ymm3 - LONG $0xd266fdc5 // vpcmpgtd ymm2, ymm0, ymm2 - LONG $0xc966fdc5 // vpcmpgtd ymm1, ymm0, ymm1 - LONG $0xc96bedc5 // vpackssdw ymm1, ymm2, ymm1 - LONG $0x00fde3c4; WORD $0xd8d3 // vpermq ymm2, ymm3, 216 - LONG $0x00fde3c4; WORD $0xd8c9 // vpermq ymm1, ymm1, 216 - LONG $0xc963edc5 // vpacksswb ymm1, ymm2, ymm1 - LONG $0xf6dbf5c5 // vpand ymm6, ymm1, ymm6 - LONG $0xf17ef9c5 // vmovd ecx, xmm6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_370 - LONG $0x787d62c4; WORD $0x1734 // vpbroadcastb ymm14, byte [rdi + rdx] - LONG $0x1479e3c4; WORD $0x01f1 // vpextrb ecx, xmm6, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_830 - -LBB0_371: - LONG $0x24548b48; BYTE $0x68 // mov rdx, qword [rsp + 104] - LONG $0x1479e3c4; WORD $0x02f1 // vpextrb ecx, xmm6, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_373 - -LBB0_372: - LONG $0x2009e3c4; WORD $0x1f0c; BYTE $0x02 // vpinsrb xmm1, xmm14, byte [rdi + rbx], 2 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_373: - LONG $0x24748b48; BYTE $0x60 // mov rsi, qword [rsp + 96] - LONG $0x24548b4c; 
BYTE $0x48 // mov r10, qword [rsp + 72] - LONG $0x1479e3c4; WORD $0x03f1 // vpextrb ecx, xmm6, 3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_374 - QUAD $0x00000110248c8b48 // mov rcx, qword [rsp + 272] - LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x03 // vpinsrb xmm1, xmm14, byte [rdi + rcx], 3 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x04f1 // vpextrb ecx, xmm6, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_832 - -LBB0_375: - LONG $0x1479e3c4; WORD $0x05f1 // vpextrb ecx, xmm6, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_376 - -LBB0_833: - LONG $0x2009e3c4; WORD $0x170c; BYTE $0x05 // vpinsrb xmm1, xmm14, byte [rdi + rdx], 5 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x06f1 // vpextrb ecx, xmm6, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_834 - -LBB0_377: - LONG $0x1479e3c4; WORD $0x07f1 // vpextrb ecx, xmm6, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_378 - -LBB0_835: - LONG $0x2009a3c4; WORD $0x0f0c; BYTE $0x07 // vpinsrb xmm1, xmm14, byte [rdi + r9], 7 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x08f1 // vpextrb ecx, xmm6, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_836 - -LBB0_379: - LONG $0x24548b48; BYTE $0x58 // mov rdx, qword [rsp + 88] - LONG $0x1479e3c4; WORD $0x09f1 // vpextrb ecx, xmm6, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_381 - -LBB0_380: - LONG $0x2009a3c4; WORD $0x3f0c; BYTE $0x09 // vpinsrb xmm1, xmm14, byte [rdi + r15], 9 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_381: - QUAD $0x0000009024848b48 // mov rax, qword [rsp + 144] - QUAD $0x0000008824b48b48 // mov rsi, qword [rsp + 136] - QUAD $0x00000080249c8b48 // mov rbx, qword [rsp + 128] - LONG $0x244c8b4c; BYTE $0x78 // mov r9, qword [rsp + 120] - LONG $0x1479e3c4; WORD $0x0af1 // vpextrb ecx, xmm6, 10 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE 
LBB0_382 - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0a // vpinsrb xmm1, xmm14, byte [rdi + rax], 10 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb ecx, xmm6, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_838 - -LBB0_383: - LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb ecx, xmm6, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_384 - -LBB0_839: - QUAD $0x000000f824848b48 // mov rax, qword [rsp + 248] - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0c // vpinsrb xmm1, xmm14, byte [rdi + rax], 12 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0df1 // vpextrb ecx, xmm6, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_385 - JMP LBB0_386 - -LBB0_370: - LONG $0x1479e3c4; WORD $0x01f1 // vpextrb ecx, xmm6, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_371 - -LBB0_830: - LONG $0x2009e3c4; WORD $0x370c; BYTE $0x01 // vpinsrb xmm1, xmm14, byte [rdi + rsi], 1 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x24548b48; BYTE $0x68 // mov rdx, qword [rsp + 104] - LONG $0x1479e3c4; WORD $0x02f1 // vpextrb ecx, xmm6, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_372 - JMP LBB0_373 - -LBB0_374: - LONG $0x1479e3c4; WORD $0x04f1 // vpextrb ecx, xmm6, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_375 - -LBB0_832: - QUAD $0x00000108248c8b48 // mov rcx, qword [rsp + 264] - LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x04 // vpinsrb xmm1, xmm14, byte [rdi + rcx], 4 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x05f1 // vpextrb ecx, xmm6, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_833 - -LBB0_376: - LONG $0x1479e3c4; WORD $0x06f1 // vpextrb ecx, xmm6, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_377 - -LBB0_834: - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x06 // vpinsrb xmm1, xmm14, byte [rdi + rax], 6 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, 
ymm1, 15 - LONG $0x1479e3c4; WORD $0x07f1 // vpextrb ecx, xmm6, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_835 - -LBB0_378: - LONG $0x1479e3c4; WORD $0x08f1 // vpextrb ecx, xmm6, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_379 - -LBB0_836: - LONG $0x2009e3c4; WORD $0x370c; BYTE $0x08 // vpinsrb xmm1, xmm14, byte [rdi + rsi], 8 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x24548b48; BYTE $0x58 // mov rdx, qword [rsp + 88] - LONG $0x1479e3c4; WORD $0x09f1 // vpextrb ecx, xmm6, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_380 - JMP LBB0_381 - -LBB0_382: - LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb ecx, xmm6, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_383 - -LBB0_838: - QUAD $0x0000010024848b48 // mov rax, qword [rsp + 256] - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0b // vpinsrb xmm1, xmm14, byte [rdi + rax], 11 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb ecx, xmm6, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_839 - -LBB0_384: - LONG $0x1479e3c4; WORD $0x0df1 // vpextrb ecx, xmm6, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_386 - -LBB0_385: - LONG $0x2009e3c4; WORD $0x170c; BYTE $0x0d // vpinsrb xmm1, xmm14, byte [rdi + rdx], 13 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_386: - LONG $0x24448b48; BYTE $0x50 // mov rax, qword [rsp + 80] - LONG $0x24548b48; BYTE $0x40 // mov rdx, qword [rsp + 64] - LONG $0x1479e3c4; WORD $0x0ef1 // vpextrb ecx, xmm6, 14 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_388 - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0e // vpinsrb xmm1, xmm14, byte [rdi + rax], 14 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_388: - LONG $0x1479e3c4; WORD $0x0ff1 // vpextrb ecx, xmm6, 15 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_390 - LONG $0x2009a3c4; WORD $0x170c; BYTE $0x0f // vpinsrb xmm1, xmm14, byte [rdi + r10], 15 - LONG 
$0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_390: - LONG $0x397de3c4; WORD $0x01f1 // vextracti128 xmm1, ymm6, 1 - LONG $0xc87ef9c5 // vmovd eax, xmm1 - LONG $0x2c244489 // mov dword [rsp + 44], eax - WORD $0x01a8 // test al, 1 - JE LBB0_392 - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x1714; BYTE $0x00 // vpinsrb xmm2, xmm2, byte [rdi + rdx], 0 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - -LBB0_392: - LONG $0x24448b48; BYTE $0x38 // mov rax, qword [rsp + 56] - LONG $0x1479e3c4; WORD $0x01c9 // vpextrb ecx, xmm1, 1 - LONG $0x28244c89 // mov dword [rsp + 40], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_393 - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x3714; BYTE $0x01 // vpinsrb xmm2, xmm2, byte [rdi + rsi], 1 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x02c9 // vpextrb ecx, xmm1, 2 - LONG $0x24244c89 // mov dword [rsp + 36], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_841 - -LBB0_394: - LONG $0x1479e3c4; WORD $0x03c9 // vpextrb ecx, xmm1, 3 - LONG $0x20244c89 // mov dword [rsp + 32], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_395 - -LBB0_842: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069a3c4; WORD $0x0f14; BYTE $0x03 // vpinsrb xmm2, xmm2, byte [rdi + r9], 3 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x04c9 // vpextrb ecx, xmm1, 4 - LONG $0x1c244c89 // mov dword [rsp + 28], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_843 - -LBB0_396: - LONG $0x1479e3c4; WORD $0x05c8 // vpextrb eax, xmm1, 5 - LONG $0x18244489 // mov dword [rsp + 24], eax - WORD $0x01a8 // test al, 1 - JE LBB0_398 - -LBB0_397: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069a3c4; WORD $0x2f14; BYTE $0x05 // vpinsrb xmm2, xmm2, byte [rdi + r13], 5 - 
LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - -LBB0_398: - LONG $0x24448b48; BYTE $0x70 // mov rax, qword [rsp + 112] - LONG $0x1479e3c4; WORD $0x06c9 // vpextrb ecx, xmm1, 6 - LONG $0x14244c89 // mov dword [rsp + 20], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_399 - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x06 // vpinsrb xmm2, xmm2, byte [rdi + rax], 6 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x07c9 // vpextrb r9d, xmm1, 7 - LONG $0x01c1f641 // test r9b, 1 - JNE LBB0_845 - -LBB0_400: - LONG $0x1479e3c4; WORD $0x08ca // vpextrb edx, xmm1, 8 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - JE LBB0_401 - -LBB0_846: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000d824848b48 // mov rax, qword [rsp + 216] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x08 // vpinsrb xmm2, xmm2, byte [rdi + rax], 8 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x09c9 // vpextrb ecx, xmm1, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_847 - -LBB0_402: - LONG $0x1479e3c4; WORD $0x0ace // vpextrb esi, xmm1, 10 - LONG $0x01c6f640 // test sil, 1 - JE LBB0_403 - -LBB0_848: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000c824848b48 // mov rax, qword [rsp + 200] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x0a // vpinsrb xmm2, xmm2, byte [rdi + rax], 10 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x0bc8 // vpextrb eax, xmm1, 11 - WORD $0x01a8 // test al, 1 - JNE LBB0_849 - -LBB0_404: - LONG $0x1479c3c4; WORD $0x0ccd // vpextrb r13d, xmm1, 12 - LONG $0x01c5f641 // test r13b, 1 - JE LBB0_405 - -LBB0_850: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000b8249c8b48 // mov rbx, qword [rsp + 184] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0c // vpinsrb xmm2, xmm2, 
byte [rdi + rbx], 12 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0dca // vpextrb r10d, xmm1, 13 - LONG $0x01c2f641 // test r10b, 1 - JNE LBB0_851 - -LBB0_406: - LONG $0x1479c3c4; WORD $0x0ecb // vpextrb r11d, xmm1, 14 - LONG $0x01c3f641 // test r11b, 1 - JE LBB0_407 - -LBB0_852: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000a8249c8b48 // mov rbx, qword [rsp + 168] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0e // vpinsrb xmm2, xmm2, byte [rdi + rbx], 14 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0fce // vpextrb r14d, xmm1, 15 - LONG $0x01c6f641 // test r14b, 1 - JNE LBB0_408 - JMP LBB0_409 - -LBB0_393: - LONG $0x1479e3c4; WORD $0x02c9 // vpextrb ecx, xmm1, 2 - LONG $0x24244c89 // mov dword [rsp + 36], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_394 - -LBB0_841: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x02 // vpinsrb xmm2, xmm2, byte [rdi + rbx], 2 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x03c9 // vpextrb ecx, xmm1, 3 - LONG $0x20244c89 // mov dword [rsp + 32], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_842 - -LBB0_395: - LONG $0x1479e3c4; WORD $0x04c9 // vpextrb ecx, xmm1, 4 - LONG $0x1c244c89 // mov dword [rsp + 28], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_396 - -LBB0_843: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x04 // vpinsrb xmm2, xmm2, byte [rdi + rax], 4 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x05c8 // vpextrb eax, xmm1, 5 - LONG $0x18244489 // mov dword [rsp + 24], eax - WORD $0x01a8 // test al, 1 - JNE LBB0_397 - JMP LBB0_398 - -LBB0_399: - LONG $0x1479c3c4; WORD $0x07c9 // vpextrb r9d, xmm1, 7 - LONG $0x01c1f641 // test r9b, 1 - JE LBB0_400 - 
-LBB0_845: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000f024848b48 // mov rax, qword [rsp + 240] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x07 // vpinsrb xmm2, xmm2, byte [rdi + rax], 7 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x08ca // vpextrb edx, xmm1, 8 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - JNE LBB0_846 - -LBB0_401: - LONG $0x1479e3c4; WORD $0x09c9 // vpextrb ecx, xmm1, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_402 - -LBB0_847: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000d024848b48 // mov rax, qword [rsp + 208] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x09 // vpinsrb xmm2, xmm2, byte [rdi + rax], 9 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x0ace // vpextrb esi, xmm1, 10 - LONG $0x01c6f640 // test sil, 1 - JNE LBB0_848 - -LBB0_403: - LONG $0x1479e3c4; WORD $0x0bc8 // vpextrb eax, xmm1, 11 - WORD $0x01a8 // test al, 1 - JE LBB0_404 - -LBB0_849: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000c0249c8b48 // mov rbx, qword [rsp + 192] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0b // vpinsrb xmm2, xmm2, byte [rdi + rbx], 11 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0ccd // vpextrb r13d, xmm1, 12 - LONG $0x01c5f641 // test r13b, 1 - JNE LBB0_850 - -LBB0_405: - LONG $0x1479c3c4; WORD $0x0dca // vpextrb r10d, xmm1, 13 - LONG $0x01c2f641 // test r10b, 1 - JE LBB0_406 - -LBB0_851: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000b0249c8b48 // mov rbx, qword [rsp + 176] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0d // vpinsrb xmm2, xmm2, byte [rdi + rbx], 13 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0ecb // vpextrb r11d, xmm1, 14 - LONG $0x01c3f641 // test r11b, 1 - JNE LBB0_852 - -LBB0_407: - LONG $0x1479c3c4; 
WORD $0x0fce // vpextrb r14d, xmm1, 15 - LONG $0x01c6f641 // test r14b, 1 - JE LBB0_409 - -LBB0_408: - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - QUAD $0x000000a0249c8b48 // mov rbx, qword [rsp + 160] - LONG $0x2071e3c4; WORD $0x1f0c; BYTE $0x0f // vpinsrb xmm1, xmm1, byte [rdi + rbx], 15 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - -LBB0_409: - LONG $0x7175c1c4; WORD $0x04d6 // vpsrlw ymm1, ymm14, 4 - QUAD $0x00000080b5db75c5 // vpand ymm14, ymm1, yword 128[rbp] /* [rip + .LCPI0_4] */ - LONG $0x7e79c1c4; BYTE $0xf7 // vmovd r15d, xmm6 - LONG $0x01c7f641 // test r15b, 1 - JE LBB0_410 - LONG $0x7ef961c4; BYTE $0xfb // vmovq rbx, xmm15 - LONG $0x147943c4; WORD $0x1834; BYTE $0x00 // vpextrb byte [r8 + rbx], xmm14, 0 - LONG $0x1479e3c4; WORD $0x01f3 // vpextrb ebx, xmm6, 1 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_854 - -LBB0_411: - LONG $0x1479e3c4; WORD $0x02f3 // vpextrb ebx, xmm6, 2 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - QUAD $0x000000e024bc8b4c // mov r15, qword [rsp + 224] - JE LBB0_412 - -LBB0_855: - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x02 // vpextrb byte [r8 + rbx], xmm14, 2 - LONG $0x1479e3c4; WORD $0x03f3 // vpextrb ebx, xmm6, 3 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_856 - -LBB0_413: - LONG $0x1479e3c4; WORD $0x04f3 // vpextrb ebx, xmm6, 4 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_414 - -LBB0_857: - LONG $0x7ef9e1c4; BYTE $0xeb // vmovq rbx, xmm5 - LONG $0x147943c4; WORD $0x1834; BYTE $0x04 // vpextrb byte [r8 + rbx], xmm14, 4 - LONG $0x1479e3c4; WORD $0x05f3 // vpextrb ebx, xmm6, 5 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_858 - -LBB0_415: - LONG $0x1479e3c4; WORD $0x06f3 // vpextrb ebx, xmm6, 6 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_416 - -LBB0_859: - LONG $0x397de3c4; WORD $0x01e9 // vextracti128 xmm1, ymm5, 1 - LONG $0x7ef9e1c4; BYTE 
$0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x06 // vpextrb byte [r8 + rbx], xmm14, 6 - LONG $0x1479e3c4; WORD $0x07f3 // vpextrb ebx, xmm6, 7 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_860 - -LBB0_417: - LONG $0x1479e3c4; WORD $0x08f3 // vpextrb ebx, xmm6, 8 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_418 - -LBB0_861: - LONG $0x7ef961c4; BYTE $0xe3 // vmovq rbx, xmm12 - LONG $0x147943c4; WORD $0x1834; BYTE $0x08 // vpextrb byte [r8 + rbx], xmm14, 8 - LONG $0x1479e3c4; WORD $0x09f3 // vpextrb ebx, xmm6, 9 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_862 - -LBB0_419: - LONG $0x1479e3c4; WORD $0x0af3 // vpextrb ebx, xmm6, 10 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_420 - -LBB0_863: - LONG $0x397d63c4; WORD $0x01e1 // vextracti128 xmm1, ymm12, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0a // vpextrb byte [r8 + rbx], xmm14, 10 - LONG $0x1479e3c4; WORD $0x0bf3 // vpextrb ebx, xmm6, 11 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_864 - -LBB0_421: - LONG $0x1479e3c4; WORD $0x0cf3 // vpextrb ebx, xmm6, 12 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_422 - -LBB0_865: - LONG $0x7ef961c4; BYTE $0xdb // vmovq rbx, xmm11 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0c // vpextrb byte [r8 + rbx], xmm14, 12 - LONG $0x1479e3c4; WORD $0x0df3 // vpextrb ebx, xmm6, 13 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_866 - -LBB0_423: - LONG $0x1479e3c4; WORD $0x0ef3 // vpextrb ebx, xmm6, 14 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_424 - -LBB0_867: - LONG $0x397d63c4; WORD $0x01d9 // vextracti128 xmm1, ymm11, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0e // vpextrb byte [r8 + rbx], xmm14, 14 - LONG $0x1479e3c4; WORD $0x0ff3 // vpextrb ebx, xmm6, 15 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_868 - -LBB0_425: - LONG $0x2c2444f6; BYTE $0x01 // test byte [rsp + 44], 1 - JE LBB0_426 - -LBB0_869: - LONG 
$0x7ef961c4; BYTE $0xd3 // vmovq rbx, xmm10 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x00 // vpextrb byte [r8 + rbx], xmm1, 0 - LONG $0x282444f6; BYTE $0x01 // test byte [rsp + 40], 1 - JNE LBB0_870 - -LBB0_427: - LONG $0x242444f6; BYTE $0x01 // test byte [rsp + 36], 1 - JE LBB0_428 - -LBB0_871: - LONG $0x397d63c4; WORD $0x01d1 // vextracti128 xmm1, ymm10, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x02 // vpextrb byte [r8 + rbx], xmm1, 2 - LONG $0x202444f6; BYTE $0x01 // test byte [rsp + 32], 1 - JNE LBB0_872 - -LBB0_429: - LONG $0x1c2444f6; BYTE $0x01 // test byte [rsp + 28], 1 - JE LBB0_430 - -LBB0_873: - LONG $0x7ef961c4; BYTE $0xcb // vmovq rbx, xmm9 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x04 // vpextrb byte [r8 + rbx], xmm1, 4 - LONG $0x182444f6; BYTE $0x01 // test byte [rsp + 24], 1 - JNE LBB0_874 - -LBB0_431: - LONG $0x142444f6; BYTE $0x01 // test byte [rsp + 20], 1 - JE LBB0_432 - -LBB0_875: - LONG $0x397d63c4; WORD $0x01c9 // vextracti128 xmm1, ymm9, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x06 // vpextrb byte [r8 + rbx], xmm1, 6 - LONG $0x01c1f641 // test r9b, 1 - JNE LBB0_876 - -LBB0_433: - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - QUAD $0x00000128249c8b48 // mov rbx, qword [rsp + 296] - JE LBB0_434 - -LBB0_877: - LONG $0x7ef961c4; BYTE $0xc2 // vmovq rdx, xmm8 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x100c; BYTE $0x08 // vpextrb byte [r8 + rdx], xmm1, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_878 - -LBB0_435: - LONG $0x01c6f640 // test sil, 1 - QUAD $0x0000013024948b48 // mov rdx, qword [rsp + 304] - JE LBB0_436 - -LBB0_879: - LONG 
$0x397d63c4; WORD $0x01c1 // vextracti128 xmm1, ymm8, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0a // vpextrb byte [r8 + rcx], xmm1, 10 - WORD $0x01a8 // test al, 1 - QUAD $0x0000009824b48b48 // mov rsi, qword [rsp + 152] - JNE LBB0_880 - -LBB0_437: - LONG $0x01c5f641 // test r13b, 1 - JE LBB0_438 - -LBB0_881: - LONG $0x7ef9e1c4; BYTE $0xf9 // vmovq rcx, xmm7 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0c // vpextrb byte [r8 + rcx], xmm1, 12 - LONG $0x01c2f641 // test r10b, 1 - QUAD $0x0000011824ac8b4c // mov r13, qword [rsp + 280] - JNE LBB0_882 - -LBB0_439: - LONG $0x01c3f641 // test r11b, 1 - JE LBB0_440 - -LBB0_883: - LONG $0x397de3c4; WORD $0x01f9 // vextracti128 xmm1, ymm7, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0e // vpextrb byte [r8 + rcx], xmm1, 14 - LONG $0x01c6f641 // test r14b, 1 - QUAD $0x0000012024848b48 // mov rax, qword [rsp + 288] - QUAD $0x000000e8248c8b4c // mov r9, qword [rsp + 232] - JNE LBB0_441 - JMP LBB0_442 - -LBB0_410: - LONG $0x1479e3c4; WORD $0x01f3 // vpextrb ebx, xmm6, 1 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_411 - -LBB0_854: - LONG $0x16f963c4; WORD $0x01fb // vpextrq rbx, xmm15, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x01 // vpextrb byte [r8 + rbx], xmm14, 1 - LONG $0x1479e3c4; WORD $0x02f3 // vpextrb ebx, xmm6, 2 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - QUAD $0x000000e024bc8b4c // mov r15, qword [rsp + 224] - JNE LBB0_855 - -LBB0_412: - LONG $0x1479e3c4; WORD $0x03f3 // vpextrb ebx, xmm6, 3 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_413 - -LBB0_856: - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x03 // 
vpextrb byte [r8 + rbx], xmm14, 3 - LONG $0x1479e3c4; WORD $0x04f3 // vpextrb ebx, xmm6, 4 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_857 - -LBB0_414: - LONG $0x1479e3c4; WORD $0x05f3 // vpextrb ebx, xmm6, 5 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_415 - -LBB0_858: - LONG $0x16f9e3c4; WORD $0x01eb // vpextrq rbx, xmm5, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x05 // vpextrb byte [r8 + rbx], xmm14, 5 - LONG $0x1479e3c4; WORD $0x06f3 // vpextrb ebx, xmm6, 6 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_859 - -LBB0_416: - LONG $0x1479e3c4; WORD $0x07f3 // vpextrb ebx, xmm6, 7 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_417 - -LBB0_860: - LONG $0x397de3c4; WORD $0x01e9 // vextracti128 xmm1, ymm5, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x07 // vpextrb byte [r8 + rbx], xmm14, 7 - LONG $0x1479e3c4; WORD $0x08f3 // vpextrb ebx, xmm6, 8 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_861 - -LBB0_418: - LONG $0x1479e3c4; WORD $0x09f3 // vpextrb ebx, xmm6, 9 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_419 - -LBB0_862: - LONG $0x16f963c4; WORD $0x01e3 // vpextrq rbx, xmm12, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x09 // vpextrb byte [r8 + rbx], xmm14, 9 - LONG $0x1479e3c4; WORD $0x0af3 // vpextrb ebx, xmm6, 10 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_863 - -LBB0_420: - LONG $0x1479e3c4; WORD $0x0bf3 // vpextrb ebx, xmm6, 11 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_421 - -LBB0_864: - LONG $0x397d63c4; WORD $0x01e1 // vextracti128 xmm1, ymm12, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0b // vpextrb byte [r8 + rbx], xmm14, 11 - LONG $0x1479e3c4; WORD $0x0cf3 // vpextrb ebx, xmm6, 12 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_865 - -LBB0_422: - LONG $0x1479e3c4; WORD $0x0df3 // vpextrb ebx, xmm6, 13 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_423 - -LBB0_866: - LONG $0x16f963c4; 
WORD $0x01db // vpextrq rbx, xmm11, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0d // vpextrb byte [r8 + rbx], xmm14, 13 - LONG $0x1479e3c4; WORD $0x0ef3 // vpextrb ebx, xmm6, 14 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_867 - -LBB0_424: - LONG $0x1479e3c4; WORD $0x0ff3 // vpextrb ebx, xmm6, 15 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_425 - -LBB0_868: - LONG $0x397d63c4; WORD $0x01d9 // vextracti128 xmm1, ymm11, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0f // vpextrb byte [r8 + rbx], xmm14, 15 - LONG $0x2c2444f6; BYTE $0x01 // test byte [rsp + 44], 1 - JNE LBB0_869 - -LBB0_426: - LONG $0x282444f6; BYTE $0x01 // test byte [rsp + 40], 1 - JE LBB0_427 - -LBB0_870: - LONG $0x16f963c4; WORD $0x01d3 // vpextrq rbx, xmm10, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x01 // vpextrb byte [r8 + rbx], xmm1, 1 - LONG $0x242444f6; BYTE $0x01 // test byte [rsp + 36], 1 - JNE LBB0_871 - -LBB0_428: - LONG $0x202444f6; BYTE $0x01 // test byte [rsp + 32], 1 - JE LBB0_429 - -LBB0_872: - LONG $0x397d63c4; WORD $0x01d1 // vextracti128 xmm1, ymm10, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x03 // vpextrb byte [r8 + rbx], xmm1, 3 - LONG $0x1c2444f6; BYTE $0x01 // test byte [rsp + 28], 1 - JNE LBB0_873 - -LBB0_430: - LONG $0x182444f6; BYTE $0x01 // test byte [rsp + 24], 1 - JE LBB0_431 - -LBB0_874: - LONG $0x16f963c4; WORD $0x01cb // vpextrq rbx, xmm9, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x05 // vpextrb byte [r8 + rbx], xmm1, 5 - LONG $0x142444f6; BYTE $0x01 // test byte [rsp + 20], 1 - JNE LBB0_875 - -LBB0_432: - LONG $0x01c1f641 // test r9b, 1 - JE LBB0_433 - -LBB0_876: - LONG $0x397d63c4; WORD $0x01c9 // vextracti128 xmm1, ymm9, 1 - LONG $0x16f9e3c4; WORD 
$0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x07 // vpextrb byte [r8 + rbx], xmm1, 7 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - QUAD $0x00000128249c8b48 // mov rbx, qword [rsp + 296] - JNE LBB0_877 - -LBB0_434: - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_435 - -LBB0_878: - LONG $0x16f963c4; WORD $0x01c1 // vpextrq rcx, xmm8, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x09 // vpextrb byte [r8 + rcx], xmm1, 9 - LONG $0x01c6f640 // test sil, 1 - QUAD $0x0000013024948b48 // mov rdx, qword [rsp + 304] - JNE LBB0_879 - -LBB0_436: - WORD $0x01a8 // test al, 1 - QUAD $0x0000009824b48b48 // mov rsi, qword [rsp + 152] - JE LBB0_437 - -LBB0_880: - LONG $0x397d63c4; WORD $0x01c1 // vextracti128 xmm1, ymm8, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0b // vpextrb byte [r8 + rcx], xmm1, 11 - LONG $0x01c5f641 // test r13b, 1 - JNE LBB0_881 - -LBB0_438: - LONG $0x01c2f641 // test r10b, 1 - QUAD $0x0000011824ac8b4c // mov r13, qword [rsp + 280] - JE LBB0_439 - -LBB0_882: - LONG $0x16f9e3c4; WORD $0x01f9 // vpextrq rcx, xmm7, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0d // vpextrb byte [r8 + rcx], xmm1, 13 - LONG $0x01c3f641 // test r11b, 1 - JNE LBB0_883 - -LBB0_440: - LONG $0x01c6f641 // test r14b, 1 - QUAD $0x0000012024848b48 // mov rax, qword [rsp + 288] - QUAD $0x000000e8248c8b4c // mov r9, qword [rsp + 232] - JE LBB0_442 - -LBB0_441: - LONG $0x397de3c4; WORD $0x01f9 // vextracti128 xmm1, ymm7, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0f // vpextrb byte [r8 + rcx], xmm1, 15 - -LBB0_442: - QUAD 
$0x000280248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 640] - QUAD $0x00020024bceb75c5; BYTE $0x00 // vpor ymm15, ymm1, yword [rsp + 512] - QUAD $0x0001e024acebf5c5; BYTE $0x00 // vpor ymm5, ymm1, yword [rsp + 480] - QUAD $0x0001802494eb75c5; BYTE $0x00 // vpor ymm10, ymm1, yword [rsp + 384] - QUAD $0x000160248ceb75c5; BYTE $0x00 // vpor ymm9, ymm1, yword [rsp + 352] - QUAD $0x0001c024a4eb75c5; BYTE $0x00 // vpor ymm12, ymm1, yword [rsp + 448] - QUAD $0x0001a0249ceb75c5; BYTE $0x00 // vpor ymm11, ymm1, yword [rsp + 416] - QUAD $0x0001402484eb75c5; BYTE $0x00 // vpor ymm8, ymm1, yword [rsp + 320] - LONG $0xf9ebddc5 // vpor ymm7, ymm4, ymm1 - LONG $0x463de3c4; WORD $0x31cf // vperm2i128 ymm1, ymm8, ymm7, 49 - LONG $0x383de3c4; WORD $0x01d7 // vinserti128 ymm2, ymm8, xmm7, 1 - LONG $0xc9c6ecc5; BYTE $0x88 // vshufps ymm1, ymm2, ymm1, 136 - LONG $0x461dc3c4; WORD $0x31d3 // vperm2i128 ymm2, ymm12, ymm11, 49 - LONG $0x381dc3c4; WORD $0x01db // vinserti128 ymm3, ymm12, xmm11, 1 - LONG $0xd2c6e4c5; BYTE $0x88 // vshufps ymm2, ymm3, ymm2, 136 - LONG $0x462dc3c4; WORD $0x31d9 // vperm2i128 ymm3, ymm10, ymm9, 49 - LONG $0x382d43c4; WORD $0x01e9 // vinserti128 ymm13, ymm10, xmm9, 1 - LONG $0xdbc694c5; BYTE $0x88 // vshufps ymm3, ymm13, ymm3, 136 - LONG $0x460563c4; WORD $0x31ed // vperm2i128 ymm13, ymm15, ymm5, 49 - LONG $0x380563c4; WORD $0x01f5 // vinserti128 ymm14, ymm15, xmm5, 1 - LONG $0xc60c41c4; WORD $0x88ed // vshufps ymm13, ymm14, ymm13, 136 - LONG $0x667d41c4; BYTE $0xed // vpcmpgtd ymm13, ymm0, ymm13 - LONG $0xdb66fdc5 // vpcmpgtd ymm3, ymm0, ymm3 - LONG $0xdb6b95c5 // vpackssdw ymm3, ymm13, ymm3 - LONG $0xd266fdc5 // vpcmpgtd ymm2, ymm0, ymm2 - LONG $0xc966fdc5 // vpcmpgtd ymm1, ymm0, ymm1 - LONG $0xc96bedc5 // vpackssdw ymm1, ymm2, ymm1 - LONG $0x00fde3c4; WORD $0xd8d3 // vpermq ymm2, ymm3, 216 - LONG $0x00fde3c4; WORD $0xd8c9 // vpermq ymm1, ymm1, 216 - LONG $0xc963edc5 // vpacksswb ymm1, ymm2, ymm1 - LONG $0xf6dbf5c5 // vpand ymm6, ymm1, ymm6 - LONG 
$0xf17ef9c5 // vmovd ecx, xmm6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_443 - LONG $0x787d62c4; WORD $0x1734 // vpbroadcastb ymm14, byte [rdi + rdx] - LONG $0x1479e3c4; WORD $0x01f1 // vpextrb ecx, xmm6, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_885 - -LBB0_444: - LONG $0x24548b48; BYTE $0x68 // mov rdx, qword [rsp + 104] - LONG $0x1479e3c4; WORD $0x02f1 // vpextrb ecx, xmm6, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_446 - -LBB0_445: - LONG $0x2009e3c4; WORD $0x1f0c; BYTE $0x02 // vpinsrb xmm1, xmm14, byte [rdi + rbx], 2 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_446: - LONG $0x24748b48; BYTE $0x60 // mov rsi, qword [rsp + 96] - LONG $0x24548b4c; BYTE $0x48 // mov r10, qword [rsp + 72] - LONG $0x1479e3c4; WORD $0x03f1 // vpextrb ecx, xmm6, 3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_447 - QUAD $0x00000110248c8b48 // mov rcx, qword [rsp + 272] - LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x03 // vpinsrb xmm1, xmm14, byte [rdi + rcx], 3 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x04f1 // vpextrb ecx, xmm6, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_887 - -LBB0_448: - LONG $0x1479e3c4; WORD $0x05f1 // vpextrb ecx, xmm6, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_449 - -LBB0_888: - LONG $0x2009e3c4; WORD $0x170c; BYTE $0x05 // vpinsrb xmm1, xmm14, byte [rdi + rdx], 5 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x06f1 // vpextrb ecx, xmm6, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_889 - -LBB0_450: - LONG $0x1479e3c4; WORD $0x07f1 // vpextrb ecx, xmm6, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_451 - -LBB0_890: - LONG $0x2009a3c4; WORD $0x0f0c; BYTE $0x07 // vpinsrb xmm1, xmm14, byte [rdi + r9], 7 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x08f1 // vpextrb ecx, xmm6, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 
- JNE LBB0_891 - -LBB0_452: - LONG $0x24548b48; BYTE $0x58 // mov rdx, qword [rsp + 88] - LONG $0x1479e3c4; WORD $0x09f1 // vpextrb ecx, xmm6, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_454 - -LBB0_453: - LONG $0x2009a3c4; WORD $0x3f0c; BYTE $0x09 // vpinsrb xmm1, xmm14, byte [rdi + r15], 9 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_454: - QUAD $0x0000009024848b48 // mov rax, qword [rsp + 144] - QUAD $0x0000008824b48b48 // mov rsi, qword [rsp + 136] - QUAD $0x00000080249c8b48 // mov rbx, qword [rsp + 128] - LONG $0x244c8b4c; BYTE $0x78 // mov r9, qword [rsp + 120] - LONG $0x1479e3c4; WORD $0x0af1 // vpextrb ecx, xmm6, 10 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_455 - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0a // vpinsrb xmm1, xmm14, byte [rdi + rax], 10 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb ecx, xmm6, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_893 - -LBB0_456: - LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb ecx, xmm6, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_457 - -LBB0_894: - QUAD $0x000000f824848b48 // mov rax, qword [rsp + 248] - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0c // vpinsrb xmm1, xmm14, byte [rdi + rax], 12 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0df1 // vpextrb ecx, xmm6, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_458 - JMP LBB0_459 - -LBB0_443: - LONG $0x1479e3c4; WORD $0x01f1 // vpextrb ecx, xmm6, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_444 - -LBB0_885: - LONG $0x2009e3c4; WORD $0x370c; BYTE $0x01 // vpinsrb xmm1, xmm14, byte [rdi + rsi], 1 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x24548b48; BYTE $0x68 // mov rdx, qword [rsp + 104] - LONG $0x1479e3c4; WORD $0x02f1 // vpextrb ecx, xmm6, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_445 - JMP LBB0_446 - -LBB0_447: - LONG $0x1479e3c4; 
WORD $0x04f1 // vpextrb ecx, xmm6, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_448 - -LBB0_887: - QUAD $0x00000108248c8b48 // mov rcx, qword [rsp + 264] - LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x04 // vpinsrb xmm1, xmm14, byte [rdi + rcx], 4 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x05f1 // vpextrb ecx, xmm6, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_888 - -LBB0_449: - LONG $0x1479e3c4; WORD $0x06f1 // vpextrb ecx, xmm6, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_450 - -LBB0_889: - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x06 // vpinsrb xmm1, xmm14, byte [rdi + rax], 6 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x07f1 // vpextrb ecx, xmm6, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_890 - -LBB0_451: - LONG $0x1479e3c4; WORD $0x08f1 // vpextrb ecx, xmm6, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_452 - -LBB0_891: - LONG $0x2009e3c4; WORD $0x370c; BYTE $0x08 // vpinsrb xmm1, xmm14, byte [rdi + rsi], 8 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x24548b48; BYTE $0x58 // mov rdx, qword [rsp + 88] - LONG $0x1479e3c4; WORD $0x09f1 // vpextrb ecx, xmm6, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_453 - JMP LBB0_454 - -LBB0_455: - LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb ecx, xmm6, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_456 - -LBB0_893: - QUAD $0x0000010024848b48 // mov rax, qword [rsp + 256] - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0b // vpinsrb xmm1, xmm14, byte [rdi + rax], 11 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb ecx, xmm6, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_894 - -LBB0_457: - LONG $0x1479e3c4; WORD $0x0df1 // vpextrb ecx, xmm6, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_459 - -LBB0_458: - LONG $0x2009e3c4; WORD $0x170c; BYTE $0x0d // vpinsrb xmm1, xmm14, 
byte [rdi + rdx], 13 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_459: - LONG $0x24448b48; BYTE $0x50 // mov rax, qword [rsp + 80] - LONG $0x24548b48; BYTE $0x40 // mov rdx, qword [rsp + 64] - LONG $0x1479e3c4; WORD $0x0ef1 // vpextrb ecx, xmm6, 14 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_461 - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0e // vpinsrb xmm1, xmm14, byte [rdi + rax], 14 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_461: - LONG $0x1479e3c4; WORD $0x0ff1 // vpextrb ecx, xmm6, 15 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_463 - LONG $0x2009a3c4; WORD $0x170c; BYTE $0x0f // vpinsrb xmm1, xmm14, byte [rdi + r10], 15 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_463: - LONG $0x397de3c4; WORD $0x01f1 // vextracti128 xmm1, ymm6, 1 - LONG $0xc87ef9c5 // vmovd eax, xmm1 - LONG $0x2c244489 // mov dword [rsp + 44], eax - WORD $0x01a8 // test al, 1 - JE LBB0_465 - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x1714; BYTE $0x00 // vpinsrb xmm2, xmm2, byte [rdi + rdx], 0 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - -LBB0_465: - LONG $0x24448b48; BYTE $0x38 // mov rax, qword [rsp + 56] - LONG $0x1479e3c4; WORD $0x01c9 // vpextrb ecx, xmm1, 1 - LONG $0x28244c89 // mov dword [rsp + 40], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_466 - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x3714; BYTE $0x01 // vpinsrb xmm2, xmm2, byte [rdi + rsi], 1 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x02c9 // vpextrb ecx, xmm1, 2 - LONG $0x24244c89 // mov dword [rsp + 36], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_896 - -LBB0_467: - LONG $0x1479e3c4; WORD $0x03c9 // vpextrb ecx, xmm1, 3 - LONG $0x20244c89 // mov dword [rsp + 32], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_468 - 
-LBB0_897: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069a3c4; WORD $0x0f14; BYTE $0x03 // vpinsrb xmm2, xmm2, byte [rdi + r9], 3 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x04c9 // vpextrb ecx, xmm1, 4 - LONG $0x1c244c89 // mov dword [rsp + 28], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_898 - -LBB0_469: - LONG $0x1479e3c4; WORD $0x05c8 // vpextrb eax, xmm1, 5 - LONG $0x18244489 // mov dword [rsp + 24], eax - WORD $0x01a8 // test al, 1 - JE LBB0_471 - -LBB0_470: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069a3c4; WORD $0x2f14; BYTE $0x05 // vpinsrb xmm2, xmm2, byte [rdi + r13], 5 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - -LBB0_471: - LONG $0x24448b48; BYTE $0x70 // mov rax, qword [rsp + 112] - LONG $0x1479e3c4; WORD $0x06c9 // vpextrb ecx, xmm1, 6 - LONG $0x14244c89 // mov dword [rsp + 20], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_472 - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x06 // vpinsrb xmm2, xmm2, byte [rdi + rax], 6 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x07c9 // vpextrb r9d, xmm1, 7 - LONG $0x01c1f641 // test r9b, 1 - JNE LBB0_900 - -LBB0_473: - LONG $0x1479e3c4; WORD $0x08ca // vpextrb edx, xmm1, 8 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - JE LBB0_474 - -LBB0_901: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000d824848b48 // mov rax, qword [rsp + 216] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x08 // vpinsrb xmm2, xmm2, byte [rdi + rax], 8 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x09c9 // vpextrb ecx, xmm1, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_902 - -LBB0_475: - LONG $0x1479e3c4; WORD $0x0ace // vpextrb esi, xmm1, 10 - LONG $0x01c6f640 // test sil, 1 - JE LBB0_476 
- -LBB0_903: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000c824848b48 // mov rax, qword [rsp + 200] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x0a // vpinsrb xmm2, xmm2, byte [rdi + rax], 10 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x0bc8 // vpextrb eax, xmm1, 11 - WORD $0x01a8 // test al, 1 - JNE LBB0_904 - -LBB0_477: - LONG $0x1479c3c4; WORD $0x0ccd // vpextrb r13d, xmm1, 12 - LONG $0x01c5f641 // test r13b, 1 - JE LBB0_478 - -LBB0_905: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000b8249c8b48 // mov rbx, qword [rsp + 184] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0c // vpinsrb xmm2, xmm2, byte [rdi + rbx], 12 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0dca // vpextrb r10d, xmm1, 13 - LONG $0x01c2f641 // test r10b, 1 - JNE LBB0_906 - -LBB0_479: - LONG $0x1479c3c4; WORD $0x0ecb // vpextrb r11d, xmm1, 14 - LONG $0x01c3f641 // test r11b, 1 - JE LBB0_480 - -LBB0_907: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000a8249c8b48 // mov rbx, qword [rsp + 168] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0e // vpinsrb xmm2, xmm2, byte [rdi + rbx], 14 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0fce // vpextrb r14d, xmm1, 15 - LONG $0x01c6f641 // test r14b, 1 - JNE LBB0_481 - JMP LBB0_482 - -LBB0_466: - LONG $0x1479e3c4; WORD $0x02c9 // vpextrb ecx, xmm1, 2 - LONG $0x24244c89 // mov dword [rsp + 36], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_467 - -LBB0_896: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x02 // vpinsrb xmm2, xmm2, byte [rdi + rbx], 2 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x03c9 // vpextrb ecx, xmm1, 3 - LONG $0x20244c89 // mov dword [rsp + 32], ecx - WORD $0xc1f6; BYTE $0x01 // 
test cl, 1 - JNE LBB0_897 - -LBB0_468: - LONG $0x1479e3c4; WORD $0x04c9 // vpextrb ecx, xmm1, 4 - LONG $0x1c244c89 // mov dword [rsp + 28], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_469 - -LBB0_898: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x04 // vpinsrb xmm2, xmm2, byte [rdi + rax], 4 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x05c8 // vpextrb eax, xmm1, 5 - LONG $0x18244489 // mov dword [rsp + 24], eax - WORD $0x01a8 // test al, 1 - JNE LBB0_470 - JMP LBB0_471 - -LBB0_472: - LONG $0x1479c3c4; WORD $0x07c9 // vpextrb r9d, xmm1, 7 - LONG $0x01c1f641 // test r9b, 1 - JE LBB0_473 - -LBB0_900: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000f024848b48 // mov rax, qword [rsp + 240] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x07 // vpinsrb xmm2, xmm2, byte [rdi + rax], 7 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x08ca // vpextrb edx, xmm1, 8 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - JNE LBB0_901 - -LBB0_474: - LONG $0x1479e3c4; WORD $0x09c9 // vpextrb ecx, xmm1, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_475 - -LBB0_902: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000d024848b48 // mov rax, qword [rsp + 208] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x09 // vpinsrb xmm2, xmm2, byte [rdi + rax], 9 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x0ace // vpextrb esi, xmm1, 10 - LONG $0x01c6f640 // test sil, 1 - JNE LBB0_903 - -LBB0_476: - LONG $0x1479e3c4; WORD $0x0bc8 // vpextrb eax, xmm1, 11 - WORD $0x01a8 // test al, 1 - JE LBB0_477 - -LBB0_904: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000c0249c8b48 // mov rbx, qword [rsp + 192] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0b // vpinsrb xmm2, xmm2, byte [rdi + rbx], 11 - LONG 
$0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0ccd // vpextrb r13d, xmm1, 12 - LONG $0x01c5f641 // test r13b, 1 - JNE LBB0_905 - -LBB0_478: - LONG $0x1479c3c4; WORD $0x0dca // vpextrb r10d, xmm1, 13 - LONG $0x01c2f641 // test r10b, 1 - JE LBB0_479 - -LBB0_906: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000b0249c8b48 // mov rbx, qword [rsp + 176] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0d // vpinsrb xmm2, xmm2, byte [rdi + rbx], 13 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0ecb // vpextrb r11d, xmm1, 14 - LONG $0x01c3f641 // test r11b, 1 - JNE LBB0_907 - -LBB0_480: - LONG $0x1479c3c4; WORD $0x0fce // vpextrb r14d, xmm1, 15 - LONG $0x01c6f641 // test r14b, 1 - JE LBB0_482 - -LBB0_481: - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - QUAD $0x000000a0249c8b48 // mov rbx, qword [rsp + 160] - LONG $0x2071e3c4; WORD $0x1f0c; BYTE $0x0f // vpinsrb xmm1, xmm1, byte [rdi + rbx], 15 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - -LBB0_482: - LONG $0x7175c1c4; WORD $0x05d6 // vpsrlw ymm1, ymm14, 5 - QUAD $0x00000080b5db75c5 // vpand ymm14, ymm1, yword 128[rbp] /* [rip + .LCPI0_4] */ - LONG $0x7e79c1c4; BYTE $0xf7 // vmovd r15d, xmm6 - LONG $0x01c7f641 // test r15b, 1 - JE LBB0_483 - LONG $0x7ef961c4; BYTE $0xfb // vmovq rbx, xmm15 - LONG $0x147943c4; WORD $0x1834; BYTE $0x00 // vpextrb byte [r8 + rbx], xmm14, 0 - LONG $0x1479e3c4; WORD $0x01f3 // vpextrb ebx, xmm6, 1 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_909 - -LBB0_484: - LONG $0x1479e3c4; WORD $0x02f3 // vpextrb ebx, xmm6, 2 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - QUAD $0x000000e024bc8b4c // mov r15, qword [rsp + 224] - JE LBB0_485 - -LBB0_910: - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x02 // vpextrb byte [r8 + rbx], 
xmm14, 2 - LONG $0x1479e3c4; WORD $0x03f3 // vpextrb ebx, xmm6, 3 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_911 - -LBB0_486: - LONG $0x1479e3c4; WORD $0x04f3 // vpextrb ebx, xmm6, 4 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_487 - -LBB0_912: - LONG $0x7ef9e1c4; BYTE $0xeb // vmovq rbx, xmm5 - LONG $0x147943c4; WORD $0x1834; BYTE $0x04 // vpextrb byte [r8 + rbx], xmm14, 4 - LONG $0x1479e3c4; WORD $0x05f3 // vpextrb ebx, xmm6, 5 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_913 - -LBB0_488: - LONG $0x1479e3c4; WORD $0x06f3 // vpextrb ebx, xmm6, 6 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_489 - -LBB0_914: - LONG $0x397de3c4; WORD $0x01e9 // vextracti128 xmm1, ymm5, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x06 // vpextrb byte [r8 + rbx], xmm14, 6 - LONG $0x1479e3c4; WORD $0x07f3 // vpextrb ebx, xmm6, 7 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_915 - -LBB0_490: - LONG $0x1479e3c4; WORD $0x08f3 // vpextrb ebx, xmm6, 8 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_491 - -LBB0_916: - LONG $0x7ef961c4; BYTE $0xe3 // vmovq rbx, xmm12 - LONG $0x147943c4; WORD $0x1834; BYTE $0x08 // vpextrb byte [r8 + rbx], xmm14, 8 - LONG $0x1479e3c4; WORD $0x09f3 // vpextrb ebx, xmm6, 9 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_917 - -LBB0_492: - LONG $0x1479e3c4; WORD $0x0af3 // vpextrb ebx, xmm6, 10 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_493 - -LBB0_918: - LONG $0x397d63c4; WORD $0x01e1 // vextracti128 xmm1, ymm12, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0a // vpextrb byte [r8 + rbx], xmm14, 10 - LONG $0x1479e3c4; WORD $0x0bf3 // vpextrb ebx, xmm6, 11 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_919 - -LBB0_494: - LONG $0x1479e3c4; WORD $0x0cf3 // vpextrb ebx, xmm6, 12 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_495 - -LBB0_920: - LONG $0x7ef961c4; BYTE $0xdb // vmovq rbx, xmm11 - LONG $0x147943c4; WORD 
$0x1834; BYTE $0x0c // vpextrb byte [r8 + rbx], xmm14, 12 - LONG $0x1479e3c4; WORD $0x0df3 // vpextrb ebx, xmm6, 13 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_921 - -LBB0_496: - LONG $0x1479e3c4; WORD $0x0ef3 // vpextrb ebx, xmm6, 14 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_497 - -LBB0_922: - LONG $0x397d63c4; WORD $0x01d9 // vextracti128 xmm1, ymm11, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0e // vpextrb byte [r8 + rbx], xmm14, 14 - LONG $0x1479e3c4; WORD $0x0ff3 // vpextrb ebx, xmm6, 15 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_923 - -LBB0_498: - LONG $0x2c2444f6; BYTE $0x01 // test byte [rsp + 44], 1 - JE LBB0_499 - -LBB0_924: - LONG $0x7ef961c4; BYTE $0xd3 // vmovq rbx, xmm10 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x00 // vpextrb byte [r8 + rbx], xmm1, 0 - LONG $0x282444f6; BYTE $0x01 // test byte [rsp + 40], 1 - JNE LBB0_925 - -LBB0_500: - LONG $0x242444f6; BYTE $0x01 // test byte [rsp + 36], 1 - JE LBB0_501 - -LBB0_926: - LONG $0x397d63c4; WORD $0x01d1 // vextracti128 xmm1, ymm10, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x02 // vpextrb byte [r8 + rbx], xmm1, 2 - LONG $0x202444f6; BYTE $0x01 // test byte [rsp + 32], 1 - JNE LBB0_927 - -LBB0_502: - LONG $0x1c2444f6; BYTE $0x01 // test byte [rsp + 28], 1 - JE LBB0_503 - -LBB0_928: - LONG $0x7ef961c4; BYTE $0xcb // vmovq rbx, xmm9 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x04 // vpextrb byte [r8 + rbx], xmm1, 4 - LONG $0x182444f6; BYTE $0x01 // test byte [rsp + 24], 1 - JNE LBB0_929 - -LBB0_504: - LONG $0x142444f6; BYTE $0x01 // test byte [rsp + 20], 1 - JE LBB0_505 - -LBB0_930: - LONG $0x397d63c4; WORD $0x01c9 // vextracti128 xmm1, ymm9, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - 
LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x06 // vpextrb byte [r8 + rbx], xmm1, 6 - LONG $0x01c1f641 // test r9b, 1 - JNE LBB0_931 - -LBB0_506: - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - QUAD $0x00000128249c8b48 // mov rbx, qword [rsp + 296] - JE LBB0_507 - -LBB0_932: - LONG $0x7ef961c4; BYTE $0xc2 // vmovq rdx, xmm8 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x100c; BYTE $0x08 // vpextrb byte [r8 + rdx], xmm1, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_933 - -LBB0_508: - LONG $0x01c6f640 // test sil, 1 - QUAD $0x0000013024948b48 // mov rdx, qword [rsp + 304] - JE LBB0_509 - -LBB0_934: - LONG $0x397d63c4; WORD $0x01c1 // vextracti128 xmm1, ymm8, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0a // vpextrb byte [r8 + rcx], xmm1, 10 - WORD $0x01a8 // test al, 1 - QUAD $0x0000009824b48b48 // mov rsi, qword [rsp + 152] - JNE LBB0_935 - -LBB0_510: - LONG $0x01c5f641 // test r13b, 1 - JE LBB0_511 - -LBB0_936: - LONG $0x7ef9e1c4; BYTE $0xf9 // vmovq rcx, xmm7 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0c // vpextrb byte [r8 + rcx], xmm1, 12 - LONG $0x01c2f641 // test r10b, 1 - QUAD $0x0000011824ac8b4c // mov r13, qword [rsp + 280] - JNE LBB0_937 - -LBB0_512: - LONG $0x01c3f641 // test r11b, 1 - JE LBB0_513 - -LBB0_938: - LONG $0x397de3c4; WORD $0x01f9 // vextracti128 xmm1, ymm7, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0e // vpextrb byte [r8 + rcx], xmm1, 14 - LONG $0x01c6f641 // test r14b, 1 - QUAD $0x0000012024848b48 // mov rax, qword [rsp + 288] - QUAD $0x000000e8248c8b4c // mov r9, qword [rsp + 232] - JNE LBB0_514 - JMP LBB0_515 - -LBB0_483: - LONG $0x1479e3c4; WORD 
$0x01f3 // vpextrb ebx, xmm6, 1 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_484 - -LBB0_909: - LONG $0x16f963c4; WORD $0x01fb // vpextrq rbx, xmm15, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x01 // vpextrb byte [r8 + rbx], xmm14, 1 - LONG $0x1479e3c4; WORD $0x02f3 // vpextrb ebx, xmm6, 2 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - QUAD $0x000000e024bc8b4c // mov r15, qword [rsp + 224] - JNE LBB0_910 - -LBB0_485: - LONG $0x1479e3c4; WORD $0x03f3 // vpextrb ebx, xmm6, 3 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_486 - -LBB0_911: - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x03 // vpextrb byte [r8 + rbx], xmm14, 3 - LONG $0x1479e3c4; WORD $0x04f3 // vpextrb ebx, xmm6, 4 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_912 - -LBB0_487: - LONG $0x1479e3c4; WORD $0x05f3 // vpextrb ebx, xmm6, 5 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_488 - -LBB0_913: - LONG $0x16f9e3c4; WORD $0x01eb // vpextrq rbx, xmm5, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x05 // vpextrb byte [r8 + rbx], xmm14, 5 - LONG $0x1479e3c4; WORD $0x06f3 // vpextrb ebx, xmm6, 6 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_914 - -LBB0_489: - LONG $0x1479e3c4; WORD $0x07f3 // vpextrb ebx, xmm6, 7 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_490 - -LBB0_915: - LONG $0x397de3c4; WORD $0x01e9 // vextracti128 xmm1, ymm5, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x07 // vpextrb byte [r8 + rbx], xmm14, 7 - LONG $0x1479e3c4; WORD $0x08f3 // vpextrb ebx, xmm6, 8 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_916 - -LBB0_491: - LONG $0x1479e3c4; WORD $0x09f3 // vpextrb ebx, xmm6, 9 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_492 - -LBB0_917: - LONG $0x16f963c4; WORD $0x01e3 // vpextrq rbx, xmm12, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x09 // vpextrb byte [r8 + rbx], xmm14, 9 - LONG 
$0x1479e3c4; WORD $0x0af3 // vpextrb ebx, xmm6, 10 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_918 - -LBB0_493: - LONG $0x1479e3c4; WORD $0x0bf3 // vpextrb ebx, xmm6, 11 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_494 - -LBB0_919: - LONG $0x397d63c4; WORD $0x01e1 // vextracti128 xmm1, ymm12, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0b // vpextrb byte [r8 + rbx], xmm14, 11 - LONG $0x1479e3c4; WORD $0x0cf3 // vpextrb ebx, xmm6, 12 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_920 - -LBB0_495: - LONG $0x1479e3c4; WORD $0x0df3 // vpextrb ebx, xmm6, 13 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_496 - -LBB0_921: - LONG $0x16f963c4; WORD $0x01db // vpextrq rbx, xmm11, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0d // vpextrb byte [r8 + rbx], xmm14, 13 - LONG $0x1479e3c4; WORD $0x0ef3 // vpextrb ebx, xmm6, 14 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_922 - -LBB0_497: - LONG $0x1479e3c4; WORD $0x0ff3 // vpextrb ebx, xmm6, 15 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_498 - -LBB0_923: - LONG $0x397d63c4; WORD $0x01d9 // vextracti128 xmm1, ymm11, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0f // vpextrb byte [r8 + rbx], xmm14, 15 - LONG $0x2c2444f6; BYTE $0x01 // test byte [rsp + 44], 1 - JNE LBB0_924 - -LBB0_499: - LONG $0x282444f6; BYTE $0x01 // test byte [rsp + 40], 1 - JE LBB0_500 - -LBB0_925: - LONG $0x16f963c4; WORD $0x01d3 // vpextrq rbx, xmm10, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x01 // vpextrb byte [r8 + rbx], xmm1, 1 - LONG $0x242444f6; BYTE $0x01 // test byte [rsp + 36], 1 - JNE LBB0_926 - -LBB0_501: - LONG $0x202444f6; BYTE $0x01 // test byte [rsp + 32], 1 - JE LBB0_502 - -LBB0_927: - LONG $0x397d63c4; WORD $0x01d1 // vextracti128 xmm1, ymm10, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 
// vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x03 // vpextrb byte [r8 + rbx], xmm1, 3 - LONG $0x1c2444f6; BYTE $0x01 // test byte [rsp + 28], 1 - JNE LBB0_928 - -LBB0_503: - LONG $0x182444f6; BYTE $0x01 // test byte [rsp + 24], 1 - JE LBB0_504 - -LBB0_929: - LONG $0x16f963c4; WORD $0x01cb // vpextrq rbx, xmm9, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x05 // vpextrb byte [r8 + rbx], xmm1, 5 - LONG $0x142444f6; BYTE $0x01 // test byte [rsp + 20], 1 - JNE LBB0_930 - -LBB0_505: - LONG $0x01c1f641 // test r9b, 1 - JE LBB0_506 - -LBB0_931: - LONG $0x397d63c4; WORD $0x01c9 // vextracti128 xmm1, ymm9, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x07 // vpextrb byte [r8 + rbx], xmm1, 7 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - QUAD $0x00000128249c8b48 // mov rbx, qword [rsp + 296] - JNE LBB0_932 - -LBB0_507: - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_508 - -LBB0_933: - LONG $0x16f963c4; WORD $0x01c1 // vpextrq rcx, xmm8, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x09 // vpextrb byte [r8 + rcx], xmm1, 9 - LONG $0x01c6f640 // test sil, 1 - QUAD $0x0000013024948b48 // mov rdx, qword [rsp + 304] - JNE LBB0_934 - -LBB0_509: - WORD $0x01a8 // test al, 1 - QUAD $0x0000009824b48b48 // mov rsi, qword [rsp + 152] - JE LBB0_510 - -LBB0_935: - LONG $0x397d63c4; WORD $0x01c1 // vextracti128 xmm1, ymm8, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0b // vpextrb byte [r8 + rcx], xmm1, 11 - LONG $0x01c5f641 // test r13b, 1 - JNE LBB0_936 - -LBB0_511: - LONG $0x01c2f641 // test r10b, 1 - QUAD $0x0000011824ac8b4c // mov r13, qword [rsp + 280] - JE LBB0_512 - -LBB0_937: - LONG $0x16f9e3c4; WORD 
$0x01f9 // vpextrq rcx, xmm7, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0d // vpextrb byte [r8 + rcx], xmm1, 13 - LONG $0x01c3f641 // test r11b, 1 - JNE LBB0_938 - -LBB0_513: - LONG $0x01c6f641 // test r14b, 1 - QUAD $0x0000012024848b48 // mov rax, qword [rsp + 288] - QUAD $0x000000e8248c8b4c // mov r9, qword [rsp + 232] - JE LBB0_515 - -LBB0_514: - LONG $0x397de3c4; WORD $0x01f9 // vextracti128 xmm1, ymm7, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0f // vpextrb byte [r8 + rcx], xmm1, 15 - -LBB0_515: - QUAD $0x000260248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 608] - QUAD $0x00020024bceb75c5; BYTE $0x00 // vpor ymm15, ymm1, yword [rsp + 512] - QUAD $0x0001e024acebf5c5; BYTE $0x00 // vpor ymm5, ymm1, yword [rsp + 480] - QUAD $0x0001802494eb75c5; BYTE $0x00 // vpor ymm10, ymm1, yword [rsp + 384] - QUAD $0x000160248ceb75c5; BYTE $0x00 // vpor ymm9, ymm1, yword [rsp + 352] - QUAD $0x0001c024a4eb75c5; BYTE $0x00 // vpor ymm12, ymm1, yword [rsp + 448] - QUAD $0x0001a0249ceb75c5; BYTE $0x00 // vpor ymm11, ymm1, yword [rsp + 416] - QUAD $0x0001402484eb75c5; BYTE $0x00 // vpor ymm8, ymm1, yword [rsp + 320] - LONG $0xf9ebddc5 // vpor ymm7, ymm4, ymm1 - LONG $0x463de3c4; WORD $0x31cf // vperm2i128 ymm1, ymm8, ymm7, 49 - LONG $0x383de3c4; WORD $0x01d7 // vinserti128 ymm2, ymm8, xmm7, 1 - LONG $0xc9c6ecc5; BYTE $0x88 // vshufps ymm1, ymm2, ymm1, 136 - LONG $0x461dc3c4; WORD $0x31d3 // vperm2i128 ymm2, ymm12, ymm11, 49 - LONG $0x381dc3c4; WORD $0x01db // vinserti128 ymm3, ymm12, xmm11, 1 - LONG $0xd2c6e4c5; BYTE $0x88 // vshufps ymm2, ymm3, ymm2, 136 - LONG $0x462dc3c4; WORD $0x31d9 // vperm2i128 ymm3, ymm10, ymm9, 49 - LONG $0x382d43c4; WORD $0x01e9 // vinserti128 ymm13, ymm10, xmm9, 1 - LONG $0xdbc694c5; BYTE $0x88 // vshufps ymm3, ymm13, ymm3, 136 - LONG $0x460563c4; WORD $0x31ed // 
vperm2i128 ymm13, ymm15, ymm5, 49 - LONG $0x380563c4; WORD $0x01f5 // vinserti128 ymm14, ymm15, xmm5, 1 - LONG $0xc60c41c4; WORD $0x88ed // vshufps ymm13, ymm14, ymm13, 136 - LONG $0x667d41c4; BYTE $0xed // vpcmpgtd ymm13, ymm0, ymm13 - LONG $0xdb66fdc5 // vpcmpgtd ymm3, ymm0, ymm3 - LONG $0xdb6b95c5 // vpackssdw ymm3, ymm13, ymm3 - LONG $0xd266fdc5 // vpcmpgtd ymm2, ymm0, ymm2 - LONG $0xc966fdc5 // vpcmpgtd ymm1, ymm0, ymm1 - LONG $0xc96bedc5 // vpackssdw ymm1, ymm2, ymm1 - LONG $0x00fde3c4; WORD $0xd8d3 // vpermq ymm2, ymm3, 216 - LONG $0x00fde3c4; WORD $0xd8c9 // vpermq ymm1, ymm1, 216 - LONG $0xc963edc5 // vpacksswb ymm1, ymm2, ymm1 - LONG $0xf6dbf5c5 // vpand ymm6, ymm1, ymm6 - LONG $0xf17ef9c5 // vmovd ecx, xmm6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_516 - LONG $0x787d62c4; WORD $0x1734 // vpbroadcastb ymm14, byte [rdi + rdx] - LONG $0x1479e3c4; WORD $0x01f1 // vpextrb ecx, xmm6, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_940 - -LBB0_517: - LONG $0x24548b48; BYTE $0x68 // mov rdx, qword [rsp + 104] - LONG $0x1479e3c4; WORD $0x02f1 // vpextrb ecx, xmm6, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_519 - -LBB0_518: - LONG $0x2009e3c4; WORD $0x1f0c; BYTE $0x02 // vpinsrb xmm1, xmm14, byte [rdi + rbx], 2 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_519: - LONG $0x24748b48; BYTE $0x60 // mov rsi, qword [rsp + 96] - LONG $0x24548b4c; BYTE $0x48 // mov r10, qword [rsp + 72] - LONG $0x1479e3c4; WORD $0x03f1 // vpextrb ecx, xmm6, 3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_520 - QUAD $0x00000110248c8b48 // mov rcx, qword [rsp + 272] - LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x03 // vpinsrb xmm1, xmm14, byte [rdi + rcx], 3 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x04f1 // vpextrb ecx, xmm6, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_942 - -LBB0_521: - LONG $0x1479e3c4; WORD $0x05f1 // vpextrb ecx, xmm6, 5 - WORD $0xc1f6; BYTE $0x01 // 
test cl, 1 - JE LBB0_522 - -LBB0_943: - LONG $0x2009e3c4; WORD $0x170c; BYTE $0x05 // vpinsrb xmm1, xmm14, byte [rdi + rdx], 5 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x06f1 // vpextrb ecx, xmm6, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_944 - -LBB0_523: - LONG $0x1479e3c4; WORD $0x07f1 // vpextrb ecx, xmm6, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_524 - -LBB0_945: - LONG $0x2009a3c4; WORD $0x0f0c; BYTE $0x07 // vpinsrb xmm1, xmm14, byte [rdi + r9], 7 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x08f1 // vpextrb ecx, xmm6, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_946 - -LBB0_525: - LONG $0x24548b48; BYTE $0x58 // mov rdx, qword [rsp + 88] - LONG $0x1479e3c4; WORD $0x09f1 // vpextrb ecx, xmm6, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_527 - -LBB0_526: - LONG $0x2009a3c4; WORD $0x3f0c; BYTE $0x09 // vpinsrb xmm1, xmm14, byte [rdi + r15], 9 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_527: - QUAD $0x0000009024848b48 // mov rax, qword [rsp + 144] - QUAD $0x0000008824b48b48 // mov rsi, qword [rsp + 136] - QUAD $0x00000080249c8b48 // mov rbx, qword [rsp + 128] - LONG $0x244c8b4c; BYTE $0x78 // mov r9, qword [rsp + 120] - LONG $0x1479e3c4; WORD $0x0af1 // vpextrb ecx, xmm6, 10 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_528 - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0a // vpinsrb xmm1, xmm14, byte [rdi + rax], 10 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb ecx, xmm6, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_948 - -LBB0_529: - LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb ecx, xmm6, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_530 - -LBB0_949: - QUAD $0x000000f824848b48 // mov rax, qword [rsp + 248] - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0c // vpinsrb xmm1, xmm14, byte [rdi + rax], 12 - LONG 
$0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0df1 // vpextrb ecx, xmm6, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_531 - JMP LBB0_532 - -LBB0_516: - LONG $0x1479e3c4; WORD $0x01f1 // vpextrb ecx, xmm6, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_517 - -LBB0_940: - LONG $0x2009e3c4; WORD $0x370c; BYTE $0x01 // vpinsrb xmm1, xmm14, byte [rdi + rsi], 1 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x24548b48; BYTE $0x68 // mov rdx, qword [rsp + 104] - LONG $0x1479e3c4; WORD $0x02f1 // vpextrb ecx, xmm6, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_518 - JMP LBB0_519 - -LBB0_520: - LONG $0x1479e3c4; WORD $0x04f1 // vpextrb ecx, xmm6, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_521 - -LBB0_942: - QUAD $0x00000108248c8b48 // mov rcx, qword [rsp + 264] - LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x04 // vpinsrb xmm1, xmm14, byte [rdi + rcx], 4 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x05f1 // vpextrb ecx, xmm6, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_943 - -LBB0_522: - LONG $0x1479e3c4; WORD $0x06f1 // vpextrb ecx, xmm6, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_523 - -LBB0_944: - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x06 // vpinsrb xmm1, xmm14, byte [rdi + rax], 6 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x07f1 // vpextrb ecx, xmm6, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_945 - -LBB0_524: - LONG $0x1479e3c4; WORD $0x08f1 // vpextrb ecx, xmm6, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_525 - -LBB0_946: - LONG $0x2009e3c4; WORD $0x370c; BYTE $0x08 // vpinsrb xmm1, xmm14, byte [rdi + rsi], 8 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x24548b48; BYTE $0x58 // mov rdx, qword [rsp + 88] - LONG $0x1479e3c4; WORD $0x09f1 // vpextrb ecx, xmm6, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 
- JNE LBB0_526 - JMP LBB0_527 - -LBB0_528: - LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb ecx, xmm6, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_529 - -LBB0_948: - QUAD $0x0000010024848b48 // mov rax, qword [rsp + 256] - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0b // vpinsrb xmm1, xmm14, byte [rdi + rax], 11 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb ecx, xmm6, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_949 - -LBB0_530: - LONG $0x1479e3c4; WORD $0x0df1 // vpextrb ecx, xmm6, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_532 - -LBB0_531: - LONG $0x2009e3c4; WORD $0x170c; BYTE $0x0d // vpinsrb xmm1, xmm14, byte [rdi + rdx], 13 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_532: - LONG $0x24448b48; BYTE $0x50 // mov rax, qword [rsp + 80] - LONG $0x24548b48; BYTE $0x40 // mov rdx, qword [rsp + 64] - LONG $0x1479e3c4; WORD $0x0ef1 // vpextrb ecx, xmm6, 14 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_534 - LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0e // vpinsrb xmm1, xmm14, byte [rdi + rax], 14 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_534: - LONG $0x1479e3c4; WORD $0x0ff1 // vpextrb ecx, xmm6, 15 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_536 - LONG $0x2009a3c4; WORD $0x170c; BYTE $0x0f // vpinsrb xmm1, xmm14, byte [rdi + r10], 15 - LONG $0x020d63c4; WORD $0x0ff1 // vpblendd ymm14, ymm14, ymm1, 15 - -LBB0_536: - LONG $0x397de3c4; WORD $0x01f1 // vextracti128 xmm1, ymm6, 1 - LONG $0xc87ef9c5 // vmovd eax, xmm1 - LONG $0x2c244489 // mov dword [rsp + 44], eax - WORD $0x01a8 // test al, 1 - JE LBB0_538 - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x1714; BYTE $0x00 // vpinsrb xmm2, xmm2, byte [rdi + rdx], 0 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - -LBB0_538: - LONG $0x24448b48; BYTE $0x38 // mov rax, qword [rsp + 56] - LONG 
$0x1479e3c4; WORD $0x01c9 // vpextrb ecx, xmm1, 1 - LONG $0x28244c89 // mov dword [rsp + 40], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_539 - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x3714; BYTE $0x01 // vpinsrb xmm2, xmm2, byte [rdi + rsi], 1 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x02c9 // vpextrb ecx, xmm1, 2 - LONG $0x24244c89 // mov dword [rsp + 36], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_951 - -LBB0_540: - LONG $0x1479e3c4; WORD $0x03c9 // vpextrb ecx, xmm1, 3 - LONG $0x20244c89 // mov dword [rsp + 32], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_541 - -LBB0_952: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069a3c4; WORD $0x0f14; BYTE $0x03 // vpinsrb xmm2, xmm2, byte [rdi + r9], 3 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x04c9 // vpextrb ecx, xmm1, 4 - LONG $0x1c244c89 // mov dword [rsp + 28], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_953 - -LBB0_542: - LONG $0x1479e3c4; WORD $0x05c8 // vpextrb eax, xmm1, 5 - LONG $0x18244489 // mov dword [rsp + 24], eax - WORD $0x01a8 // test al, 1 - JE LBB0_544 - -LBB0_543: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069a3c4; WORD $0x2f14; BYTE $0x05 // vpinsrb xmm2, xmm2, byte [rdi + r13], 5 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - -LBB0_544: - LONG $0x24448b48; BYTE $0x70 // mov rax, qword [rsp + 112] - LONG $0x1479e3c4; WORD $0x06c9 // vpextrb ecx, xmm1, 6 - LONG $0x14244c89 // mov dword [rsp + 20], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_545 - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x06 // vpinsrb xmm2, xmm2, byte [rdi + rax], 6 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x07c9 // vpextrb 
r9d, xmm1, 7 - LONG $0x01c1f641 // test r9b, 1 - JNE LBB0_955 - -LBB0_546: - LONG $0x1479e3c4; WORD $0x08ca // vpextrb edx, xmm1, 8 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - JE LBB0_547 - -LBB0_956: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000d824848b48 // mov rax, qword [rsp + 216] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x08 // vpinsrb xmm2, xmm2, byte [rdi + rax], 8 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x09c9 // vpextrb ecx, xmm1, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_957 - -LBB0_548: - LONG $0x1479e3c4; WORD $0x0ace // vpextrb esi, xmm1, 10 - LONG $0x01c6f640 // test sil, 1 - JE LBB0_549 - -LBB0_958: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000c824848b48 // mov rax, qword [rsp + 200] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x0a // vpinsrb xmm2, xmm2, byte [rdi + rax], 10 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x0bc8 // vpextrb eax, xmm1, 11 - WORD $0x01a8 // test al, 1 - JNE LBB0_959 - -LBB0_550: - LONG $0x1479c3c4; WORD $0x0ccd // vpextrb r13d, xmm1, 12 - LONG $0x01c5f641 // test r13b, 1 - JE LBB0_551 - -LBB0_960: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000b8249c8b48 // mov rbx, qword [rsp + 184] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0c // vpinsrb xmm2, xmm2, byte [rdi + rbx], 12 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0dca // vpextrb r10d, xmm1, 13 - LONG $0x01c2f641 // test r10b, 1 - JNE LBB0_961 - -LBB0_552: - LONG $0x1479c3c4; WORD $0x0ecb // vpextrb r11d, xmm1, 14 - LONG $0x01c3f641 // test r11b, 1 - JE LBB0_553 - -LBB0_962: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000a8249c8b48 // mov rbx, qword [rsp + 168] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0e // vpinsrb xmm2, xmm2, byte [rdi + rbx], 14 - LONG $0x380d63c4; WORD 
$0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0fce // vpextrb r14d, xmm1, 15 - LONG $0x01c6f641 // test r14b, 1 - JNE LBB0_554 - JMP LBB0_555 - -LBB0_539: - LONG $0x1479e3c4; WORD $0x02c9 // vpextrb ecx, xmm1, 2 - LONG $0x24244c89 // mov dword [rsp + 36], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_540 - -LBB0_951: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x02 // vpinsrb xmm2, xmm2, byte [rdi + rbx], 2 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x03c9 // vpextrb ecx, xmm1, 3 - LONG $0x20244c89 // mov dword [rsp + 32], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_952 - -LBB0_541: - LONG $0x1479e3c4; WORD $0x04c9 // vpextrb ecx, xmm1, 4 - LONG $0x1c244c89 // mov dword [rsp + 28], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_542 - -LBB0_953: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x04 // vpinsrb xmm2, xmm2, byte [rdi + rax], 4 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x05c8 // vpextrb eax, xmm1, 5 - LONG $0x18244489 // mov dword [rsp + 24], eax - WORD $0x01a8 // test al, 1 - JNE LBB0_543 - JMP LBB0_544 - -LBB0_545: - LONG $0x1479c3c4; WORD $0x07c9 // vpextrb r9d, xmm1, 7 - LONG $0x01c1f641 // test r9b, 1 - JE LBB0_546 - -LBB0_955: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000f024848b48 // mov rax, qword [rsp + 240] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x07 // vpinsrb xmm2, xmm2, byte [rdi + rax], 7 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x08ca // vpextrb edx, xmm1, 8 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - JNE LBB0_956 - -LBB0_547: - LONG $0x1479e3c4; WORD $0x09c9 // vpextrb ecx, xmm1, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_548 - -LBB0_957: - LONG $0x397d63c4; WORD 
$0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000d024848b48 // mov rax, qword [rsp + 208] - LONG $0x2069e3c4; WORD $0x0714; BYTE $0x09 // vpinsrb xmm2, xmm2, byte [rdi + rax], 9 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479e3c4; WORD $0x0ace // vpextrb esi, xmm1, 10 - LONG $0x01c6f640 // test sil, 1 - JNE LBB0_958 - -LBB0_549: - LONG $0x1479e3c4; WORD $0x0bc8 // vpextrb eax, xmm1, 11 - WORD $0x01a8 // test al, 1 - JE LBB0_550 - -LBB0_959: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000c0249c8b48 // mov rbx, qword [rsp + 192] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0b // vpinsrb xmm2, xmm2, byte [rdi + rbx], 11 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0ccd // vpextrb r13d, xmm1, 12 - LONG $0x01c5f641 // test r13b, 1 - JNE LBB0_960 - -LBB0_551: - LONG $0x1479c3c4; WORD $0x0dca // vpextrb r10d, xmm1, 13 - LONG $0x01c2f641 // test r10b, 1 - JE LBB0_552 - -LBB0_961: - LONG $0x397d63c4; WORD $0x01f2 // vextracti128 xmm2, ymm14, 1 - QUAD $0x000000b0249c8b48 // mov rbx, qword [rsp + 176] - LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0d // vpinsrb xmm2, xmm2, byte [rdi + rbx], 13 - LONG $0x380d63c4; WORD $0x01f2 // vinserti128 ymm14, ymm14, xmm2, 1 - LONG $0x1479c3c4; WORD $0x0ecb // vpextrb r11d, xmm1, 14 - LONG $0x01c3f641 // test r11b, 1 - JNE LBB0_962 - -LBB0_553: - LONG $0x1479c3c4; WORD $0x0fce // vpextrb r14d, xmm1, 15 - LONG $0x01c6f641 // test r14b, 1 - JE LBB0_555 - -LBB0_554: - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - QUAD $0x000000a0249c8b48 // mov rbx, qword [rsp + 160] - LONG $0x2071e3c4; WORD $0x1f0c; BYTE $0x0f // vpinsrb xmm1, xmm1, byte [rdi + rbx], 15 - LONG $0x380d63c4; WORD $0x01f1 // vinserti128 ymm14, ymm14, xmm1, 1 - -LBB0_555: - LONG $0x7175c1c4; WORD $0x06d6 // vpsrlw ymm1, ymm14, 6 - QUAD $0x00000080b5db75c5 // vpand ymm14, ymm1, yword 128[rbp] /* [rip + .LCPI0_4] */ - LONG $0x7e79c1c4; BYTE 
$0xf7 // vmovd r15d, xmm6 - LONG $0x01c7f641 // test r15b, 1 - JE LBB0_556 - LONG $0x7ef961c4; BYTE $0xfb // vmovq rbx, xmm15 - LONG $0x147943c4; WORD $0x1834; BYTE $0x00 // vpextrb byte [r8 + rbx], xmm14, 0 - LONG $0x1479e3c4; WORD $0x01f3 // vpextrb ebx, xmm6, 1 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_964 - -LBB0_557: - LONG $0x1479e3c4; WORD $0x02f3 // vpextrb ebx, xmm6, 2 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - QUAD $0x000000e024bc8b4c // mov r15, qword [rsp + 224] - JE LBB0_558 - -LBB0_965: - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x02 // vpextrb byte [r8 + rbx], xmm14, 2 - LONG $0x1479e3c4; WORD $0x03f3 // vpextrb ebx, xmm6, 3 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_966 - -LBB0_559: - LONG $0x1479e3c4; WORD $0x04f3 // vpextrb ebx, xmm6, 4 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_560 - -LBB0_967: - LONG $0x7ef9e1c4; BYTE $0xeb // vmovq rbx, xmm5 - LONG $0x147943c4; WORD $0x1834; BYTE $0x04 // vpextrb byte [r8 + rbx], xmm14, 4 - LONG $0x1479e3c4; WORD $0x05f3 // vpextrb ebx, xmm6, 5 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_968 - -LBB0_561: - LONG $0x1479e3c4; WORD $0x06f3 // vpextrb ebx, xmm6, 6 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_562 - -LBB0_969: - LONG $0x397de3c4; WORD $0x01e9 // vextracti128 xmm1, ymm5, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x06 // vpextrb byte [r8 + rbx], xmm14, 6 - LONG $0x1479e3c4; WORD $0x07f3 // vpextrb ebx, xmm6, 7 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_970 - -LBB0_563: - LONG $0x1479e3c4; WORD $0x08f3 // vpextrb ebx, xmm6, 8 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_564 - -LBB0_971: - LONG $0x7ef961c4; BYTE $0xe3 // vmovq rbx, xmm12 - LONG $0x147943c4; WORD $0x1834; BYTE $0x08 // vpextrb byte [r8 + rbx], xmm14, 8 - LONG $0x1479e3c4; WORD $0x09f3 // vpextrb ebx, xmm6, 9 - WORD $0xc3f6; 
BYTE $0x01 // test bl, 1 - JNE LBB0_972 - -LBB0_565: - LONG $0x1479e3c4; WORD $0x0af3 // vpextrb ebx, xmm6, 10 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_566 - -LBB0_973: - LONG $0x397d63c4; WORD $0x01e1 // vextracti128 xmm1, ymm12, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0a // vpextrb byte [r8 + rbx], xmm14, 10 - LONG $0x1479e3c4; WORD $0x0bf3 // vpextrb ebx, xmm6, 11 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_974 - -LBB0_567: - LONG $0x1479e3c4; WORD $0x0cf3 // vpextrb ebx, xmm6, 12 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_568 - -LBB0_975: - LONG $0x7ef961c4; BYTE $0xdb // vmovq rbx, xmm11 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0c // vpextrb byte [r8 + rbx], xmm14, 12 - LONG $0x1479e3c4; WORD $0x0df3 // vpextrb ebx, xmm6, 13 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_976 - -LBB0_569: - LONG $0x1479e3c4; WORD $0x0ef3 // vpextrb ebx, xmm6, 14 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_570 - -LBB0_977: - LONG $0x397d63c4; WORD $0x01d9 // vextracti128 xmm1, ymm11, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0e // vpextrb byte [r8 + rbx], xmm14, 14 - LONG $0x1479e3c4; WORD $0x0ff3 // vpextrb ebx, xmm6, 15 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_978 - -LBB0_571: - LONG $0x2c2444f6; BYTE $0x01 // test byte [rsp + 44], 1 - JE LBB0_572 - -LBB0_979: - LONG $0x7ef961c4; BYTE $0xd3 // vmovq rbx, xmm10 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x00 // vpextrb byte [r8 + rbx], xmm1, 0 - LONG $0x282444f6; BYTE $0x01 // test byte [rsp + 40], 1 - JNE LBB0_980 - -LBB0_573: - LONG $0x242444f6; BYTE $0x01 // test byte [rsp + 36], 1 - JE LBB0_574 - -LBB0_981: - LONG $0x397d63c4; WORD $0x01d1 // vextracti128 xmm1, ymm10, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD 
$0x180c; BYTE $0x02 // vpextrb byte [r8 + rbx], xmm1, 2 - LONG $0x202444f6; BYTE $0x01 // test byte [rsp + 32], 1 - JNE LBB0_982 - -LBB0_575: - LONG $0x1c2444f6; BYTE $0x01 // test byte [rsp + 28], 1 - JE LBB0_576 - -LBB0_983: - LONG $0x7ef961c4; BYTE $0xcb // vmovq rbx, xmm9 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x04 // vpextrb byte [r8 + rbx], xmm1, 4 - LONG $0x182444f6; BYTE $0x01 // test byte [rsp + 24], 1 - JNE LBB0_984 - -LBB0_577: - LONG $0x142444f6; BYTE $0x01 // test byte [rsp + 20], 1 - JE LBB0_578 - -LBB0_985: - LONG $0x397d63c4; WORD $0x01c9 // vextracti128 xmm1, ymm9, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x06 // vpextrb byte [r8 + rbx], xmm1, 6 - LONG $0x01c1f641 // test r9b, 1 - JNE LBB0_986 - -LBB0_579: - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - QUAD $0x00000128249c8b48 // mov rbx, qword [rsp + 296] - JE LBB0_580 - -LBB0_987: - LONG $0x7ef961c4; BYTE $0xc2 // vmovq rdx, xmm8 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x100c; BYTE $0x08 // vpextrb byte [r8 + rdx], xmm1, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_988 - -LBB0_581: - LONG $0x01c6f640 // test sil, 1 - QUAD $0x0000013024948b48 // mov rdx, qword [rsp + 304] - JE LBB0_582 - -LBB0_989: - LONG $0x397d63c4; WORD $0x01c1 // vextracti128 xmm1, ymm8, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0a // vpextrb byte [r8 + rcx], xmm1, 10 - WORD $0x01a8 // test al, 1 - QUAD $0x0000009824b48b48 // mov rsi, qword [rsp + 152] - JNE LBB0_990 - -LBB0_583: - LONG $0x01c5f641 // test r13b, 1 - JE LBB0_584 - -LBB0_991: - LONG $0x7ef9e1c4; BYTE $0xf9 // vmovq rcx, xmm7 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD 
$0x080c; BYTE $0x0c // vpextrb byte [r8 + rcx], xmm1, 12 - LONG $0x01c2f641 // test r10b, 1 - QUAD $0x0000011824ac8b4c // mov r13, qword [rsp + 280] - JNE LBB0_992 - -LBB0_585: - LONG $0x01c3f641 // test r11b, 1 - JE LBB0_586 - -LBB0_993: - LONG $0x397de3c4; WORD $0x01f9 // vextracti128 xmm1, ymm7, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0e // vpextrb byte [r8 + rcx], xmm1, 14 - LONG $0x01c6f641 // test r14b, 1 - QUAD $0x0000012024848b48 // mov rax, qword [rsp + 288] - QUAD $0x000000e8248c8b4c // mov r9, qword [rsp + 232] - JNE LBB0_587 - JMP LBB0_588 - -LBB0_556: - LONG $0x1479e3c4; WORD $0x01f3 // vpextrb ebx, xmm6, 1 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_557 - -LBB0_964: - LONG $0x16f963c4; WORD $0x01fb // vpextrq rbx, xmm15, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x01 // vpextrb byte [r8 + rbx], xmm14, 1 - LONG $0x1479e3c4; WORD $0x02f3 // vpextrb ebx, xmm6, 2 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - QUAD $0x000000e024bc8b4c // mov r15, qword [rsp + 224] - JNE LBB0_965 - -LBB0_558: - LONG $0x1479e3c4; WORD $0x03f3 // vpextrb ebx, xmm6, 3 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_559 - -LBB0_966: - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x03 // vpextrb byte [r8 + rbx], xmm14, 3 - LONG $0x1479e3c4; WORD $0x04f3 // vpextrb ebx, xmm6, 4 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_967 - -LBB0_560: - LONG $0x1479e3c4; WORD $0x05f3 // vpextrb ebx, xmm6, 5 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_561 - -LBB0_968: - LONG $0x16f9e3c4; WORD $0x01eb // vpextrq rbx, xmm5, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x05 // vpextrb byte [r8 + rbx], xmm14, 5 - LONG $0x1479e3c4; WORD $0x06f3 // vpextrb ebx, xmm6, 6 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_969 - -LBB0_562: - LONG 
$0x1479e3c4; WORD $0x07f3 // vpextrb ebx, xmm6, 7 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_563 - -LBB0_970: - LONG $0x397de3c4; WORD $0x01e9 // vextracti128 xmm1, ymm5, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x07 // vpextrb byte [r8 + rbx], xmm14, 7 - LONG $0x1479e3c4; WORD $0x08f3 // vpextrb ebx, xmm6, 8 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_971 - -LBB0_564: - LONG $0x1479e3c4; WORD $0x09f3 // vpextrb ebx, xmm6, 9 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_565 - -LBB0_972: - LONG $0x16f963c4; WORD $0x01e3 // vpextrq rbx, xmm12, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x09 // vpextrb byte [r8 + rbx], xmm14, 9 - LONG $0x1479e3c4; WORD $0x0af3 // vpextrb ebx, xmm6, 10 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_973 - -LBB0_566: - LONG $0x1479e3c4; WORD $0x0bf3 // vpextrb ebx, xmm6, 11 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_567 - -LBB0_974: - LONG $0x397d63c4; WORD $0x01e1 // vextracti128 xmm1, ymm12, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0b // vpextrb byte [r8 + rbx], xmm14, 11 - LONG $0x1479e3c4; WORD $0x0cf3 // vpextrb ebx, xmm6, 12 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_975 - -LBB0_568: - LONG $0x1479e3c4; WORD $0x0df3 // vpextrb ebx, xmm6, 13 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_569 - -LBB0_976: - LONG $0x16f963c4; WORD $0x01db // vpextrq rbx, xmm11, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0d // vpextrb byte [r8 + rbx], xmm14, 13 - LONG $0x1479e3c4; WORD $0x0ef3 // vpextrb ebx, xmm6, 14 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_977 - -LBB0_570: - LONG $0x1479e3c4; WORD $0x0ff3 // vpextrb ebx, xmm6, 15 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_571 - -LBB0_978: - LONG $0x397d63c4; WORD $0x01d9 // vextracti128 xmm1, ymm11, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x147943c4; WORD $0x1834; BYTE $0x0f // vpextrb byte 
[r8 + rbx], xmm14, 15 - LONG $0x2c2444f6; BYTE $0x01 // test byte [rsp + 44], 1 - JNE LBB0_979 - -LBB0_572: - LONG $0x282444f6; BYTE $0x01 // test byte [rsp + 40], 1 - JE LBB0_573 - -LBB0_980: - LONG $0x16f963c4; WORD $0x01d3 // vpextrq rbx, xmm10, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x01 // vpextrb byte [r8 + rbx], xmm1, 1 - LONG $0x242444f6; BYTE $0x01 // test byte [rsp + 36], 1 - JNE LBB0_981 - -LBB0_574: - LONG $0x202444f6; BYTE $0x01 // test byte [rsp + 32], 1 - JE LBB0_575 - -LBB0_982: - LONG $0x397d63c4; WORD $0x01d1 // vextracti128 xmm1, ymm10, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x03 // vpextrb byte [r8 + rbx], xmm1, 3 - LONG $0x1c2444f6; BYTE $0x01 // test byte [rsp + 28], 1 - JNE LBB0_983 - -LBB0_576: - LONG $0x182444f6; BYTE $0x01 // test byte [rsp + 24], 1 - JE LBB0_577 - -LBB0_984: - LONG $0x16f963c4; WORD $0x01cb // vpextrq rbx, xmm9, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x05 // vpextrb byte [r8 + rbx], xmm1, 5 - LONG $0x142444f6; BYTE $0x01 // test byte [rsp + 20], 1 - JNE LBB0_985 - -LBB0_578: - LONG $0x01c1f641 // test r9b, 1 - JE LBB0_579 - -LBB0_986: - LONG $0x397d63c4; WORD $0x01c9 // vextracti128 xmm1, ymm9, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x07 // vpextrb byte [r8 + rbx], xmm1, 7 - WORD $0xc2f6; BYTE $0x01 // test dl, 1 - QUAD $0x00000128249c8b48 // mov rbx, qword [rsp + 296] - JNE LBB0_987 - -LBB0_580: - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_581 - -LBB0_988: - LONG $0x16f963c4; WORD $0x01c1 // vpextrq rcx, xmm8, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x09 // vpextrb byte [r8 + 
rcx], xmm1, 9 - LONG $0x01c6f640 // test sil, 1 - QUAD $0x0000013024948b48 // mov rdx, qword [rsp + 304] - JNE LBB0_989 - -LBB0_582: - WORD $0x01a8 // test al, 1 - QUAD $0x0000009824b48b48 // mov rsi, qword [rsp + 152] - JE LBB0_583 - -LBB0_990: - LONG $0x397d63c4; WORD $0x01c1 // vextracti128 xmm1, ymm8, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0b // vpextrb byte [r8 + rcx], xmm1, 11 - LONG $0x01c5f641 // test r13b, 1 - JNE LBB0_991 - -LBB0_584: - LONG $0x01c2f641 // test r10b, 1 - QUAD $0x0000011824ac8b4c // mov r13, qword [rsp + 280] - JE LBB0_585 - -LBB0_992: - LONG $0x16f9e3c4; WORD $0x01f9 // vpextrq rcx, xmm7, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0d // vpextrb byte [r8 + rcx], xmm1, 13 - LONG $0x01c3f641 // test r11b, 1 - JNE LBB0_993 - -LBB0_586: - LONG $0x01c6f641 // test r14b, 1 - QUAD $0x0000012024848b48 // mov rax, qword [rsp + 288] - QUAD $0x000000e8248c8b4c // mov r9, qword [rsp + 232] - JE LBB0_588 - -LBB0_587: - LONG $0x397de3c4; WORD $0x01f9 // vextracti128 xmm1, ymm7, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397d63c4; WORD $0x01f1 // vextracti128 xmm1, ymm14, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0f // vpextrb byte [r8 + rcx], xmm1, 15 - -LBB0_588: - QUAD $0x000240248c6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword [rsp + 576] - QUAD $0x000200249ceb75c5; BYTE $0x00 // vpor ymm11, ymm1, yword [rsp + 512] - QUAD $0x0001e02494eb75c5; BYTE $0x00 // vpor ymm10, ymm1, yword [rsp + 480] - QUAD $0x0001802484eb75c5; BYTE $0x00 // vpor ymm8, ymm1, yword [rsp + 384] - QUAD $0x00016024bcebf5c5; BYTE $0x00 // vpor ymm7, ymm1, yword [rsp + 352] - QUAD $0x0001c0248ceb75c5; BYTE $0x00 // vpor ymm9, ymm1, yword [rsp + 448] - QUAD $0x0001a024acebf5c5; BYTE $0x00 // vpor ymm5, ymm1, yword [rsp + 416] - QUAD $0x0001402494ebf5c5; BYTE $0x00 // vpor 
ymm2, ymm1, yword [rsp + 320] - LONG $0xf9eb5dc5 // vpor ymm15, ymm4, ymm1 - LONG $0x466dc3c4; WORD $0x31df // vperm2i128 ymm3, ymm2, ymm15, 49 - LONG $0x386dc3c4; WORD $0x01e7 // vinserti128 ymm4, ymm2, xmm15, 1 - LONG $0xdbc6dcc5; BYTE $0x88 // vshufps ymm3, ymm4, ymm3, 136 - LONG $0x4635e3c4; WORD $0x31e5 // vperm2i128 ymm4, ymm9, ymm5, 49 - LONG $0x383563c4; WORD $0x01e5 // vinserti128 ymm12, ymm9, xmm5, 1 - LONG $0xe4c69cc5; BYTE $0x88 // vshufps ymm4, ymm12, ymm4, 136 - LONG $0x463d63c4; WORD $0x31e7 // vperm2i128 ymm12, ymm8, ymm7, 49 - LONG $0x383d63c4; WORD $0x01ef // vinserti128 ymm13, ymm8, xmm7, 1 - LONG $0xc61441c4; WORD $0x88e4 // vshufps ymm12, ymm13, ymm12, 136 - LONG $0x462543c4; WORD $0x31ea // vperm2i128 ymm13, ymm11, ymm10, 49 - LONG $0x382543c4; WORD $0x01f2 // vinserti128 ymm14, ymm11, xmm10, 1 - LONG $0xc60c41c4; WORD $0x88ed // vshufps ymm13, ymm14, ymm13, 136 - LONG $0x667d41c4; BYTE $0xed // vpcmpgtd ymm13, ymm0, ymm13 - LONG $0x667d41c4; BYTE $0xe4 // vpcmpgtd ymm12, ymm0, ymm12 - LONG $0x6b1541c4; BYTE $0xe4 // vpackssdw ymm12, ymm13, ymm12 - LONG $0x00fd43c4; WORD $0xd8e4 // vpermq ymm12, ymm12, 216 - LONG $0xe466fdc5 // vpcmpgtd ymm4, ymm0, ymm4 - LONG $0xdb66fdc5 // vpcmpgtd ymm3, ymm0, ymm3 - LONG $0xdb6bddc5 // vpackssdw ymm3, ymm4, ymm3 - LONG $0x00fde3c4; WORD $0xd8db // vpermq ymm3, ymm3, 216 - LONG $0xdb639dc5 // vpacksswb ymm3, ymm12, ymm3 - LONG $0xdedbe5c5 // vpand ymm3, ymm3, ymm6 - LONG $0xd97ef9c5 // vmovd ecx, xmm3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_589 - LONG $0x787de2c4; WORD $0x1724 // vpbroadcastb ymm4, byte [rdi + rdx] - LONG $0x1479e3c4; WORD $0x01d9 // vpextrb ecx, xmm3, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_995 - -LBB0_590: - LONG $0x24548b48; BYTE $0x68 // mov rdx, qword [rsp + 104] - LONG $0x1479e3c4; WORD $0x02d9 // vpextrb ecx, xmm3, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_592 - -LBB0_591: - LONG $0x2059e3c4; WORD $0x1f34; BYTE $0x02 // vpinsrb xmm6, xmm4, byte [rdi 
+ rbx], 2 - LONG $0x025de3c4; WORD $0x0fe6 // vpblendd ymm4, ymm4, ymm6, 15 - -LBB0_592: - LONG $0x24748b48; BYTE $0x60 // mov rsi, qword [rsp + 96] - LONG $0x24548b4c; BYTE $0x48 // mov r10, qword [rsp + 72] - LONG $0x1479e3c4; WORD $0x03d9 // vpextrb ecx, xmm3, 3 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_593 - QUAD $0x00000110248c8b48 // mov rcx, qword [rsp + 272] - LONG $0x2059e3c4; WORD $0x0f34; BYTE $0x03 // vpinsrb xmm6, xmm4, byte [rdi + rcx], 3 - LONG $0x025de3c4; WORD $0x0fe6 // vpblendd ymm4, ymm4, ymm6, 15 - LONG $0x1479e3c4; WORD $0x04d9 // vpextrb ecx, xmm3, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_997 - -LBB0_594: - LONG $0x1479e3c4; WORD $0x05d9 // vpextrb ecx, xmm3, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_595 - -LBB0_998: - LONG $0x2059e3c4; WORD $0x1734; BYTE $0x05 // vpinsrb xmm6, xmm4, byte [rdi + rdx], 5 - LONG $0x025de3c4; WORD $0x0fe6 // vpblendd ymm4, ymm4, ymm6, 15 - LONG $0x1479e3c4; WORD $0x06d9 // vpextrb ecx, xmm3, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_999 - -LBB0_596: - LONG $0x1479e3c4; WORD $0x07d9 // vpextrb ecx, xmm3, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_597 - -LBB0_1000: - LONG $0x2059a3c4; WORD $0x0f34; BYTE $0x07 // vpinsrb xmm6, xmm4, byte [rdi + r9], 7 - LONG $0x025de3c4; WORD $0x0fe6 // vpblendd ymm4, ymm4, ymm6, 15 - LONG $0x1479e3c4; WORD $0x08d9 // vpextrb ecx, xmm3, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_1001 - -LBB0_598: - LONG $0x24548b48; BYTE $0x58 // mov rdx, qword [rsp + 88] - LONG $0x1479e3c4; WORD $0x09d9 // vpextrb ecx, xmm3, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_600 - -LBB0_599: - LONG $0x2059a3c4; WORD $0x3f34; BYTE $0x09 // vpinsrb xmm6, xmm4, byte [rdi + r15], 9 - LONG $0x025de3c4; WORD $0x0fe6 // vpblendd ymm4, ymm4, ymm6, 15 - -LBB0_600: - QUAD $0x0000009024848b48 // mov rax, qword [rsp + 144] - QUAD $0x0000008824b48b48 // mov rsi, qword [rsp + 136] - QUAD $0x00000080249c8b48 // mov rbx, qword [rsp + 128] - LONG 
$0x244c8b4c; BYTE $0x78 // mov r9, qword [rsp + 120] - LONG $0x1479e3c4; WORD $0x0ad9 // vpextrb ecx, xmm3, 10 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_601 - LONG $0x2059e3c4; WORD $0x0734; BYTE $0x0a // vpinsrb xmm6, xmm4, byte [rdi + rax], 10 - LONG $0x025de3c4; WORD $0x0fe6 // vpblendd ymm4, ymm4, ymm6, 15 - LONG $0x1479e3c4; WORD $0x0bd9 // vpextrb ecx, xmm3, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_1003 - -LBB0_602: - LONG $0x1479e3c4; WORD $0x0cd9 // vpextrb ecx, xmm3, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_603 - -LBB0_1004: - QUAD $0x000000f824848b48 // mov rax, qword [rsp + 248] - LONG $0x2059e3c4; WORD $0x0734; BYTE $0x0c // vpinsrb xmm6, xmm4, byte [rdi + rax], 12 - LONG $0x025de3c4; WORD $0x0fe6 // vpblendd ymm4, ymm4, ymm6, 15 - LONG $0x1479e3c4; WORD $0x0dd9 // vpextrb ecx, xmm3, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_604 - JMP LBB0_605 - -LBB0_589: - LONG $0x1479e3c4; WORD $0x01d9 // vpextrb ecx, xmm3, 1 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_590 - -LBB0_995: - LONG $0x2059e3c4; WORD $0x3734; BYTE $0x01 // vpinsrb xmm6, xmm4, byte [rdi + rsi], 1 - LONG $0x025de3c4; WORD $0x0fe6 // vpblendd ymm4, ymm4, ymm6, 15 - LONG $0x24548b48; BYTE $0x68 // mov rdx, qword [rsp + 104] - LONG $0x1479e3c4; WORD $0x02d9 // vpextrb ecx, xmm3, 2 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_591 - JMP LBB0_592 - -LBB0_593: - LONG $0x1479e3c4; WORD $0x04d9 // vpextrb ecx, xmm3, 4 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_594 - -LBB0_997: - QUAD $0x00000108248c8b48 // mov rcx, qword [rsp + 264] - LONG $0x2059e3c4; WORD $0x0f34; BYTE $0x04 // vpinsrb xmm6, xmm4, byte [rdi + rcx], 4 - LONG $0x025de3c4; WORD $0x0fe6 // vpblendd ymm4, ymm4, ymm6, 15 - LONG $0x1479e3c4; WORD $0x05d9 // vpextrb ecx, xmm3, 5 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_998 - -LBB0_595: - LONG $0x1479e3c4; WORD $0x06d9 // vpextrb ecx, xmm3, 6 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_596 - -LBB0_999: - 
LONG $0x2059e3c4; WORD $0x0734; BYTE $0x06 // vpinsrb xmm6, xmm4, byte [rdi + rax], 6 - LONG $0x025de3c4; WORD $0x0fe6 // vpblendd ymm4, ymm4, ymm6, 15 - LONG $0x1479e3c4; WORD $0x07d9 // vpextrb ecx, xmm3, 7 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_1000 - -LBB0_597: - LONG $0x1479e3c4; WORD $0x08d9 // vpextrb ecx, xmm3, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_598 - -LBB0_1001: - LONG $0x2059e3c4; WORD $0x3734; BYTE $0x08 // vpinsrb xmm6, xmm4, byte [rdi + rsi], 8 - LONG $0x025de3c4; WORD $0x0fe6 // vpblendd ymm4, ymm4, ymm6, 15 - LONG $0x24548b48; BYTE $0x58 // mov rdx, qword [rsp + 88] - LONG $0x1479e3c4; WORD $0x09d9 // vpextrb ecx, xmm3, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_599 - JMP LBB0_600 - -LBB0_601: - LONG $0x1479e3c4; WORD $0x0bd9 // vpextrb ecx, xmm3, 11 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_602 - -LBB0_1003: - QUAD $0x0000010024848b48 // mov rax, qword [rsp + 256] - LONG $0x2059e3c4; WORD $0x0734; BYTE $0x0b // vpinsrb xmm6, xmm4, byte [rdi + rax], 11 - LONG $0x025de3c4; WORD $0x0fe6 // vpblendd ymm4, ymm4, ymm6, 15 - LONG $0x1479e3c4; WORD $0x0cd9 // vpextrb ecx, xmm3, 12 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_1004 - -LBB0_603: - LONG $0x1479e3c4; WORD $0x0dd9 // vpextrb ecx, xmm3, 13 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_605 - -LBB0_604: - LONG $0x2059e3c4; WORD $0x1734; BYTE $0x0d // vpinsrb xmm6, xmm4, byte [rdi + rdx], 13 - LONG $0x025de3c4; WORD $0x0fe6 // vpblendd ymm4, ymm4, ymm6, 15 - -LBB0_605: - LONG $0x24448b48; BYTE $0x50 // mov rax, qword [rsp + 80] - LONG $0x24548b48; BYTE $0x40 // mov rdx, qword [rsp + 64] - LONG $0x1479e3c4; WORD $0x0ed9 // vpextrb ecx, xmm3, 14 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_607 - LONG $0x2059e3c4; WORD $0x0734; BYTE $0x0e // vpinsrb xmm6, xmm4, byte [rdi + rax], 14 - LONG $0x025de3c4; WORD $0x0fe6 // vpblendd ymm4, ymm4, ymm6, 15 - -LBB0_607: - LONG $0x1479e3c4; WORD $0x0fd9 // vpextrb ecx, xmm3, 15 - WORD $0xc1f6; 
BYTE $0x01 // test cl, 1 - JE LBB0_609 - LONG $0x2059a3c4; WORD $0x1734; BYTE $0x0f // vpinsrb xmm6, xmm4, byte [rdi + r10], 15 - LONG $0x025de3c4; WORD $0x0fe6 // vpblendd ymm4, ymm4, ymm6, 15 - -LBB0_609: - LONG $0x397de3c4; WORD $0x01de // vextracti128 xmm6, ymm3, 1 - LONG $0xf07ef9c5 // vmovd eax, xmm6 - LONG $0x00248489; WORD $0x0002; BYTE $0x00 // mov dword [rsp + 512], eax - WORD $0x01a8 // test al, 1 - JE LBB0_611 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x2071e3c4; WORD $0x170c; BYTE $0x00 // vpinsrb xmm1, xmm1, byte [rdi + rdx], 0 - LONG $0x385de3c4; WORD $0x01e1 // vinserti128 ymm4, ymm4, xmm1, 1 - -LBB0_611: - LONG $0x24448b48; BYTE $0x38 // mov rax, qword [rsp + 56] - LONG $0x1479e3c4; WORD $0x01f1 // vpextrb ecx, xmm6, 1 - LONG $0xe0248c89; WORD $0x0001; BYTE $0x00 // mov dword [rsp + 480], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_612 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x2071e3c4; WORD $0x370c; BYTE $0x01 // vpinsrb xmm1, xmm1, byte [rdi + rsi], 1 - LONG $0x385de3c4; WORD $0x01e1 // vinserti128 ymm4, ymm4, xmm1, 1 - LONG $0x1479e3c4; WORD $0x02f1 // vpextrb ecx, xmm6, 2 - LONG $0xc0248c89; WORD $0x0001; BYTE $0x00 // mov dword [rsp + 448], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_1006 - -LBB0_613: - LONG $0x1479e3c4; WORD $0x03f1 // vpextrb ecx, xmm6, 3 - LONG $0xa0248c89; WORD $0x0001; BYTE $0x00 // mov dword [rsp + 416], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_614 - -LBB0_1007: - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x2071a3c4; WORD $0x0f0c; BYTE $0x03 // vpinsrb xmm1, xmm1, byte [rdi + r9], 3 - LONG $0x385de3c4; WORD $0x01e1 // vinserti128 ymm4, ymm4, xmm1, 1 - LONG $0x1479e3c4; WORD $0x04f1 // vpextrb ecx, xmm6, 4 - LONG $0x80248c89; WORD $0x0001; BYTE $0x00 // mov dword [rsp + 384], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_1008 - -LBB0_615: - LONG $0x1479e3c4; WORD $0x05f0 // vpextrb eax, 
xmm6, 5 - LONG $0x60248489; WORD $0x0001; BYTE $0x00 // mov dword [rsp + 352], eax - WORD $0x01a8 // test al, 1 - JE LBB0_617 - -LBB0_616: - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x2071a3c4; WORD $0x2f0c; BYTE $0x05 // vpinsrb xmm1, xmm1, byte [rdi + r13], 5 - LONG $0x385de3c4; WORD $0x01e1 // vinserti128 ymm4, ymm4, xmm1, 1 - -LBB0_617: - LONG $0x24448b48; BYTE $0x70 // mov rax, qword [rsp + 112] - QUAD $0x000000b8249c8b48 // mov rbx, qword [rsp + 184] - QUAD $0x000000b024948b48 // mov rdx, qword [rsp + 176] - LONG $0x1479e3c4; WORD $0x06f1 // vpextrb ecx, xmm6, 6 - LONG $0x40248c89; WORD $0x0001; BYTE $0x00 // mov dword [rsp + 320], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_618 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x2071e3c4; WORD $0x070c; BYTE $0x06 // vpinsrb xmm1, xmm1, byte [rdi + rax], 6 - LONG $0x385de3c4; WORD $0x01e1 // vinserti128 ymm4, ymm4, xmm1, 1 - LONG $0x1479e3c4; WORD $0x07f0 // vpextrb eax, xmm6, 7 - LONG $0x98248489; WORD $0x0000; BYTE $0x00 // mov dword [rsp + 152], eax - WORD $0x01a8 // test al, 1 - JNE LBB0_1010 - -LBB0_619: - LONG $0x1479c3c4; WORD $0x08f1 // vpextrb r9d, xmm6, 8 - LONG $0x01c1f641 // test r9b, 1 - JE LBB0_620 - -LBB0_1011: - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - QUAD $0x000000d824848b48 // mov rax, qword [rsp + 216] - LONG $0x2071e3c4; WORD $0x070c; BYTE $0x08 // vpinsrb xmm1, xmm1, byte [rdi + rax], 8 - LONG $0x385de3c4; WORD $0x01e1 // vinserti128 ymm4, ymm4, xmm1, 1 - LONG $0x1479e3c4; WORD $0x09f1 // vpextrb ecx, xmm6, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_1012 - -LBB0_621: - LONG $0x1479c3c4; WORD $0x0af3 // vpextrb r11d, xmm6, 10 - LONG $0x01c3f641 // test r11b, 1 - JE LBB0_622 - -LBB0_1013: - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - QUAD $0x000000c824848b48 // mov rax, qword [rsp + 200] - LONG $0x2071e3c4; WORD $0x070c; BYTE $0x0a // vpinsrb xmm1, xmm1, byte [rdi + rax], 10 - 
LONG $0x385de3c4; WORD $0x01e1 // vinserti128 ymm4, ymm4, xmm1, 1 - LONG $0x1479e3c4; WORD $0x0bf0 // vpextrb eax, xmm6, 11 - WORD $0x01a8 // test al, 1 - JNE LBB0_1014 - -LBB0_623: - LONG $0x1479e3c4; WORD $0x0cf6 // vpextrb esi, xmm6, 12 - LONG $0x01c6f640 // test sil, 1 - JE LBB0_624 - -LBB0_1015: - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x2071e3c4; WORD $0x1f0c; BYTE $0x0c // vpinsrb xmm1, xmm1, byte [rdi + rbx], 12 - LONG $0x385de3c4; WORD $0x01e1 // vinserti128 ymm4, ymm4, xmm1, 1 - LONG $0x1479c3c4; WORD $0x0df2 // vpextrb r10d, xmm6, 13 - LONG $0x01c2f641 // test r10b, 1 - JNE LBB0_1016 - -LBB0_625: - QUAD $0x000000a824948b48 // mov rdx, qword [rsp + 168] - LONG $0x1479c3c4; WORD $0x0ef5 // vpextrb r13d, xmm6, 14 - LONG $0x01c5f641 // test r13b, 1 - JE LBB0_626 - -LBB0_1017: - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x2071e3c4; WORD $0x170c; BYTE $0x0e // vpinsrb xmm1, xmm1, byte [rdi + rdx], 14 - LONG $0x385de3c4; WORD $0x01e1 // vinserti128 ymm4, ymm4, xmm1, 1 - QUAD $0x000000a024948b48 // mov rdx, qword [rsp + 160] - LONG $0x1479c3c4; WORD $0x0ff6 // vpextrb r14d, xmm6, 15 - LONG $0x01c6f641 // test r14b, 1 - JNE LBB0_627 - JMP LBB0_628 - -LBB0_612: - LONG $0x1479e3c4; WORD $0x02f1 // vpextrb ecx, xmm6, 2 - LONG $0xc0248c89; WORD $0x0001; BYTE $0x00 // mov dword [rsp + 448], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_613 - -LBB0_1006: - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x2071e3c4; WORD $0x1f0c; BYTE $0x02 // vpinsrb xmm1, xmm1, byte [rdi + rbx], 2 - LONG $0x385de3c4; WORD $0x01e1 // vinserti128 ymm4, ymm4, xmm1, 1 - LONG $0x1479e3c4; WORD $0x03f1 // vpextrb ecx, xmm6, 3 - LONG $0xa0248c89; WORD $0x0001; BYTE $0x00 // mov dword [rsp + 416], ecx - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_1007 - -LBB0_614: - LONG $0x1479e3c4; WORD $0x04f1 // vpextrb ecx, xmm6, 4 - LONG $0x80248c89; WORD $0x0001; BYTE $0x00 // mov dword [rsp + 384], ecx - WORD 
$0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_615 - -LBB0_1008: - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x2071e3c4; WORD $0x070c; BYTE $0x04 // vpinsrb xmm1, xmm1, byte [rdi + rax], 4 - LONG $0x385de3c4; WORD $0x01e1 // vinserti128 ymm4, ymm4, xmm1, 1 - LONG $0x1479e3c4; WORD $0x05f0 // vpextrb eax, xmm6, 5 - LONG $0x60248489; WORD $0x0001; BYTE $0x00 // mov dword [rsp + 352], eax - WORD $0x01a8 // test al, 1 - JNE LBB0_616 - JMP LBB0_617 - -LBB0_618: - LONG $0x1479e3c4; WORD $0x07f0 // vpextrb eax, xmm6, 7 - LONG $0x98248489; WORD $0x0000; BYTE $0x00 // mov dword [rsp + 152], eax - WORD $0x01a8 // test al, 1 - JE LBB0_619 - -LBB0_1010: - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - QUAD $0x000000f024848b48 // mov rax, qword [rsp + 240] - LONG $0x2071e3c4; WORD $0x070c; BYTE $0x07 // vpinsrb xmm1, xmm1, byte [rdi + rax], 7 - LONG $0x385de3c4; WORD $0x01e1 // vinserti128 ymm4, ymm4, xmm1, 1 - LONG $0x1479c3c4; WORD $0x08f1 // vpextrb r9d, xmm6, 8 - LONG $0x01c1f641 // test r9b, 1 - JNE LBB0_1011 - -LBB0_620: - LONG $0x1479e3c4; WORD $0x09f1 // vpextrb ecx, xmm6, 9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_621 - -LBB0_1012: - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - QUAD $0x000000d024848b48 // mov rax, qword [rsp + 208] - LONG $0x2071e3c4; WORD $0x070c; BYTE $0x09 // vpinsrb xmm1, xmm1, byte [rdi + rax], 9 - LONG $0x385de3c4; WORD $0x01e1 // vinserti128 ymm4, ymm4, xmm1, 1 - LONG $0x1479c3c4; WORD $0x0af3 // vpextrb r11d, xmm6, 10 - LONG $0x01c3f641 // test r11b, 1 - JNE LBB0_1013 - -LBB0_622: - LONG $0x1479e3c4; WORD $0x0bf0 // vpextrb eax, xmm6, 11 - WORD $0x01a8 // test al, 1 - JE LBB0_623 - -LBB0_1014: - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - QUAD $0x000000c024b48b48 // mov rsi, qword [rsp + 192] - LONG $0x2071e3c4; WORD $0x370c; BYTE $0x0b // vpinsrb xmm1, xmm1, byte [rdi + rsi], 11 - LONG $0x385de3c4; WORD $0x01e1 // vinserti128 ymm4, ymm4, xmm1, 1 - LONG 
$0x1479e3c4; WORD $0x0cf6 // vpextrb esi, xmm6, 12 - LONG $0x01c6f640 // test sil, 1 - JNE LBB0_1015 - -LBB0_624: - LONG $0x1479c3c4; WORD $0x0df2 // vpextrb r10d, xmm6, 13 - LONG $0x01c2f641 // test r10b, 1 - JE LBB0_625 - -LBB0_1016: - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x2071e3c4; WORD $0x170c; BYTE $0x0d // vpinsrb xmm1, xmm1, byte [rdi + rdx], 13 - LONG $0x385de3c4; WORD $0x01e1 // vinserti128 ymm4, ymm4, xmm1, 1 - QUAD $0x000000a824948b48 // mov rdx, qword [rsp + 168] - LONG $0x1479c3c4; WORD $0x0ef5 // vpextrb r13d, xmm6, 14 - LONG $0x01c5f641 // test r13b, 1 - JNE LBB0_1017 - -LBB0_626: - QUAD $0x000000a024948b48 // mov rdx, qword [rsp + 160] - LONG $0x1479c3c4; WORD $0x0ff6 // vpextrb r14d, xmm6, 15 - LONG $0x01c6f641 // test r14b, 1 - JE LBB0_628 - -LBB0_627: - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x2071e3c4; WORD $0x170c; BYTE $0x0f // vpinsrb xmm1, xmm1, byte [rdi + rdx], 15 - LONG $0x385de3c4; WORD $0x01e1 // vinserti128 ymm4, ymm4, xmm1, 1 - -LBB0_628: - LONG $0xd471f5c5; BYTE $0x07 // vpsrlw ymm1, ymm4, 7 - QUAD $0x00000080a5dbf5c5 // vpand ymm4, ymm1, yword 128[rbp] /* [rip + .LCPI0_4] */ - LONG $0x7e79c1c4; BYTE $0xdf // vmovd r15d, xmm3 - LONG $0x01c7f641 // test r15b, 1 - JE LBB0_629 - LONG $0x7ef961c4; BYTE $0xdb // vmovq rbx, xmm11 - LONG $0x1479c3c4; WORD $0x1824; BYTE $0x00 // vpextrb byte [r8 + rbx], xmm4, 0 - LONG $0x1479e3c4; WORD $0x01db // vpextrb ebx, xmm3, 1 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_1019 - -LBB0_630: - LONG $0x1479e3c4; WORD $0x02db // vpextrb ebx, xmm3, 2 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_631 - -LBB0_1020: - LONG $0x397d63c4; WORD $0x01d9 // vextracti128 xmm1, ymm11, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x1479c3c4; WORD $0x1824; BYTE $0x02 // vpextrb byte [r8 + rbx], xmm4, 2 - LONG $0x1479e3c4; WORD $0x03db // vpextrb ebx, xmm3, 3 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_1021 - -LBB0_632: - 
LONG $0x1479e3c4; WORD $0x04db // vpextrb ebx, xmm3, 4 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_633 - -LBB0_1022: - LONG $0x7ef961c4; BYTE $0xd3 // vmovq rbx, xmm10 - LONG $0x1479c3c4; WORD $0x1824; BYTE $0x04 // vpextrb byte [r8 + rbx], xmm4, 4 - LONG $0x1479e3c4; WORD $0x05db // vpextrb ebx, xmm3, 5 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_1023 - -LBB0_634: - LONG $0x1479e3c4; WORD $0x06db // vpextrb ebx, xmm3, 6 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_635 - -LBB0_1024: - LONG $0x397d63c4; WORD $0x01d1 // vextracti128 xmm1, ymm10, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x1479c3c4; WORD $0x1824; BYTE $0x06 // vpextrb byte [r8 + rbx], xmm4, 6 - LONG $0x1479e3c4; WORD $0x07db // vpextrb ebx, xmm3, 7 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_1025 - -LBB0_636: - LONG $0x1479e3c4; WORD $0x08db // vpextrb ebx, xmm3, 8 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_637 - -LBB0_1026: - LONG $0x7ef961c4; BYTE $0xcb // vmovq rbx, xmm9 - LONG $0x1479c3c4; WORD $0x1824; BYTE $0x08 // vpextrb byte [r8 + rbx], xmm4, 8 - LONG $0x1479e3c4; WORD $0x09db // vpextrb ebx, xmm3, 9 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_1027 - -LBB0_638: - LONG $0x1479e3c4; WORD $0x0adb // vpextrb ebx, xmm3, 10 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_639 - -LBB0_1028: - LONG $0x397d63c4; WORD $0x01c9 // vextracti128 xmm1, ymm9, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x1479c3c4; WORD $0x1824; BYTE $0x0a // vpextrb byte [r8 + rbx], xmm4, 10 - LONG $0x1479e3c4; WORD $0x0bdb // vpextrb ebx, xmm3, 11 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_1029 - -LBB0_640: - LONG $0x1479e3c4; WORD $0x0cdb // vpextrb ebx, xmm3, 12 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_641 - -LBB0_1030: - LONG $0x7ef9e1c4; BYTE $0xeb // vmovq rbx, xmm5 - LONG $0x1479c3c4; WORD $0x1824; BYTE $0x0c // vpextrb byte [r8 + rbx], xmm4, 12 - LONG $0x1479e3c4; WORD $0x0ddb // vpextrb ebx, xmm3, 13 - WORD $0xc3f6; 
BYTE $0x01 // test bl, 1 - QUAD $0x000380248c6f7dc5; BYTE $0x00 // vmovdqa ymm9, yword [rsp + 896] - JNE LBB0_1031 - -LBB0_642: - LONG $0x1479e3c4; WORD $0x0edb // vpextrb ebx, xmm3, 14 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_643 - -LBB0_1032: - LONG $0x397de3c4; WORD $0x01e9 // vextracti128 xmm1, ymm5, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x1479c3c4; WORD $0x1824; BYTE $0x0e // vpextrb byte [r8 + rbx], xmm4, 14 - LONG $0x1479e3c4; WORD $0x0fdb // vpextrb ebx, xmm3, 15 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_1033 - -LBB0_644: - QUAD $0x01000002002484f6 // test byte [rsp + 512], 1 - QUAD $0x000340249c6ffdc5; BYTE $0x00 // vmovdqa ymm3, yword [rsp + 832] - JE LBB0_645 - -LBB0_1034: - LONG $0x7ef961c4; BYTE $0xc3 // vmovq rbx, xmm8 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x00 // vpextrb byte [r8 + rbx], xmm1, 0 - QUAD $0x01000001e02484f6 // test byte [rsp + 480], 1 - JNE LBB0_1035 - -LBB0_646: - QUAD $0x01000001c02484f6 // test byte [rsp + 448], 1 - JE LBB0_647 - -LBB0_1036: - LONG $0x397d63c4; WORD $0x01c1 // vextracti128 xmm1, ymm8, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x02 // vpextrb byte [r8 + rbx], xmm1, 2 - QUAD $0x01000001a02484f6 // test byte [rsp + 416], 1 - JNE LBB0_1037 - -LBB0_648: - QUAD $0x01000001802484f6 // test byte [rsp + 384], 1 - JE LBB0_649 - -LBB0_1038: - LONG $0x7ef9e1c4; BYTE $0xfb // vmovq rbx, xmm7 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x04 // vpextrb byte [r8 + rbx], xmm1, 4 - QUAD $0x01000001602484f6 // test byte [rsp + 352], 1 - QUAD $0x00036024846f7dc5; BYTE $0x00 // vmovdqa ymm8, yword [rsp + 864] - JNE LBB0_1039 - -LBB0_650: - QUAD $0x01000001402484f6 // test byte [rsp + 320], 1 - JE LBB0_651 - -LBB0_1040: - LONG $0x397de3c4; WORD $0x01f9 // 
vextracti128 xmm1, ymm7, 1 - LONG $0x7ef9e1c4; BYTE $0xcb // vmovq rbx, xmm1 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x06 // vpextrb byte [r8 + rbx], xmm1, 6 - QUAD $0x01000000982484f6 // test byte [rsp + 152], 1 - JNE LBB0_1041 - -LBB0_652: - LONG $0x01c1f641 // test r9b, 1 - LONG $0x244c8b44; BYTE $0x10 // mov r9d, dword [rsp + 16] - JE LBB0_653 - -LBB0_1042: - LONG $0x7ef9e1c4; BYTE $0xd2 // vmovq rdx, xmm2 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x1479c3c4; WORD $0x100c; BYTE $0x08 // vpextrb byte [r8 + rdx], xmm1, 8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JNE LBB0_1043 - -LBB0_654: - LONG $0x01c3f641 // test r11b, 1 - QUAD $0x00000130249c8b4c // mov r11, qword [rsp + 304] - JE LBB0_655 - -LBB0_1044: - LONG $0x397de3c4; WORD $0x01d1 // vextracti128 xmm1, ymm2, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0a // vpextrb byte [r8 + rcx], xmm1, 10 - WORD $0x01a8 // test al, 1 - JNE LBB0_1045 - -LBB0_656: - LONG $0x01c6f640 // test sil, 1 - JE LBB0_657 - -LBB0_1046: - LONG $0x7ef961c4; BYTE $0xf9 // vmovq rcx, xmm15 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0c // vpextrb byte [r8 + rcx], xmm1, 12 - LONG $0x01c2f641 // test r10b, 1 - QUAD $0x00032024946ffdc5; BYTE $0x00 // vmovdqa ymm2, yword [rsp + 800] - JNE LBB0_1047 - -LBB0_658: - LONG $0x01c5f641 // test r13b, 1 - LONG $0x24548b4c; BYTE $0x30 // mov r10, qword [rsp + 48] - JE LBB0_659 - -LBB0_1048: - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x7ef9e1c4; BYTE $0xc9 // vmovq rcx, xmm1 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0e // vpextrb byte [r8 + rcx], xmm1, 14 - LONG $0x01c6f641 // test r14b, 1 - JE LBB0_25 - JMP LBB0_1049 - -LBB0_629: - LONG 
$0x1479e3c4; WORD $0x01db // vpextrb ebx, xmm3, 1 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_630 - -LBB0_1019: - LONG $0x16f963c4; WORD $0x01db // vpextrq rbx, xmm11, 1 - LONG $0x1479c3c4; WORD $0x1824; BYTE $0x01 // vpextrb byte [r8 + rbx], xmm4, 1 - LONG $0x1479e3c4; WORD $0x02db // vpextrb ebx, xmm3, 2 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_1020 - -LBB0_631: - LONG $0x1479e3c4; WORD $0x03db // vpextrb ebx, xmm3, 3 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_632 - -LBB0_1021: - LONG $0x397d63c4; WORD $0x01d9 // vextracti128 xmm1, ymm11, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x1479c3c4; WORD $0x1824; BYTE $0x03 // vpextrb byte [r8 + rbx], xmm4, 3 - LONG $0x1479e3c4; WORD $0x04db // vpextrb ebx, xmm3, 4 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_1022 - -LBB0_633: - LONG $0x1479e3c4; WORD $0x05db // vpextrb ebx, xmm3, 5 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_634 - -LBB0_1023: - LONG $0x16f963c4; WORD $0x01d3 // vpextrq rbx, xmm10, 1 - LONG $0x1479c3c4; WORD $0x1824; BYTE $0x05 // vpextrb byte [r8 + rbx], xmm4, 5 - LONG $0x1479e3c4; WORD $0x06db // vpextrb ebx, xmm3, 6 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_1024 - -LBB0_635: - LONG $0x1479e3c4; WORD $0x07db // vpextrb ebx, xmm3, 7 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_636 - -LBB0_1025: - LONG $0x397d63c4; WORD $0x01d1 // vextracti128 xmm1, ymm10, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x1479c3c4; WORD $0x1824; BYTE $0x07 // vpextrb byte [r8 + rbx], xmm4, 7 - LONG $0x1479e3c4; WORD $0x08db // vpextrb ebx, xmm3, 8 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_1026 - -LBB0_637: - LONG $0x1479e3c4; WORD $0x09db // vpextrb ebx, xmm3, 9 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_638 - -LBB0_1027: - LONG $0x16f963c4; WORD $0x01cb // vpextrq rbx, xmm9, 1 - LONG $0x1479c3c4; WORD $0x1824; BYTE $0x09 // vpextrb byte [r8 + rbx], xmm4, 9 - LONG $0x1479e3c4; WORD $0x0adb // vpextrb ebx, 
xmm3, 10 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_1028 - -LBB0_639: - LONG $0x1479e3c4; WORD $0x0bdb // vpextrb ebx, xmm3, 11 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_640 - -LBB0_1029: - LONG $0x397d63c4; WORD $0x01c9 // vextracti128 xmm1, ymm9, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x1479c3c4; WORD $0x1824; BYTE $0x0b // vpextrb byte [r8 + rbx], xmm4, 11 - LONG $0x1479e3c4; WORD $0x0cdb // vpextrb ebx, xmm3, 12 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_1030 - -LBB0_641: - LONG $0x1479e3c4; WORD $0x0ddb // vpextrb ebx, xmm3, 13 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - QUAD $0x000380248c6f7dc5; BYTE $0x00 // vmovdqa ymm9, yword [rsp + 896] - JE LBB0_642 - -LBB0_1031: - LONG $0x16f9e3c4; WORD $0x01eb // vpextrq rbx, xmm5, 1 - LONG $0x1479c3c4; WORD $0x1824; BYTE $0x0d // vpextrb byte [r8 + rbx], xmm4, 13 - LONG $0x1479e3c4; WORD $0x0edb // vpextrb ebx, xmm3, 14 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JNE LBB0_1032 - -LBB0_643: - LONG $0x1479e3c4; WORD $0x0fdb // vpextrb ebx, xmm3, 15 - WORD $0xc3f6; BYTE $0x01 // test bl, 1 - JE LBB0_644 - -LBB0_1033: - LONG $0x397de3c4; WORD $0x01e9 // vextracti128 xmm1, ymm5, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x1479c3c4; WORD $0x1824; BYTE $0x0f // vpextrb byte [r8 + rbx], xmm4, 15 - QUAD $0x01000002002484f6 // test byte [rsp + 512], 1 - QUAD $0x000340249c6ffdc5; BYTE $0x00 // vmovdqa ymm3, yword [rsp + 832] - JNE LBB0_1034 - -LBB0_645: - QUAD $0x01000001e02484f6 // test byte [rsp + 480], 1 - JE LBB0_646 - -LBB0_1035: - LONG $0x16f963c4; WORD $0x01c3 // vpextrq rbx, xmm8, 1 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x01 // vpextrb byte [r8 + rbx], xmm1, 1 - QUAD $0x01000001c02484f6 // test byte [rsp + 448], 1 - JNE LBB0_1036 - -LBB0_647: - QUAD $0x01000001a02484f6 // test byte [rsp + 416], 1 - JE LBB0_648 - -LBB0_1037: - LONG $0x397d63c4; WORD $0x01c1 // vextracti128 xmm1, 
ymm8, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x03 // vpextrb byte [r8 + rbx], xmm1, 3 - QUAD $0x01000001802484f6 // test byte [rsp + 384], 1 - JNE LBB0_1038 - -LBB0_649: - QUAD $0x01000001602484f6 // test byte [rsp + 352], 1 - QUAD $0x00036024846f7dc5; BYTE $0x00 // vmovdqa ymm8, yword [rsp + 864] - JE LBB0_650 - -LBB0_1039: - LONG $0x16f9e3c4; WORD $0x01fb // vpextrq rbx, xmm7, 1 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x05 // vpextrb byte [r8 + rbx], xmm1, 5 - QUAD $0x01000001402484f6 // test byte [rsp + 320], 1 - JNE LBB0_1040 - -LBB0_651: - QUAD $0x01000000982484f6 // test byte [rsp + 152], 1 - JE LBB0_652 - -LBB0_1041: - LONG $0x397de3c4; WORD $0x01f9 // vextracti128 xmm1, ymm7, 1 - LONG $0x16f9e3c4; WORD $0x01cb // vpextrq rbx, xmm1, 1 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x1479c3c4; WORD $0x180c; BYTE $0x07 // vpextrb byte [r8 + rbx], xmm1, 7 - LONG $0x01c1f641 // test r9b, 1 - LONG $0x244c8b44; BYTE $0x10 // mov r9d, dword [rsp + 16] - JNE LBB0_1042 - -LBB0_653: - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB0_654 - -LBB0_1043: - LONG $0x16f9e3c4; WORD $0x01d1 // vpextrq rcx, xmm2, 1 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x09 // vpextrb byte [r8 + rcx], xmm1, 9 - LONG $0x01c3f641 // test r11b, 1 - QUAD $0x00000130249c8b4c // mov r11, qword [rsp + 304] - JNE LBB0_1044 - -LBB0_655: - WORD $0x01a8 // test al, 1 - JE LBB0_656 - -LBB0_1045: - LONG $0x397de3c4; WORD $0x01d1 // vextracti128 xmm1, ymm2, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0b // vpextrb byte [r8 + rcx], xmm1, 11 - LONG $0x01c6f640 // test sil, 1 - JNE LBB0_1046 - -LBB0_657: - LONG $0x01c2f641 
// test r10b, 1 - QUAD $0x00032024946ffdc5; BYTE $0x00 // vmovdqa ymm2, yword [rsp + 800] - JE LBB0_658 - -LBB0_1047: - LONG $0x16f963c4; WORD $0x01f9 // vpextrq rcx, xmm15, 1 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0d // vpextrb byte [r8 + rcx], xmm1, 13 - LONG $0x01c5f641 // test r13b, 1 - LONG $0x24548b4c; BYTE $0x30 // mov r10, qword [rsp + 48] - JNE LBB0_1048 - -LBB0_659: - LONG $0x01c6f641 // test r14b, 1 - JE LBB0_25 - -LBB0_1049: - LONG $0x397d63c4; WORD $0x01f9 // vextracti128 xmm1, ymm15, 1 - LONG $0x16f9e3c4; WORD $0x01c9 // vpextrq rcx, xmm1, 1 - LONG $0x397de3c4; WORD $0x01e1 // vextracti128 xmm1, ymm4, 1 - LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0f // vpextrb byte [r8 + rcx], xmm1, 15 - JMP LBB0_25 - -LBB0_1050: - WORD $0x394d; BYTE $0xd4 // cmp r12, r10 - JNE LBB0_1055 - -LBB0_1051: - MOVQ 960(SP), SP - VZEROUPPER + WORD $0x3941; BYTE $0xca // cmp r10d, ecx + JGE LBB0_4 + WORD $0x8945; BYTE $0xd1 // mov r9d, r10d + WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi] + WORD $0x0124 // and al, 1 + LONG $0x0a048842 // mov byte [rdx + r9], al + WORD $0x894c; BYTE $0xce // mov rsi, r9 + LONG $0x01ce8348 // or rsi, 1 + WORD $0xce39 // cmp esi, ecx + JGE LBB0_4 + WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi] + WORD $0xe8d0 // shr al, 1 + WORD $0x0124 // and al, 1 + WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al + WORD $0x894c; BYTE $0xce // mov rsi, r9 + LONG $0x02ce8348 // or rsi, 2 + WORD $0xce39 // cmp esi, ecx + JGE LBB0_4 + WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi] + WORD $0xe8c0; BYTE $0x02 // shr al, 2 + WORD $0x0124 // and al, 1 + WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al + WORD $0x894c; BYTE $0xce // mov rsi, r9 + LONG $0x03ce8348 // or rsi, 3 + WORD $0xce39 // cmp esi, ecx + JGE LBB0_4 + WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi] + WORD $0xe8c0; BYTE $0x03 // shr al, 3 + WORD $0x0124 // and al, 1 + WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al 
+ WORD $0x894c; BYTE $0xce // mov rsi, r9 + LONG $0x04ce8348 // or rsi, 4 + WORD $0xce39 // cmp esi, ecx + JGE LBB0_4 + WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi] + WORD $0xe8c0; BYTE $0x04 // shr al, 4 + WORD $0x0124 // and al, 1 + WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al + WORD $0x894c; BYTE $0xce // mov rsi, r9 + LONG $0x05ce8348 // or rsi, 5 + WORD $0xce39 // cmp esi, ecx + JGE LBB0_4 + WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi] + WORD $0xe8c0; BYTE $0x05 // shr al, 5 + WORD $0x0124 // and al, 1 + WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al + WORD $0x894c; BYTE $0xce // mov rsi, r9 + LONG $0x06ce8348 // or rsi, 6 + WORD $0xce39 // cmp esi, ecx + JGE LBB0_4 + WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi] + WORD $0xe8c0; BYTE $0x06 // shr al, 6 + WORD $0x0124 // and al, 1 + WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al + LONG $0x07c98349 // or r9, 7 + WORD $0x3941; BYTE $0xc9 // cmp r9d, ecx + JGE LBB0_4 + WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi] + WORD $0xe8c0; BYTE $0x07 // shr al, 7 + LONG $0x0a048842 // mov byte [rdx + r9], al + JMP LBB0_4 + +LBB0_5: RET - -LBB0_1052: - LONG $0x244c8b44; BYTE $0x10 // mov r9d, dword [rsp + 16] - LONG $0x24548b4c; BYTE $0x30 // mov r10, qword [rsp + 48] - JMP LBB0_1055 - -LBB0_1054: - LONG $0x244c8b44; BYTE $0x10 // mov r9d, dword [rsp + 16] - JMP LBB0_1055