diff --git a/bytes.go b/bytes.go index 6fe5734..6bfd3ea 100644 --- a/bytes.go +++ b/bytes.go @@ -6,16 +6,60 @@ package utils // ToLowerBytes converts ascii slice to lower-case func ToLowerBytes(b []byte) []byte { - for i := range b { - b[i] = toLowerTable[b[i]] + table := toLowerTable + n := len(b) + i := 0 + + // Unroll by 4 to balance instruction-level parallelism with cache pressure. + limit := n &^ 3 + for i < limit { + b0 := b[i+0] + b1 := b[i+1] + b2 := b[i+2] + b3 := b[i+3] + + b[i+0] = table[b0] + b[i+1] = table[b1] + b[i+2] = table[b2] + b[i+3] = table[b3] + + i += 4 } + + for i < n { + b[i] = table[b[i]] + i++ + } + return b } // ToUpperBytes converts ascii slice to upper-case func ToUpperBytes(b []byte) []byte { - for i := range b { - b[i] = toUpperTable[b[i]] + table := toUpperTable + n := len(b) + i := 0 + + // Unroll by 4 to match ToLowerBytes and maximize throughput on amd64. + limit := n &^ 3 + for i < limit { + b0 := b[i+0] + b1 := b[i+1] + b2 := b[i+2] + b3 := b[i+3] + + b[i+0] = table[b0] + b[i+1] = table[b1] + b[i+2] = table[b2] + b[i+3] = table[b3] + + i += 4 } + + for i < n { + b[i] = table[b[i]] + i++ + } + return b } diff --git a/byteseq.go b/byteseq.go index f0f4ce9..6a0b26a 100644 --- a/byteseq.go +++ b/byteseq.go @@ -9,10 +9,46 @@ func EqualFold[S byteSeq](b, s S) bool { if len(b) != len(s) { return false } - for i := len(b) - 1; i >= 0; i-- { - if toUpperTable[b[i]] != toUpperTable[s[i]] { + + table := toUpperTable + n := len(b) + i := 0 + + // Unroll by 4 to match other hot paths and drive instruction-level parallelism. + limit := n &^ 3 + for i < limit { + b0 := b[i+0] + s0 := s[i+0] + if table[b0] != table[s0] { + return false + } + + b1 := b[i+1] + s1 := s[i+1] + if table[b1] != table[s1] { + return false + } + + b2 := b[i+2] + s2 := s[i+2] + if table[b2] != table[s2] { + return false + } + + b3 := b[i+3] + s3 := s[i+3] + if table[b3] != table[s3] { + return false + } + + i += 4 + } + + for i < n { + if table[b[i]] != table[s[i]] { return false } + i++ } return true } diff --git a/ips.go b/ips.go index 3776956..38fe3be 100644 --- a/ips.go +++ b/ips.go @@ -7,8 +7,7 @@ import ( // IsIPv4 works the same way as net.ParseIP, // but without check for IPv6 case and without returning net.IP slice, whereby IsIPv4 makes no allocations. func IsIPv4(s string) bool { - //nolint:modernize-loop // old way is more readable - for i := 0; i < net.IPv4len; i++ { + for i := range net.IPv4len { if len(s) == 0 { return false } diff --git a/ips_test.go b/ips_test.go index e8aef2d..2808e0b 100644 --- a/ips_test.go +++ b/ips_test.go @@ -101,7 +101,7 @@ func Test_IPWhitespace(t *testing.T) { require.False(t, IsIPv6("::1 ")) } -// go test -v -run=^$ -bench=UnsafeString -benchmem -count=2 +// go test -v -run=^$ -bench=Benchmark_IsIPv4 -benchmem -count=6 func Benchmark_IsIPv4(b *testing.B) { ip := "174.23.33.100" var res bool @@ -121,7 +121,7 @@ func Benchmark_IsIPv4(b *testing.B) { }) } -// go test -v -run=^$ -bench=UnsafeString -benchmem -count=2 +// go test -v -run=^$ -bench=Benchmark_IsIPv6 -benchmem -count=6 func Benchmark_IsIPv6(b *testing.B) { ip := "9396:9549:b4f7:8ed0:4791:1330:8c06:e62d" var res bool