From b79dda84558faf5fd1c8f42777c2e71be7229512 Mon Sep 17 00:00:00 2001 From: Artur Melanchyk <13834276+arturmelanchyk@users.noreply.github.com> Date: Mon, 3 Nov 2025 14:01:57 +0100 Subject: [PATCH 1/4] perf: unroll loop in ToLowerBytes/ToUpperBytes --- bytes.go | 52 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/bytes.go b/bytes.go index 6fe5734..6bfd3ea 100644 --- a/bytes.go +++ b/bytes.go @@ -6,16 +6,60 @@ package utils // ToLowerBytes converts ascii slice to lower-case func ToLowerBytes(b []byte) []byte { - for i := range b { - b[i] = toLowerTable[b[i]] + table := toLowerTable + n := len(b) + i := 0 + + // Unroll by 4 to balance instruction-level parallelism with cache pressure. + limit := n &^ 3 + for i < limit { + b0 := b[i+0] + b1 := b[i+1] + b2 := b[i+2] + b3 := b[i+3] + + b[i+0] = table[b0] + b[i+1] = table[b1] + b[i+2] = table[b2] + b[i+3] = table[b3] + + i += 4 } + + for i < n { + b[i] = table[b[i]] + i++ + } + return b } // ToUpperBytes converts ascii slice to upper-case func ToUpperBytes(b []byte) []byte { - for i := range b { - b[i] = toUpperTable[b[i]] + table := toUpperTable + n := len(b) + i := 0 + + // Unroll by 4 to match ToLowerBytes and maximize throughput on amd64. 
+ limit := n &^ 3 + for i < limit { + b0 := b[i+0] + b1 := b[i+1] + b2 := b[i+2] + b3 := b[i+3] + + b[i+0] = table[b0] + b[i+1] = table[b1] + b[i+2] = table[b2] + b[i+3] = table[b3] + + i += 4 } + + for i < n { + b[i] = table[b[i]] + i++ + } + return b } From 9ea03e1bad03285fd014144ddaf325f0ccc86ed1 Mon Sep 17 00:00:00 2001 From: Artur Melanchyk <13834276+arturmelanchyk@users.noreply.github.com> Date: Mon, 3 Nov 2025 14:33:59 +0100 Subject: [PATCH 2/4] perf: unroll loop in EqualFold --- byteseq.go | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/byteseq.go b/byteseq.go index f0f4ce9..6a0b26a 100644 --- a/byteseq.go +++ b/byteseq.go @@ -9,10 +9,46 @@ func EqualFold[S byteSeq](b, s S) bool { if len(b) != len(s) { return false } - for i := len(b) - 1; i >= 0; i-- { - if toUpperTable[b[i]] != toUpperTable[s[i]] { + + table := toUpperTable + n := len(b) + i := 0 + + // Unroll by 4 to match other hot paths and drive instruction-level parallelism. 
+ limit := n &^ 3 + for i < limit { + b0 := b[i+0] + s0 := s[i+0] + if table[b0] != table[s0] { + return false + } + + b1 := b[i+1] + s1 := s[i+1] + if table[b1] != table[s1] { + return false + } + + b2 := b[i+2] + s2 := s[i+2] + if table[b2] != table[s2] { + return false + } + + b3 := b[i+3] + s3 := s[i+3] + if table[b3] != table[s3] { + return false + } + + i += 4 + } + + for i < n { + if table[b[i]] != table[s[i]] { return false } + i++ } return true } From eb8645be0089cb68344b3a588efdfdf54a30de41 Mon Sep 17 00:00:00 2001 From: Artur Melanchyk <13834276+arturmelanchyk@users.noreply.github.com> Date: Mon, 3 Nov 2025 17:59:47 +0100 Subject: [PATCH 3/4] chore: modernize loop in IsIPv4 Signed-off-by: Artur Melanchyk <13834276+arturmelanchyk@users.noreply.github.com> --- ips.go | 2 +- ips_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ips.go b/ips.go index 3776956..e391218 100644 --- a/ips.go +++ b/ips.go @@ -8,7 +8,7 @@ import ( // but without check for IPv6 case and without returning net.IP slice, whereby IsIPv4 makes no allocations. 
func IsIPv4(s string) bool { //nolint:modernize-loop // old way is more readable - for i := 0; i < net.IPv4len; i++ { + for i := range net.IPv4len { if len(s) == 0 { return false } diff --git a/ips_test.go b/ips_test.go index e8aef2d..2808e0b 100644 --- a/ips_test.go +++ b/ips_test.go @@ -101,7 +101,7 @@ func Test_IPWhitespace(t *testing.T) { require.False(t, IsIPv6("::1 ")) } -// go test -v -run=^$ -bench=UnsafeString -benchmem -count=2 +// go test -v -run=^$ -bench=Benchmark_IsIPv4 -benchmem -count=6 func Benchmark_IsIPv4(b *testing.B) { ip := "174.23.33.100" var res bool @@ -121,7 +121,7 @@ func Benchmark_IsIPv4(b *testing.B) { }) } -// go test -v -run=^$ -bench=UnsafeString -benchmem -count=2 +// go test -v -run=^$ -bench=Benchmark_IsIPv6 -benchmem -count=6 func Benchmark_IsIPv6(b *testing.B) { ip := "9396:9549:b4f7:8ed0:4791:1330:8c06:e62d" var res bool From 9ef41fddc4cc07cf1216bf4e33566dc379e2a976 Mon Sep 17 00:00:00 2001 From: Artur Melanchyk <13834276+arturmelanchyk@users.noreply.github.com> Date: Mon, 3 Nov 2025 18:03:49 +0100 Subject: [PATCH 4/4] chore: remove stale modernize-loop nolint directive in IsIPv4 Signed-off-by: Artur Melanchyk <13834276+arturmelanchyk@users.noreply.github.com> --- ips.go | 1 - 1 file changed, 1 deletion(-) diff --git a/ips.go b/ips.go index e391218..38fe3be 100644 --- a/ips.go +++ b/ips.go @@ -7,7 +7,6 @@ import ( // IsIPv4 works the same way as net.ParseIP, // but without check for IPv6 case and without returning net.IP slice, whereby IsIPv4 makes no allocations. func IsIPv4(s string) bool { - //nolint:modernize-loop // old way is more readable for i := range net.IPv4len { if len(s) == 0 { return false