From 7363a134ac917c521dfbb0548394da44faec1214 Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada
Date: Sat, 12 Jul 2025 19:31:31 +0900
Subject: [PATCH] Improve performance of `URI::MailTo::EMAIL_REGEXP`

Fix the performance regression at #172 for valid emails.

```yml
prelude: |
  require 'uri/mailto'
  n = 1000
  re = URI::MailTo::EMAIL_REGEXP
benchmark:
  n.t..t.: re.match?("n.t..t.@docomo.ne.jp")
  example: re.match?("example@example.info")
```

|         |released| 788274b| c5974f0|    this|
|:--------|-------:|-------:|-------:|-------:|
|n.t..t.  |  3.795M|  4.864M|  4.993M|  8.739M|
|         |       -|   1.28x|   1.32x|   2.30x|
|example  |  3.911M|  3.740M|  2.838M|  3.880M|
|         |   1.38x|   1.32x|       -|   1.37x|
---
 lib/uri/mailto.rb       |  6 +++++-
 test/uri/test_mailto.rb | 16 ++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/lib/uri/mailto.rb b/lib/uri/mailto.rb
index bd63aa0..f747b79 100644
--- a/lib/uri/mailto.rb
+++ b/lib/uri/mailto.rb
@@ -52,7 +52,11 @@ class MailTo < Generic
     HEADER_REGEXP = /\A(?(?:%\h\h|[!$'-.0-;@-Z_a-z~])*=(?:%\h\h|[!$'-.0-;@-Z_a-z~])*)(?:&\g)*\z/
     # practical regexp for email address
     # https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
-    EMAIL_REGEXP = /\A(?!\.)(?!.*\.{2})[a-zA-Z0-9.!\#$%&'*+\/=?^_`{|}~-]+(?(n) {'a@invalid' + longlabel*(n*rate) + endlabel}
+    assert_linear_performance(1..10, pre: pre) do |to|
+      re =~ to or flunk
+    end
+    endlabel = '.' + 'email'.rjust(64, 'd')
+    assert_linear_performance(1..10, pre: pre) do |to|
+      re =~ to and flunk
+    end
+  end
+
   def test_to_s
     u = URI::MailTo.build([nil, 'subject=Ruby'])