From b6d268f223669169694ead8bf1300a74703d5f4c Mon Sep 17 00:00:00 2001
From: Mark Beech
Date: Mon, 11 May 2026 19:10:43 +0100
Subject: [PATCH] Drop unused capture group from compiled regex
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The outer parentheses around the alternation produced a capturing group
whose backreference was never read — preg_match's $matches[0] returns
the full match regardless. Switched to (?:...) so PCRE doesn't have to
allocate the group.

An earlier revision of this patch also memoized the compiled string in a
process-wide static keyed by md5(serialize($patterns)). That part was
dropped: computing the key serializes and hashes the whole (~1500-entry)
pattern list on every call, which is more work than the single implode()
it was meant to skip, and the static would retain every distinct compiled
pattern for the lifetime of the process. If per-request construction cost
needs addressing, the cache should sit where the pattern source is known
(so the key is free), not inside compileRegex().

Co-Authored-By: Claude Opus 4.7 (1M context)
---
Note: hunks were revised by hand during review; the post-image blob id on
the "index" line below predates that revision.

 src/CrawlerDetect.php | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/CrawlerDetect.php b/src/CrawlerDetect.php
index 5adc62c..4121fad 100644
--- a/src/CrawlerDetect.php
+++ b/src/CrawlerDetect.php
@@ -92,12 +92,15 @@ public function __construct(?array $headers = null, $userAgent = null)
     /**
      * Compile the regex patterns into one regex string.
      *
+     * A non-capturing group is used because callers only need the full
+     * match (preg_match's $matches[0]), not a back-reference.
+     *
      * @param array
      * @return string
      */
     public function compileRegex($patterns)
     {
-        return '('.implode('|', $patterns).')';
+        return '(?:'.implode('|', $patterns).')';
     }
 
     /**