From 2856614bc9cf4f1adf5bbdbc8e9c63c0554db416 Mon Sep 17 00:00:00 2001 From: masklinn Date: Tue, 13 Feb 2024 20:27:16 +0100 Subject: [PATCH] Add support for lazy matchers Add lazy builtin matchers (with a separately compiled file), as well as loading json or yaml files using lazy matchers. Lazy matchers are very much a tradeoff: they improve import speed (and memory consumption until triggered), but slow down run speed, possibly dramatically: - importing the package itself takes ~36ms - importing the lazy matchers takes ~36ms (including the package, so ~0) and ~70kB RSS - importing the eager matchers takes ~97ms and ~780kB RSS - triggering the instantiation of the lazy matchers adds ~800kB RSS - running bench on the sample file using the lazy matcher has 700~800ms overhead compared to the eager matchers While the lazy matchers are less costly across the board until they're used, benching the sample file causes the loading of *every* regex -- likely due to matching failures -- which has a 700~800ms overhead over eager matchers, and increases the RSS by ~800kB (on top of the original 70). Thus lazy matchers are not a great default for the basic parser. Though they might be a good opt-in if the user only ever uses one of the domains (especially if it's not the devices one as that's by far the largest). With the re2 parser however, only 156 of the 1162 regexes get evaluated, leading to a minor CPU overhead of 20~30ms (1% of bench time) and a more reasonable memory overhead. Thus use the lazy matcher for the re2 parser. On the more net-negative but relatively minor side of things, the pregenerated lazy matchers file adds 120k to the on-disk requirements of the library, and ~25k to the wheel archive. This is also what the _regexes and _matchers precompiled files do. pyc files seem to be even bigger (~130k) so the tradeoff is dubious even if they are slightly faster. 
Fixes #171, fixes #173 --- setup.py | 239 +++++++++++++++++++++--------------- src/ua_parser/__init__.py | 5 +- src/ua_parser/_lazy.pyi | 10 ++ src/ua_parser/_matchers.pyi | 11 +- src/ua_parser/basic.py | 7 +- src/ua_parser/core.py | 57 +++++++-- src/ua_parser/lazy.py | 167 +++++++++++++++++++++++++ src/ua_parser/loaders.py | 72 +++++++++-- src/ua_parser/re2.py | 20 +-- tests/test_core.py | 2 + 10 files changed, 452 insertions(+), 138 deletions(-) create mode 100644 src/ua_parser/_lazy.pyi create mode 100644 src/ua_parser/lazy.py diff --git a/setup.py b/setup.py index d33bc82..5730379 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # flake8: noqa import io -from contextlib import suppress +from contextlib import suppress, contextmanager from os import fspath from pathlib import Path from typing import Optional, List, Dict @@ -52,21 +52,6 @@ def run(self) -> None: f"Unable to find regexes.yaml, should be at {yaml_src!r}" ) - def write_matcher(f, typ: str, fields: List[Optional[object]]): - f.write(f" {typ}(".encode()) - while len(fields) > 1 and fields[-1] is None: - fields = fields[:-1] - f.write(", ".join(map(repr, fields)).encode()) - f.write(b"),\n") - - def write_params(fields): - # strip trailing None values - while len(fields) > 1 and fields[-1] is None: - fields.pop() - - for field in fields: - fp.write((f" {field!r},\n").encode()) - with yaml_src.open("rb") as f: regexes = yaml.safe_load(f) @@ -79,96 +64,150 @@ def write_params(fields): outdir.mkdir(parents=True, exist_ok=True) dest = outdir / "_matchers.py" + dest_lazy = outdir / "_lazy.py" dest_legacy = outdir / "_regexes.py" - with dest.open("wb") as f, dest_legacy.open("wb") as fp: - # fmt: off - f.write(b"""\ + with dest.open("wb") as eager, dest_lazy.open("wb") as lazy, dest_legacy.open( + "wb" + ) as legacy: + eager = EagerWriter(eager) + lazy = LazyWriter(lazy) + legacy = LegacyWriter(legacy) + + for section in ["user_agent_parsers", "os_parsers", "device_parsers"]: + with 
eager.section(section), lazy.section(section), legacy.section( + section + ): + extract = EXTRACTORS[section] + for p in regexes[section]: + el = trim(extract(p)) + eager.item(el) + lazy.item(el) + legacy.item(el) + eager.end() + lazy.end() + legacy.end() + + +def trim(l): + while len(l) > 1 and l[-1] is None: + l.pop() + return l + + +EXTRACTORS = { + "user_agent_parsers": lambda p: [ + p["regex"], + p.get("family_replacement"), + p.get("v1_replacement"), + p.get("v2_replacement"), + ], + "os_parsers": lambda p: [ + p["regex"], + p.get("os_replacement"), + p.get("os_v1_replacement"), + p.get("os_v2_replacement"), + p.get("os_v3_replacement"), + p.get("os_v4_replacement"), + ], + "device_parsers": lambda p: [ + p["regex"], + p.get("regex_flag"), + p.get("device_replacement"), + p.get("brand_replacement"), + p.get("model_replacement"), + ], +} + + +class Writer: + section_end = b"" + + def __init__(self, fp): + self.fp = fp + self.fp.write( + b"""\ ######################################################## # NOTICE: this file is autogenerated from regexes.yaml # ######################################################## +""" + ) + self.fp.write(self.prefix) + self._section = None + + @contextmanager + def section(self, id): + self._section = id + self.fp.write(self.sections[id]) + yield + self.fp.write(self.section_end) + + def item(self, elements): + # DeviceMatcher(re, flag, repl1), + self.fp.write(self.items[self._section]) + self.fp.write(", ".join(map(repr, elements)).encode()) + self.fp.write(b"),\n") + + def end(self): + self.fp.write(self.suffix) + + +class LegacyWriter(Writer): + prefix = b"""\ +__all__ = [ + "USER_AGENT_PARSERS", + "DEVICE_PARSERS", + "OS_PARSERS", +] + +from .user_agent_parser import UserAgentParser, DeviceParser, OSParser + +""" + sections = { + "user_agent_parsers": b"USER_AGENT_PARSERS = [\n", + "os_parsers": b"\n\nOS_PARSERS = [\n", + "device_parsers": b"\n\nDEVICE_PARSERS = [\n", + } + section_end = b"]" + items = { + 
"user_agent_parsers": b" UserAgentParser(", + "os_parsers": b" OSParser(", + "device_parsers": b" DeviceParser(", + } + suffix = b"\n" + + +class EagerWriter(Writer): + prefix = b"""\ +__all__ = ["MATCHERS"] + +from typing import Tuple, List +from .core import UserAgentMatcher, OSMatcher, DeviceMatcher + +MATCHERS: Tuple[List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]] = ([ +""" + sections = { + "user_agent_parsers": b"", + "os_parsers": b"], [\n", + "device_parsers": b"], [\n", + } + items = { + "user_agent_parsers": b" UserAgentMatcher(", + "os_parsers": b" OSMatcher(", + "device_parsers": b" DeviceMatcher(", + } + suffix = b"])\n" + + +class LazyWriter(EagerWriter): + prefix = b"""\ +__all__ = ["MATCHERS"] + +from typing import Tuple, List +from .lazy import UserAgentMatcher, OSMatcher, DeviceMatcher -from .core import Matchers, UserAgentMatcher, OSMatcher, DeviceMatcher - -MATCHERS: Matchers = ([ -""") - fp.write(b"# -*- coding: utf-8 -*-\n") - fp.write(b"########################################################\n") - fp.write(b"# NOTICE: This file is autogenerated from regexes.yaml #\n") - fp.write(b"########################################################\n") - fp.write(b"\n") - fp.write(b"from .user_agent_parser import (\n") - fp.write(b" UserAgentParser, DeviceParser, OSParser,\n") - fp.write(b")\n") - fp.write(b"\n") - fp.write(b"__all__ = ('USER_AGENT_PARSERS', 'DEVICE_PARSERS', 'OS_PARSERS')\n") - fp.write(b"\n") - fp.write(b"USER_AGENT_PARSERS = [\n") - for device_parser in regexes["user_agent_parsers"]: - write_matcher(f, "UserAgentMatcher", [ - device_parser["regex"], - device_parser.get("family_replacement"), - device_parser.get("v1_replacement"), - device_parser.get("v2_replacement"), - ]) - - fp.write(b" UserAgentParser(\n") - write_params([ - device_parser["regex"], - device_parser.get("family_replacement"), - device_parser.get("v1_replacement"), - device_parser.get("v2_replacement"), - ]) - fp.write(b" ),\n") - f.write(b" ], [\n") - 
fp.write(b"]\n\n") - - fp.write(b"OS_PARSERS = [\n") - for device_parser in regexes["os_parsers"]: - write_matcher(f, "OSMatcher", [ - device_parser["regex"], - device_parser.get("os_replacement"), - device_parser.get("os_v1_replacement"), - device_parser.get("os_v2_replacement"), - device_parser.get("os_v3_replacement"), - device_parser.get("os_v4_replacement"), - ]) - - fp.write(b" OSParser(\n") - write_params([ - device_parser["regex"], - device_parser.get("os_replacement"), - device_parser.get("os_v1_replacement"), - device_parser.get("os_v2_replacement"), - device_parser.get("os_v3_replacement"), - device_parser.get("os_v4_replacement"), - ]) - fp.write(b" ),\n") - f.write(b" ], [\n") - fp.write(b"]\n\n") - - fp.write(b"DEVICE_PARSERS = [\n") - for device_parser in regexes["device_parsers"]: - write_matcher(f, "DeviceMatcher", [ - device_parser["regex"], - device_parser.get("regex_flag"), - device_parser.get("device_replacement"), - device_parser.get("brand_replacement"), - device_parser.get("model_replacement"), - ]) - - fp.write(b" DeviceParser(\n") - write_params([ - device_parser["regex"], - device_parser.get("regex_flag"), - device_parser.get("device_replacement"), - device_parser.get("brand_replacement"), - device_parser.get("model_replacement"), - ]) - fp.write(b" ),\n") - f.write(b"])\n") - fp.write(b"]\n") - # fmt: on +MATCHERS: Tuple[List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]] = ([ +""" setup( diff --git a/src/ua_parser/__init__.py b/src/ua_parser/__init__.py index 01b73ef..46cae5b 100644 --- a/src/ua_parser/__init__.py +++ b/src/ua_parser/__init__.py @@ -36,6 +36,7 @@ "UserAgent", "UserAgentMatcher", "load_builtins", + "load_lazy_builtins", "load_data", "load_yaml", "parse", @@ -65,7 +66,7 @@ ) from .basic import Parser as BasicParser from .caching import CachingParser, Clearing, LRU, Locking -from .loaders import load_builtins, load_data, load_yaml +from .loaders import load_builtins, load_lazy_builtins, load_data, load_yaml 
Re2Parser: Optional[Callable[[Matchers], Parser]] = None with contextlib.suppress(ImportError): @@ -79,7 +80,7 @@ def __getattr__(name: str) -> Parser: global parser if name == "parser": if Re2Parser is not None: - parser = Re2Parser(load_builtins()) + parser = Re2Parser(load_lazy_builtins()) else: parser = CachingParser( BasicParser(load_builtins()), diff --git a/src/ua_parser/_lazy.pyi b/src/ua_parser/_lazy.pyi new file mode 100644 index 0000000..aa67478 --- /dev/null +++ b/src/ua_parser/_lazy.pyi @@ -0,0 +1,10 @@ +__all__ = ["MATCHERS"] + +from typing import Tuple, List +from .lazy import UserAgentMatcher, OSMatcher, DeviceMatcher + +MATCHERS: Tuple[ + List[UserAgentMatcher], + List[OSMatcher], + List[DeviceMatcher], +] diff --git a/src/ua_parser/_matchers.pyi b/src/ua_parser/_matchers.pyi index a27227f..da0b023 100644 --- a/src/ua_parser/_matchers.pyi +++ b/src/ua_parser/_matchers.pyi @@ -1,3 +1,10 @@ -from .core import Matchers +__all__ = ["MATCHERS"] -MATCHERS: Matchers +from typing import Tuple, List +from .core import UserAgentMatcher, OSMatcher, DeviceMatcher + +MATCHERS: Tuple[ + List[UserAgentMatcher], + List[OSMatcher], + List[DeviceMatcher], +] diff --git a/src/ua_parser/basic.py b/src/ua_parser/basic.py index bd16746..828a6cb 100644 --- a/src/ua_parser/basic.py +++ b/src/ua_parser/basic.py @@ -7,6 +7,7 @@ Device, DeviceMatcher, Domain, + Matcher, Matchers, OS, OSMatcher, @@ -23,9 +24,9 @@ class Parser(AbstractParser): when one matches. 
""" - user_agent_matchers: List[UserAgentMatcher] - os_matchers: List[OSMatcher] - device_matchers: List[DeviceMatcher] + user_agent_matchers: List[Matcher[UserAgent]] + os_matchers: List[Matcher[OS]] + device_matchers: List[Matcher[Device]] def __init__( self, diff --git a/src/ua_parser/core.py b/src/ua_parser/core.py index cb9947e..15d86e9 100644 --- a/src/ua_parser/core.py +++ b/src/ua_parser/core.py @@ -1,9 +1,8 @@ import abc import re -from collections.abc import Callable, Sequence -from dataclasses import dataclass, fields +from dataclasses import dataclass from enum import Flag, auto -from typing import Literal, Optional, Tuple, List, TypeVar, Match, Pattern +from typing import Generic, Literal, Optional, Tuple, List, TypeVar, Match, Pattern __all__ = [ "DefaultedParseResult", @@ -221,7 +220,23 @@ def _replacer(repl: str, m: Match[str]) -> Optional[str]: return re.sub(r"\$(\d)", lambda n: _get(m, int(n[1])) or "", repl).strip() or None -class UserAgentMatcher: +T = TypeVar("T") + + +class Matcher(abc.ABC, Generic[T]): + @abc.abstractmethod + def __call__(self, ua: str) -> Optional[T]: ... + + @property + @abc.abstractmethod + def pattern(self) -> str: ... 
+ + @property + def flags(self) -> int: + return 0 + + +class UserAgentMatcher(Matcher[UserAgent]): regex: Pattern[str] family: str major: Optional[str] @@ -260,6 +275,10 @@ def __call__(self, ua: str) -> Optional[UserAgent]: ) return None + @property + def pattern(self) -> str: + return self.regex.pattern + def __repr__(self) -> str: fields = [ ("family", self.family if self.family != "$1" else None), @@ -270,10 +289,10 @@ def __repr__(self) -> str: ] args = "".join(f", {k}={v!r}" for k, v in fields if v is not None) - return f"UserAgentMatcher({self.regex.pattern!r}{args})" + return f"UserAgentMatcher({self.pattern!r}{args})" -class OSMatcher: +class OSMatcher(Matcher[OS]): regex: Pattern[str] family: str major: str @@ -311,6 +330,10 @@ def __call__(self, ua: str) -> Optional[OS]: ) return None + @property + def pattern(self) -> str: + return self.regex.pattern + def __repr__(self) -> str: fields = [ ("family", self.family if self.family != "$1" else None), @@ -321,10 +344,10 @@ def __repr__(self) -> str: ] args = "".join(f", {k}={v!r}" for k, v in fields if v is not None) - return f"OSMatcher({self.regex.pattern!r}{args})" + return f"OSMatcher({self.pattern!r}{args})" -class DeviceMatcher: +class DeviceMatcher(Matcher[Device]): regex: Pattern[str] family: str brand: str @@ -355,20 +378,28 @@ def __call__(self, ua: str) -> Optional[Device]: ) return None + @property + def pattern(self) -> str: + return self.regex.pattern + + @property + def flags(self) -> int: + return self.regex.flags + def __repr__(self) -> str: fields = [ ("family", self.family if self.family != "$1" else None), ("brand", self.brand or None), ("model", self.model if self.model != "$1" else None), ] - iflag = ', "i"' if self.regex.flags & re.IGNORECASE else "" + iflag = ', "i"' if self.flags & re.IGNORECASE else "" args = iflag + "".join(f", {k}={v!r}" for k, v in fields if v is not None) - return f"DeviceMatcher({self.regex.pattern!r}{args})" + return f"DeviceMatcher({self.pattern!r}{args})" 
Matchers = Tuple[ - List[UserAgentMatcher], - List[OSMatcher], - List[DeviceMatcher], + List[Matcher[UserAgent]], + List[Matcher[OS]], + List[Matcher[Device]], ] diff --git a/src/ua_parser/lazy.py b/src/ua_parser/lazy.py new file mode 100644 index 0000000..fb24735 --- /dev/null +++ b/src/ua_parser/lazy.py @@ -0,0 +1,167 @@ +__all__ = ["UserAgentMatcher", "OSMatcher", "DeviceMatcher"] + +import re +from functools import cached_property +from typing import Literal, Optional, Pattern + +from .core import Matcher, UserAgent, OS, Device, _replacer, _get + + +class UserAgentMatcher(Matcher[UserAgent]): + pattern: str = "" + family: str + major: Optional[str] + minor: Optional[str] + patch: Optional[str] + patch_minor: Optional[str] + + def __init__( + self, + regex: str, + family: Optional[str] = None, + major: Optional[str] = None, + minor: Optional[str] = None, + patch: Optional[str] = None, + patch_minor: Optional[str] = None, + ) -> None: + self.pattern = regex + self.family = family or "$1" + self.major = major + self.minor = minor + self.patch = patch + self.patch_minor = patch_minor + + def __call__(self, ua: str) -> Optional[UserAgent]: + if m := self.regex.search(ua): + return UserAgent( + family=( + self.family.replace("$1", m[1]) + if "$1" in self.family + else self.family + ), + major=self.major or _get(m, 2), + minor=self.minor or _get(m, 3), + patch=self.patch or _get(m, 4), + patch_minor=self.patch_minor or _get(m, 5), + ) + return None + + @cached_property + def regex(self) -> Pattern[str]: + return re.compile(self.pattern) + + def __repr__(self) -> str: + fields = [ + ("family", self.family if self.family != "$1" else None), + ("major", self.major), + ("minor", self.minor), + ("patch", self.patch), + ("patch_minor", self.patch_minor), + ] + args = "".join(f", {k}={v!r}" for k, v in fields if v is not None) + + return f"UserAgentMatcher({self.pattern!r}{args})" + + +class OSMatcher(Matcher[OS]): + pattern: str = "" + family: str + major: str + minor: str 
+ patch: str + patch_minor: str + + def __init__( + self, + regex: str, + family: Optional[str] = None, + major: Optional[str] = None, + minor: Optional[str] = None, + patch: Optional[str] = None, + patch_minor: Optional[str] = None, + ) -> None: + self.pattern = regex + self.family = family or "$1" + self.major = major or "$2" + self.minor = minor or "$3" + self.patch = patch or "$4" + self.patch_minor = patch_minor or "$5" + + def __call__(self, ua: str) -> Optional[OS]: + if m := self.regex.search(ua): + family = _replacer(self.family, m) + if family is None: + raise ValueError(f"Unable to find OS family in {ua}") + return OS( + family=family, + major=_replacer(self.major, m), + minor=_replacer(self.minor, m), + patch=_replacer(self.patch, m), + patch_minor=_replacer(self.patch_minor, m), + ) + return None + + @cached_property + def regex(self) -> Pattern[str]: + return re.compile(self.pattern) + + def __repr__(self) -> str: + fields = [ + ("family", self.family if self.family != "$1" else None), + ("major", self.major if self.major != "$2" else None), + ("minor", self.minor if self.minor != "$3" else None), + ("patch", self.patch if self.patch != "$4" else None), + ("patch_minor", self.patch_minor if self.patch_minor != "$5" else None), + ] + args = "".join(f", {k}={v!r}" for k, v in fields if v is not None) + + return f"OSMatcher({self.pattern!r}{args})" + + +class DeviceMatcher(Matcher[Device]): + pattern: str = "" + flags: int = 0 + family: str + brand: str + model: str + + def __init__( + self, + regex: str, + regex_flag: Optional[Literal["i"]] = None, + family: Optional[str] = None, + brand: Optional[str] = None, + model: Optional[str] = None, + ) -> None: + self.pattern = regex + self.flags = re.IGNORECASE if regex_flag == "i" else 0 + self.family = family or "$1" + self.brand = brand or "" + self.model = model or "$1" + + def __call__(self, ua: str) -> Optional[Device]: + if m := self.regex.search(ua): + family = _replacer(self.family, m) + if family is 
None: + raise ValueError(f"Unable to find device family in {ua}") + return Device( + family=family, + brand=_replacer(self.brand, m), + model=_replacer(self.model, m), + ) + return None + + @cached_property + def regex(self) -> Pattern[str]: + return re.compile(self.pattern, flags=self.flags) + + def __repr__(self) -> str: + fields = [ + ("family", self.family if self.family != "$1" else None), + ("brand", self.brand or None), + ("model", self.model if self.model != "$1" else None), + ] + iflag = ', "i"' if self.flags & re.IGNORECASE else "" + args = iflag + "".join(f", {k}={v!r}" for k, v in fields if v is not None) + + return f"DeviceMatcher({self.pattern!r}{args})" diff --git a/src/ua_parser/loaders.py b/src/ua_parser/loaders.py index 2e9718b..7743117 100644 --- a/src/ua_parser/loaders.py +++ b/src/ua_parser/loaders.py @@ -2,6 +2,7 @@ __all__ = [ "load_builtins", + "load_lazy_builtins", "load_data", "load_yaml", "MatchersData", @@ -14,19 +15,20 @@ import json import os from typing import ( - Any, Callable, - Dict, List, + Literal, Optional, + Protocol, Tuple, Type, - Union, TypedDict, - Literal, + Union, TYPE_CHECKING, + cast, ) +from . 
import lazy from .core import Matchers, UserAgentMatcher, OSMatcher, DeviceMatcher if TYPE_CHECKING: @@ -44,7 +46,15 @@ def load_builtins() -> Matchers: from ._matchers import MATCHERS - return MATCHERS + # typing and mypy don't have safe upcast (#5756) and mypy is + # unhappy about returning concrete matchers for a mixed type + return cast(Matchers, MATCHERS) + + +def load_lazy_builtins() -> Matchers: + from ._lazy import MATCHERS + + return cast(Matchers, MATCHERS) # superclass needed to mix required & optional typed dict entries @@ -77,6 +87,7 @@ class DeviceDict(_RegexDict, total=False): MatchersData = Tuple[List[UserAgentDict], List[OSDict], List[DeviceDict]] +DataLoader = Callable[[MatchersData], Matchers] def load_data(d: MatchersData) -> Matchers: @@ -116,14 +127,57 @@ def load_data(d: MatchersData) -> Matchers: ) -def load_json(f: PathOrFile) -> Matchers: +def load_lazy(d: MatchersData) -> Matchers: + return ( + [ + lazy.UserAgentMatcher( + p["regex"], + p.get("family_replacement"), + p.get("v1_replacement"), + p.get("v2_replacement"), + p.get("v3_replacement"), + p.get("v4_replacement"), + ) + for p in d[0] + ], + [ + lazy.OSMatcher( + p["regex"], + p.get("os_replacement"), + p.get("os_v1_replacement"), + p.get("os_v2_replacement"), + p.get("os_v3_replacement"), + p.get("os_v4_replacement"), + ) + for p in d[1] + ], + [ + lazy.DeviceMatcher( + p["regex"], + p.get("regex_flag"), + p.get("device_replacement"), + p.get("brand_replacement"), + p.get("model_replacement"), + ) + for p in d[2] + ], + ) + + +class FileLoader(Protocol): + def __call__( + self, path: PathOrFile, loader: DataLoader = load_data + ) -> Matchers: ... 
+ + +def load_json(f: PathOrFile, loader: DataLoader = load_data) -> Matchers: if isinstance(f, (str, os.PathLike)): with open(f) as fp: regexes = json.load(fp) else: regexes = json.load(f) - return load_data( + return loader( ( regexes["user_agent_parsers"], regexes["os_parsers"], @@ -132,12 +186,12 @@ def load_json(f: PathOrFile) -> Matchers: ) -load_yaml: Optional[Callable[[PathOrFile], Matchers]] +load_yaml: Optional[FileLoader] if load is None: load_yaml = None else: - def load_yaml(path: PathOrFile) -> Matchers: + def load_yaml(path: PathOrFile, loader: DataLoader = load_data) -> Matchers: if isinstance(path, (str, os.PathLike)): with open(path) as fp: regexes = load(fp, Loader=SafeLoader) # type: ignore diff --git a/src/ua_parser/re2.py b/src/ua_parser/re2.py index f9a92c4..867edfa 100644 --- a/src/ua_parser/re2.py +++ b/src/ua_parser/re2.py @@ -12,6 +12,8 @@ PartialParseResult, Device, Domain, + Matcher, + Matchers, OS, UserAgent, UserAgentMatcher, @@ -22,26 +24,26 @@ class Parser(AbstractParser): ua: re2.Filter - user_agent_matchers: List[UserAgentMatcher] + user_agent_matchers: List[Matcher[UserAgent]] os: re2.Filter - os_matchers: List[OSMatcher] + os_matchers: List[Matcher[OS]] devices: re2.Filter - device_matchers: List[DeviceMatcher] + device_matchers: List[Matcher[Device]] def __init__( self, - matchers: Tuple[List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]], + matchers: Matchers, ) -> None: self.user_agent_matchers, self.os_matchers, self.device_matchers = matchers self.ua = re2.Filter() for u in self.user_agent_matchers: - self.ua.Add(u.regex.pattern) + self.ua.Add(u.pattern) self.ua.Compile() self.os = re2.Filter() for o in self.os_matchers: - self.os.Add(o.regex.pattern) + self.os.Add(o.pattern) self.os.Compile() self.devices = re2.Filter() @@ -49,10 +51,10 @@ def __init__( # Prepend the i global flag if IGNORECASE is set. Assumes # no pattern uses global flags, but since they're not # supported in JS that seems safe. 
- if d.regex.flags & re.IGNORECASE: - self.devices.Add("(?i)" + d.regex.pattern) + if d.flags & re.IGNORECASE: + self.devices.Add("(?i)" + d.pattern) else: - self.devices.Add(d.regex.pattern) + self.devices.Add(d.pattern) self.devices.Compile() def __call__(self, ua: str, domains: Domain, /) -> PartialParseResult: diff --git a/tests/test_core.py b/tests/test_core.py index 6abc06a..f92c1a2 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -30,6 +30,7 @@ ParseResult, UserAgentMatcher, load_builtins, + load_lazy_builtins, caching, ) @@ -38,6 +39,7 @@ PARSERS = [ pytest.param(BasicParser(load_builtins()), id="basic"), + pytest.param(BasicParser(load_lazy_builtins()), id="lazy"), pytest.param( caching.CachingParser( BasicParser(load_builtins()),