From 2e60231091272016ceee8cb3b610bb3a7b325bed Mon Sep 17 00:00:00 2001
From: Cormac Doherty <25778167+DocArmoryTech@users.noreply.github.com>
Date: Wed, 1 Apr 2026 13:36:17 +0100
Subject: [PATCH 1/8] refactor(capture): decompose capture_page into helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Split `capture_page` into two helper functions to reduce duplication and make the capture lifecycle easier to follow:

* `setup_page_capture()`: gathers the per-page setup that was previously inline in `capture_page`. This includes download tracking, request-body storage for favicon extraction, dialog acceptance, the optional authorization clickthroughs (when `allow_tracking` is set), the default page timeout, and the headless-Chromium PDF workaround. Returns a `PageCaptureState` TypedDict so that the same state can be passed to finalization without relying on closure variables.

* `_finalize_capture()`: consolidates post-navigation teardown formerly in `capture_page`’s `finally` block. This handles multiple-download deduplication/zip, cookie and storage collection, page/context shutdown, HAR loading, SOCKS5 IP resolution, and trusted-timestamp requests.

`capture_page` now delegates to these helpers; its observable behavior is intended to remain unchanged. The `PageCaptureState` TypedDict is added in this commit, along with the `Awaitable`/`Callable` imports required for its annotations and the `Mapping` import used by `_coerce_cookie_mapping()`.

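No capture-flow feature changes are included in this refactor. The one functional addition is in the `cookies` setter: it now uses the new `_coerce_cookie_mapping()` helper to accept any mapping, Pydantic-style models (`model_dump()`/`dict()`), and plain objects exposing cookie attributes, and it logs a warning for unsupported payloads instead of silently ignoring them.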
---
 playwrightcapture/capture.py | 550 ++++++++++++++++++++++-------------
 1 file changed, 346 insertions(+), 204 deletions(-)

diff --git a/playwrightcapture/capture.py b/playwrightcapture/capture.py
index 4fc191b..d767f64 100644
--- a/playwrightcapture/capture.py
+++ b/playwrightcapture/capture.py
@@ -18,7 +18,7 @@
 from logging import LoggerAdapter, Logger
 from tempfile import NamedTemporaryFile
 from typing import Any, Literal, TYPE_CHECKING
-from collections.abc import MutableMapping
+from collections.abc import Awaitable, Callable, Mapping, MutableMapping
 from urllib.parse import urlparse, unquote, urljoin, urlsplit, urlunsplit, parse_qs, unquote_plus
 from zipfile import ZipFile
 
@@ -105,6 +105,14 @@ class CaptureResponse(TypedDict, total=False):
     potential_favicons: set[bytes] | None
 
 
+class PageCaptureState(TypedDict):
+    """Per-page runtime state shared between setup and finalization."""
+
+    multiple_downloads: list[tuple[str, bytes]]
+    store_request: Callable[[Request], Awaitable[None]]
+    mark_favicons_done: Callable[[], None]
+
+
 class PlaywrightCaptureLogAdapter(LoggerAdapter):  # type: ignore[type-arg]
     """
     Prepend log entry with the UUID of the capture
@@ -153,8 +161,9 @@ def __init__(self, browser: BROWSER | None=None, device_name: str | None=None,
         :param general_timeout_in_sec: The general timeout for the capture, including children.
         :param loglevel: Python loglevel
         :param uuid: The UUID of the capture.
-        :param headless: Whether to run the browser in headless mode. WARNING: requires to run in a graphical environment.
-        :param init_script: An optional JavaScript that will be executed on each page - See https://playwright.dev/python/docs/api/class-browsercontext#browser-context-add-init-script
+        :param headless: Whether to run the browser in headless mode. Set to False only when a graphical environment is available.
+        :param init_script: An optional JavaScript executed on each page - See https://playwright.dev/python/docs/api/class-browsercontext#browser-context-add-init-script
+        :param tt_settings: Optional trusted-timestamp configuration used to timestamp capture artifacts.
         """
         master_logger = logging.getLogger('playwrightcapture')
         master_logger.setLevel(loglevel)
@@ -226,11 +235,13 @@ def __prepare_proxy_aiohttp(self, proxy: ProxySettings) -> str:
         return proxy['server']
 
     async def __aenter__(self) -> Capture:
-        '''Launch the browser'''
-        # Ignore the fonts by the time we take the screenshot
+        """Launch Playwright and the configured browser for this capture."""
+
+        # Do not wait for webfonts before taking screenshots.
         # 2026-02-02: the environment is copied into the process when initialized, so we need to set it globally here,
         # and not in the method where we take the screenshot
         os.environ['PW_TEST_SCREENSHOT_NO_FONTS_READY'] = '1'
+
         self.playwright = await async_playwright().start()
 
         if self.device_name:
@@ -263,6 +274,7 @@ async def __aenter__(self) -> Capture:
         return self
 
     async def __aexit__(self, exc_type: Any, exc: Any, tb: Any) -> bool:
+        """Close browser resources and return True so that exceptions are suppressed, like the upstream context manager."""
 
         try:
             await self.browser.close(reason="Closing browser at the end of the capture.")
@@ -283,6 +295,115 @@ async def __aexit__(self, exc_type: Any, exc: Any, tb: Any) -> bool:
 
         return True
 
+    async def setup_page_capture(self, page: Page, *, allow_tracking: bool=False) -> PageCaptureState:
+        """Prepare a page for a single-page capture without changing capture semantics.
+
+        This method preserves the existing per-page setup used by capture_page:
+        download tracking, request body storage for image responses, dialog
+        acceptance, and the PDF download workaround in headless Chromium.
+        Interactive sessions reuse it so the operator-driven session can still
+        finalize like a normal single-page capture later on.
+        """
+        got_favicons = False
+
+        # We don't need to be super strict on the lock, as it simply triggers a wait for network idle before stopping the capture,
+        # but we still need it to be an integer in case more than one download is triggered and one finishes while the others haven't.
+        self.wait_for_download = 0
+
+        # We may have multiple downloads triggered via JS
+        multiple_downloads: list[tuple[str, bytes]] = []
+
+        async def handle_download(download: Download) -> None:
+            # This method is called when a download event is triggered from JS in a page that also renders
+            try:
+                self.wait_for_download += 1
+                with NamedTemporaryFile() as tmp_f:
+                    self.logger.info('Got a download triggered from JS.')
+                    await download.save_as(tmp_f.name)
+                    filename = download.suggested_filename
+                    with open(tmp_f.name, "rb") as f:
+                        file_content = f.read()
+                    multiple_downloads.append((filename, file_content))
+                    self.logger.info('Done with download.')
+            except Exception as e:
+                if download.page.is_closed():
+                    # Page is closed, skip logging.
+                    pass
+                else:
+                    self.logger.warning(f'Unable to finish download triggered from JS: {e}')
+            finally:
+                self.wait_for_download -= 1
+
+        async def store_request(request: Request) -> None:
+            # This method is called on each request, to store the body (if it is an image) in a dict indexed by URL
+            if got_favicons or request.resource_type != 'image':
+                return
+            try:
+                if response := await request.response():
+                    if got_favicons:
+                        return
+                    if request.resource_type == 'image' and response.ok:
+                        try:
+                            if body := await response.body():
+                                m = self.magicdb.best_magic_buffer(body)
+                                if m.mime_type.startswith('image'):
+                                    self._requests[request.url] = body
+                        except Exception:
+                            pass
+            except Exception as e:
+                self.logger.info(f'Unable to store request: {e}')
+
+        def mark_favicons_done() -> None:
+            nonlocal got_favicons
+            got_favicons = True
+
+        if self.browser_name == 'chromium' and self.headless:
+            async def _override_content_disposition_handler(route: Route, request: Request) -> None:
+                """Special case to handle PDF rendered in the browser directly"""
+                try:
+                    response = await route.fetch()  # performs the request
+                    overridden_headers = {
+                        **response.headers,
+                        "content-disposition": 'attachment'
+                    }
+                    self.logger.info('Got a PDF in headless chromium, force download')
+                    await route.fulfill(response=response, headers=overridden_headers)
+                except Error as e:
+                    self.logger.info(f'Unable to force download: {e}')
+                    await route.continue_()
+
+            # Override the content-disposition header in chromium in headless mode to trigger a download;
+            # otherwise the PDF is rendered in the PDF viewer.
+            try:
+                await page.route("**/*.pdf", handler=_override_content_disposition_handler)
+            except Error as e:
+                self.logger.warning(f'Failed at fetching PDF in headless chromium: {e}')
+
+        if allow_tracking:
+            # Add authorization clickthroughs
+            await self.__dialog_didomi_clickthrough(page)
+            await self.__dialog_onetrust_clickthrough(page)
+            await self.__dialog_hubspot_clickthrough(page)
+            await self.__dialog_cookiebot_clickthrough(page)
+            await self.__dialog_complianz_clickthrough(page)
+            await self.__dialog_yahoo_clickthrough(page)
+            await self.__dialog_ppms_clickthrough(page)
+            await self.__dialog_alert_dialog_clickthrough(page)
+            await self.__dialog_clickthrough(page)
+            await self.__dialog_tarteaucitron_clickthrough(page)
+
+        page.set_default_timeout((self._capture_timeout - 2) * 1000)
+        # trigger a callback on each request to store it in a dict indexed by URL to get it back from the favicon fetcher
+        page.on("requestfinished", store_request)
+        page.on("dialog", lambda dialog: dialog.accept())
+        page.on("download", handle_download)
+
+        return {
+            'multiple_downloads': multiple_downloads,
+            'store_request': store_request,
+            'mark_favicons_done': mark_favicons_done,
+        }
+
     @property
     def locale(self) -> str:
         return self._locale
@@ -344,26 +465,74 @@ def geolocation(self, geolocation: dict[str, str | int | float] | None) -> None:
     def cookies(self) -> list[Cookie]:
         return self._cookies
 
+    def _coerce_cookie_mapping(self, cookie: object) -> Mapping[str, Any] | None:
+        """Normalize supported cookie payload shapes to a mapping.
+
+        Accepts plain mappings, Pydantic-style models, and simple objects with
+        cookie attributes so older callers can keep passing their existing
+        cookie objects.
+        """
+        if isinstance(cookie, Mapping):
+            return cookie
+
+        model_dump = getattr(cookie, 'model_dump', None)
+        if callable(model_dump):
+            try:
+                dumped_cookie = model_dump(exclude_none=True)
+            except TypeError:
+                dumped_cookie = model_dump()
+            if isinstance(dumped_cookie, Mapping):
+                return dumped_cookie
+
+        dict_method = getattr(cookie, 'dict', None)
+        if callable(dict_method):
+            try:
+                dumped_cookie = dict_method(exclude_none=True)
+            except TypeError:
+                dumped_cookie = dict_method()
+            if isinstance(dumped_cookie, Mapping):
+                return dumped_cookie
+
+        cookie_name = getattr(cookie, 'name', None)
+        cookie_value = getattr(cookie, 'value', None)
+        if cookie_name is None or cookie_value is None:
+            return None
+
+        normalized_cookie: dict[str, Any] = {
+            'name': cookie_name,
+            'value': cookie_value,
+        }
+        for optional_key in ('url', 'domain', 'path', 'expires', 'httpOnly', 'secure', 'sameSite', 'partitionKey'):
+            optional_value = getattr(cookie, optional_key, None)
+            if optional_value is not None:
+                normalized_cookie[optional_key] = optional_value
+        return normalized_cookie
+
     @cookies.setter
-    def cookies(self, cookies: list[Cookie | dict[str, Any]] | None) -> None:
+    def cookies(self, cookies: list[Cookie | dict[str, Any] | object] | None) -> None:
         '''Cookies to send along to the initial request.
+        Accepts Playwright cookie dictionaries as well as model/object wrappers
+        exposing equivalent fields.
+
         :param cookies: The cookies, in this format: https://playwright.dev/python/docs/api/class-browsercontext#browser-context-add-cookies
         '''
         if not cookies:
             return
-        for cookie in cookies:
-            if not cookie:
+        for raw_cookie in cookies:
+            if not raw_cookie:
+                continue
+            if isinstance(raw_cookie, Cookie):
+                self._cookies.append(raw_cookie)
                 continue
-            if isinstance(cookie, Cookie):
-                self._cookies.append(cookie)
-            elif isinstance(cookie, dict):
-                try:
-                    self._cookies.append(Cookie.model_validate(cookie))
-                except Exception as e:
-                    self.logger.warning(f'Invalid cookie: {e}')
-            else:
-                # None, ignore
-                pass
+
+            cookie = self._coerce_cookie_mapping(raw_cookie)
+            if cookie is None:
+                self.logger.warning(f'Ignoring unsupported cookie payload: {raw_cookie!r}')
+                continue
+            try:
+                self._cookies.append(Cookie.model_validate(cookie))
+            except Exception as e:
+                self.logger.warning(f'Invalid cookie: {e}')
 
     @property
     def storage(self) -> StorageState:
@@ -967,6 +1136,144 @@ async def __instrumentation(self, page: Page, url: str, allow_tracking: bool, fi
         await self._safe_wait(page)
         self.logger.debug('Done with waiting.')
 
+    async def _finalize_capture(
+        self,
+        *,
+        page: Page,
+        store_request: Callable[[Request], Awaitable[None]] | None,
+        multiple_downloads: list[tuple[str, bytes]] | None,
+        to_return: CaptureResponse,
+        errors: list[str],
+        with_trusted_timestamps: bool,
+    ) -> None:
+        """Common finalization logic for captures (downloads, cookies, storage, HAR, socks5, timestamps).
+
+        This helper centralizes the tail of a capture, which previously lived at the end
+        of capture_page. It is now also used by capture_current_page, consuming the
+        state returned by setup_page_capture when available, to avoid code duplication
+        while keeping single-page finalization behavior aligned.
+        """
+
+        self.logger.debug('Finishing up capture (helper).')
+
+        # We may have multiple downloads triggered via JS; if so, deduplicate them and,
+        # when there is more than one, bundle them into a zip stored in-memory.
+        # This mirrors the behavior previously implemented at the end of capture_page.
+        if multiple_downloads is not None:
+            if multiple_dls := set(multiple_downloads):
+                if len(multiple_dls) == 1:
+                    dl = multiple_dls.pop()
+                    to_return["downloaded_filename"] = dl[0]
+                    to_return["downloaded_file"] = dl[1]
+                else:
+                    mem_zip = BytesIO()
+                    to_return["downloaded_filename"] = f'{self.uuid}_multiple_downloads.zip'
+                    with ZipFile(mem_zip, 'w') as z:
+                        for i, f_details in enumerate(multiple_dls):
+                            filename, file_content = f_details
+                            z.writestr(f'{i}_{filename}', file_content)
+                    to_return["downloaded_file"] = mem_zip.getvalue()
+
+        # Collect cookies from the context (may time out or fail depending on page state).
+        try:
+            async with timeout(15):
+                # NOTE: Ignore type until we can use python 3.12 + only
+                # playwrightcapture.capture.SetCookieParam == playwright._impl._api_structures.SetCookieParam
+                to_return['cookies'] = await self.context.cookies()  # type: ignore[typeddict-item]
+        except (TimeoutError, asyncio.TimeoutError):
+            self.logger.warning("Unable to get cookies (timeout).")
+            errors.append("Unable to get the cookies (timeout).")
+            self.should_retry = True
+        except Error as e:
+            self.logger.warning(f"Unable to get cookies: {e}")
+            errors.append(f'Unable to get the cookies: {e}')
+            self.should_retry = True
+
+        # Collect storage state, including IndexedDB, to capture the full browser state.
+        try:
+            async with timeout(15):
+                to_return['storage'] = await self.context.storage_state(indexed_db=True)
+        except (TimeoutError, asyncio.TimeoutError):
+            self.logger.warning("Unable to get storage (timeout).")
+            errors.append("Unable to get the storage (timeout).")
+            self.should_retry = True
+        except Error as e:
+            self.logger.warning(f"Unable to get the storage: {e}")
+            errors.append(f'Unable to get the storage: {e}')
+            self.should_retry = True
+
+        try:
+            if not page.is_closed():
+                # Remove request listener if we set one; best-effort only as it is
+                # primarily used for favicon extraction and should not break captures.
+                if store_request is not None:
+                    try:
+                        page.remove_listener("requestfinished", store_request)
+                    except Exception:
+                        # Best-effort only
+                        pass
+
+                try:
+                    # Give in-flight operations a short grace period, then switch the
+                    # context offline to stop further network activity before closing.
+                    await asyncio.sleep(1)
+                    async with timeout(3):
+                        await self.context.set_offline(True)
+                        self.logger.debug('Page offline.')
+                except (TimeoutError, asyncio.TimeoutError):
+                    self.logger.debug("Unable switch offline.")
+
+                try:
+                    # Finally close the page itself; failures here are non-fatal but
+                    # are logged to help debug flaky environments.
+                    async with timeout(5):
+                        await page.close(reason="Closing the page because the capture finished.")
+                        self.logger.debug('Page closed.')
+                except (TimeoutError, asyncio.TimeoutError):
+                    self.logger.warning("Unable close page.")
+
+            # Close the context to flush the HAR file to disk, then load it.
+            async with timeout(30):
+                await self.context.close(reason="Closing the context because the capture finished.")  # context needs to be closed to generate the HAR
+                self.logger.debug('Context closed.')
+                with open(self._temp_harfile.name, 'rb') as _har:
+                    to_return['har'] = orjson.loads(_har.read())
+                self.logger.debug('Got HAR.')
+
+            # When using a socks5 proxy, post-process the HAR to resolve IPs via
+            # the proxy so the stored HAR contains addresses consistent with what
+            # the proxy saw.
+            if (to_return.get('har') and self.proxy and self.proxy.get('server')
+                    and self.proxy['server'].startswith('socks5')):
+                if har := to_return['har']:  # Could be None
+                    try:
+                        async with timeout(120):
+                            await self.socks5_resolver(har)
+                    except (TimeoutError, asyncio.TimeoutError):
+                        self.logger.warning("Unable to resolve all the IPs via the socks5 proxy.")
+                        errors.append("Unable to resolve all the IPs via the socks5 proxy.")
+                        self.should_retry = True
+
+        except (TimeoutError, asyncio.TimeoutError):
+            # If closing the context or generating the HAR takes too long, the
+            # capture is considered incomplete but we still return what we have.
+            self.logger.warning("Unable to close context at the end of the capture.")
+            errors.append("Unable to close context at the end of the capture.")
+            self.should_retry = True
+        except Exception as e:
+            # Any other unexpected failure while finalizing the capture is logged
+            # and surfaced as a generic HAR-generation error.
+            self.logger.warning(f"Other exception while finishing up the capture: {e}.")
+            errors.append(f'Unable to generate HAR file: {e}')
+
+        if errors:
+            to_return['error'] = '\n'.join(errors)
+        if with_trusted_timestamps:
+            try:
+                await self._get_trusted_timestamps(to_return)
+            except Exception as e:
+                self.logger.warning(f'Unable to get trusted timestamps: {e}')
+
     async def capture_page(self, url: str, *, max_depth_capture_time: int,
                            referer: str | None=None,
                            page: Page | None=None, depth: int=0,
@@ -977,57 +1284,17 @@ async def capture_page(self, url: str, *, max_depth_capture_time: int,
                            with_trusted_timestamps: bool=False,
                            final_wait: int=5
                            ) -> CaptureResponse:
+        """Capture a URL and optionally recurse into child links.
+
+        When `page` is not provided, this method creates and prepares a new page,
+        performs the navigation, and finalizes the capture before returning.
+        Recursive child captures reuse the existing page and therefore skip the
+        outer setup/finalization path.
+        """
 
         to_return: CaptureResponse = {}
         errors: list[str] = []
-        got_favicons = False
-
-        # We don't need to be super strict on the lock, as it simply triggers a wait for network idle before stoping the capture
-        # but we still need it to be an integer in case we have more than one download triggered and one finished when the others haven't
-        self.wait_for_download = 0
-
-        # We may have multiple download triggered via JS
-        multiple_downloads: list[tuple[str, bytes]] = []
-
-        async def handle_download(download: Download) -> None:
-            # This method is called when a download event is triggered from JS in a page that also renders
-            try:
-                self.wait_for_download += 1
-                with NamedTemporaryFile() as tmp_f:
-                    self.logger.info('Got a download triggered from JS.')
-                    await download.save_as(tmp_f.name)
-                    filename = download.suggested_filename
-                    with open(tmp_f.name, "rb") as f:
-                        file_content = f.read()
-                    multiple_downloads.append((filename, file_content))
-                    self.logger.info('Done with download.')
-            except Exception as e:
-                if download.page.is_closed():
-                    # Page is closed, skip logging.
-                    pass
-                else:
-                    self.logger.warning(f'Unable to finish download triggered from JS: {e}')
-            finally:
-                self.wait_for_download -= 1
-
-        async def store_request(request: Request) -> None:
-            # This method is called on each request, to store the body (if it is an image) in a dict indexed by URL
-            if got_favicons or request.resource_type != 'image':
-                return
-            try:
-                if response := await request.response():
-                    if got_favicons:
-                        return
-                    if request.resource_type == 'image' and response.ok:
-                        try:
-                            if body := await response.body():
-                                m = self.magicdb.best_magic_buffer(body)
-                                if m.mime_type.startswith('image'):
-                                    self._requests[request.url] = body
-                        except Exception:
-                            pass
-            except Exception as e:
-                self.logger.info(f'Unable to store request: {e}')
+        page_capture_state: PageCaptureState | None = None
 
         if page is not None:
             capturing_sub = True
@@ -1035,58 +1302,16 @@ async def store_request(request: Request) -> None:
             capturing_sub = False
             try:
                 page = await self.context.new_page()
-
-                if self.browser_name == 'chromium' and self.headless:
-                    async def _override_content_disposition_handler(route: Route, request: Request) -> None:
-                        """Special case to handle PDF rendered in the browser directly"""
-                        try:
-                            response = await route.fetch()  # performs the request
-                            overridden_headers = {
-                                **response.headers,
-                                "content-disposition": 'attachment'
-                            }
-                            self.logger.info('Got a PDF in headless chromium, force download')
-                            await route.fulfill(response=response, headers=overridden_headers)
-                        except Error as e:
-                            self.logger.info(f'Unable to force download: {e}')
-                            await route.continue_()
-
-                    # overwrite in chromium in headless mode, to trigger a download
-                    # otherwise it is rendered in the PDF viewer.
-                    try:
-                        await page.route("**/*.pdf", handler=_override_content_disposition_handler)
-                    except Error as e:
-                        self.logger.warning(f'Failed at fetching PDF in headless chromium: {e}')
-
-                # client = await page.context.new_cdp_session(page)
-                # await client.detach()
             except Error as e:
                 self.logger.warning(f'Unable to create new page, the context is in a broken state: {e}')
                 self.should_retry = True
                 to_return['error'] = f'Unable to create new page: {e}'
                 return to_return
 
-            if allow_tracking:
-                # Add authorization clickthroughs
-                await self.__dialog_didomi_clickthrough(page)
-                await self.__dialog_onetrust_clickthrough(page)
-                await self.__dialog_hubspot_clickthrough(page)
-                await self.__dialog_cookiebot_clickthrough(page)
-                await self.__dialog_complianz_clickthrough(page)
-                await self.__dialog_yahoo_clickthrough(page)
-                await self.__dialog_ppms_clickthrough(page)
-                await self.__dialog_alert_dialog_clickthrough(page)
-                await self.__dialog_clickthrough(page)
-                await self.__dialog_tarteaucitron_clickthrough(page)
-
-            page.set_default_timeout((self._capture_timeout - 2) * 1000)
-            # trigger a callback on each request to store it in a dict indexed by URL to get it back from the favicon fetcher
-            page.on("requestfinished", store_request)
-            page.on("dialog", lambda dialog: dialog.accept())
+            page_capture_state = await self.setup_page_capture(page, allow_tracking=allow_tracking)
 
         try:
             try:
-                page.on("download", handle_download)
                 await page.goto(url, wait_until='domcontentloaded', referer=referer if referer else '')
             except Error as initial_error:
                 self._update_exceptions(initial_error)
@@ -1106,7 +1331,8 @@ async def _override_content_disposition_handler(route: Route, request: Request)
                                 filename = download.suggested_filename
                                 with open(tmp_f.name, "rb") as f:
                                     file_content = f.read()
-                                multiple_downloads.append((filename, file_content))
+                                if page_capture_state is not None:
+                                    page_capture_state['multiple_downloads'].append((filename, file_content))
                     except PlaywrightTimeoutError:
                         self.logger.debug('No download has been triggered.')
                         raise initial_error
@@ -1171,7 +1397,8 @@ async def _override_content_disposition_handler(route: Route, request: Request)
                     # TODO: check that?
                     try:
                         to_return['potential_favicons'] = await self.get_favicons(page.url, to_return['html'])
-                        got_favicons = True
+                        if page_capture_state is not None:
+                            page_capture_state['mark_favicons_done']()
                     except (TimeoutError, asyncio.TimeoutError) as e:
                         self.logger.warning(f'[Timeout] Unable to get favicons: {e}')
                     except Exception as e:
@@ -1286,101 +1513,16 @@ async def _override_content_disposition_handler(route: Route, request: Request)
             else:
                 raise e
         finally:
-            self.logger.debug('Finishing up capture.')
             if not capturing_sub:
-                # Deduplicate list
-                if multiple_dls := set(multiple_downloads):
-                    if len(multiple_dls) == 1:
-                        dl = multiple_dls.pop()
-                        to_return["downloaded_filename"] = dl[0]
-                        to_return["downloaded_file"] = dl[1]
-                    else:
-                        # we have multiple downloads, making it a zip, make sure the filename is unique
-                        mem_zip = BytesIO()
-                        to_return["downloaded_filename"] = f'{self.uuid}_multiple_downloads.zip'
-                        with ZipFile(mem_zip, 'w') as z:
-                            for i, f_details in enumerate(multiple_dls):
-                                filename, file_content = f_details
-                                z.writestr(f'{i}_{filename}', file_content)
-                        to_return["downloaded_file"] = mem_zip.getvalue()
-
-                try:
-                    async with timeout(15):
-                        # NOTE: Ignore type until we can use python 3.12 + only
-                        # playwrightcapture.capture.SetCookieParam == playwright._impl._api_structures.SetCookieParam
-                        to_return['cookies'] = await self.context.cookies()  # type: ignore[typeddict-item]
-                except (TimeoutError, asyncio.TimeoutError):
-                    self.logger.warning("Unable to get cookies (timeout).")
-                    errors.append("Unable to get the cookies (timeout).")
-                    self.should_retry = True
-                except Error as e:
-                    self.logger.warning(f"Unable to get cookies: {e}")
-                    errors.append(f'Unable to get the cookies: {e}')
-                    self.should_retry = True
-
-                try:
-                    async with timeout(15):
-                        to_return['storage'] = await self.context.storage_state(indexed_db=True)
-                except (TimeoutError, asyncio.TimeoutError):
-                    self.logger.warning("Unable to get storage (timeout).")
-                    errors.append("Unable to get the storage (timeout).")
-                    self.should_retry = True
-                except Error as e:
-                    self.logger.warning(f"Unable to get the storage: {e}")
-                    errors.append(f'Unable to get the storage: {e}')
-                    self.should_retry = True
-                try:
-                    if not page.is_closed():
-                        try:
-                            page.remove_listener("requestfinished", store_request)
-                            await asyncio.sleep(1)
-                            async with timeout(3):
-                                await self.context.set_offline(True)
-                                self.logger.debug('Page offline.')
-                        except (TimeoutError, asyncio.TimeoutError):
-                            self.logger.debug("Unable switch offline.")
-
-                        try:
-                            async with timeout(5):
-                                await page.close(reason="Closing the page because the capture finished.")
-                                self.logger.debug('Page closed.')
-                        except (TimeoutError, asyncio.TimeoutError):
-                            self.logger.warning("Unable close page.")
-
-                    async with timeout(30):
-                        await self.context.close(reason="Closing the context because the capture finished.")  # context needs to be closed to generate the HAR
-                        self.logger.debug('Context closed.')
-                        with open(self._temp_harfile.name, 'rb') as _har:
-                            to_return['har'] = orjson.loads(_har.read())
-                        self.logger.debug('Got HAR.')
-
-                    if (to_return.get('har') and self.proxy and self.proxy.get('server')
-                            and self.proxy['server'].startswith('socks5')):
-                        # Only if the capture was not done via a socks5 proxy
-                        if har := to_return['har']:  # Could be None
-                            try:
-                                async with timeout(120):
-                                    await self.socks5_resolver(har)
-                            except (TimeoutError, asyncio.TimeoutError):
-                                self.logger.warning("Unable to resolve all the IPs via the socks5 proxy.")
-                                errors.append("Unable to resolve all the IPs via the socks5 proxy.")
-                                self.should_retry = True
-
-                except (TimeoutError, asyncio.TimeoutError):
-                    self.logger.warning("Unable to close context at the end of the capture.")
-                    errors.append("Unable to close context at the end of the capture.")
-                    self.should_retry = True
-                except Exception as e:
-                    self.logger.warning(f"Other exception while finishing up the capture: {e}.")
-                    errors.append(f'Unable to generate HAR file: {e}')
+                await self._finalize_capture(
+                    page=page,
+                    store_request=page_capture_state['store_request'] if page_capture_state is not None else None,
+                    multiple_downloads=page_capture_state['multiple_downloads'] if page_capture_state is not None else None,
+                    to_return=to_return,
+                    errors=errors,
+                    with_trusted_timestamps=with_trusted_timestamps,
+                )
         self.logger.debug('Capture done')
-        if errors:
-            to_return['error'] = '\n'.join(errors)
-        if with_trusted_timestamps:
-            try:
-                await self._get_trusted_timestamps(to_return)
-            except Exception as e:
-                self.logger.warning(f'Unable to get trusted timestamps: {e}')
         return to_return
 
     async def _get_trusted_timestamps(self, capture_response: CaptureResponse) -> None:

From 8dfe0eef368799e677967d603a1f7266cd78ea59 Mon Sep 17 00:00:00 2001
From: Cormac Doherty <25778167+DocArmoryTech@users.noreply.github.com>
Date: Wed, 1 Apr 2026 14:09:29 +0100
Subject: [PATCH 2/8] feat(capture): add display parameter and
 capture_current_page method

`Capture.__init__` now accepts an optional `display` parameter (default `None`).
When provided, `__aenter__` constructs a per-launch environment dictionary that overrides `DISPLAY`, allowing each concurrent interactive session to target its own X11 server without modifying the process-global `os.environ`.

A new `capture_current_page()` method captures the current page state without navigating or recursing into child URLs. It accepts the optional `PageCaptureState` returned by `setup_page_capture()` (which the caller runs before navigating) and passes its `store_request` and `multiple_downloads` entries to the existing `_finalize_capture` workflow for cookies, storage, HAR, and trusted timestamps. This method serves as the final-capture step when an interactive session signals it is ready.
---
 playwrightcapture/capture.py | 126 ++++++++++++++++++++++++++++++++++-
 1 file changed, 124 insertions(+), 2 deletions(-)

diff --git a/playwrightcapture/capture.py b/playwrightcapture/capture.py
index d767f64..7b7f973 100644
--- a/playwrightcapture/capture.py
+++ b/playwrightcapture/capture.py
@@ -151,7 +151,8 @@ def __init__(self, browser: BROWSER | None=None, device_name: str | None=None,
                  socks5_dns_resolver: str | list[str] | None=None,
                  general_timeout_in_sec: int | None=None, loglevel: str | int='INFO',
                  uuid: str | None=None, headless: bool=True,
-                 *, init_script: str | None=None, tt_settings: TrustedTimestampSettings | None=None):
+                 *, init_script: str | None=None, tt_settings: TrustedTimestampSettings | None=None,
+                 display: str | None=None):
         """Captures a page with Playwright.
 
         :param browser: The browser to use for the capture.
@@ -164,6 +165,7 @@ def __init__(self, browser: BROWSER | None=None, device_name: str | None=None,
         :param headless: Whether to run the browser in headless mode. Set to False only when a graphical environment is available.
         :param init_script: An optional JavaScript executed on each page - See https://playwright.dev/python/docs/api/class-browsercontext#browser-context-add-init-script
         :param tt_settings: Optional trusted-timestamp configuration used to timestamp capture artifacts.
+        :param display: Optional X11 display passed to the browser subprocess. Used by interactive headed captures to isolate concurrent sessions.
         """
         master_logger = logging.getLogger('playwrightcapture')
         master_logger.setLevel(loglevel)
@@ -217,6 +219,9 @@ def __init__(self, browser: BROWSER | None=None, device_name: str | None=None,
         self._init_script = init_script
 
         self.tt_settings = tt_settings
+        # X11 display to use for the browser subprocess.  Passed via env so each
+        # concurrent capture gets its own display without mutating os.environ.
+        self._display = display
 
         # Initialize the magic DB
         self.magicdb = MagicDb()
@@ -258,11 +263,20 @@ async def __aenter__(self) -> Capture:
                     '--unsafely-treat-insecure-origin-as-secure',  # Allows to run crypto API on .onion URLs (See https://github.com/Lookyloo/PlaywrightCapture/issues/65)
                     ]
 
+        # Build a per-launch environment so concurrent captures each target
+        # their own X11 display without mutating the process-global DISPLAY.
+        launch_env: dict[str, str] | None = None
+        if self._display:
+            launch_env = {**os.environ, 'DISPLAY': self._display}
+            self.logger.info(f'Launching browser on DISPLAY {self._display}')
+        else:
+            self.logger.info(f'Launching browser on default DISPLAY {os.environ.get("DISPLAY", "<unset>")}')
         self.browser = await self.playwright[self.browser_name].launch(
             proxy=self.proxy if self.proxy else None,
             channel="chromium" if self.browser_name == "chromium" else None,
             args=args,
-            headless=self.headless
+            headless=self.headless,
+            env=launch_env,
         )
 
         # Set of URLs that were captured in that context
@@ -1525,6 +1539,114 @@ async def capture_page(self, url: str, *, max_depth_capture_time: int,
         self.logger.debug('Capture done')
         return to_return
 
+    async def capture_current_page(
+        self,
+        page: Page,
+        *,
+        rendered_hostname_only: bool = True,
+        with_screenshot: bool = True,
+        with_favicon: bool = False,
+        with_trusted_timestamps: bool = False,
+        page_capture_state: PageCaptureState | None = None,
+    ) -> CaptureResponse:
+        """Capture the state of the current page only.
+
+        This method is the final-page path used by interactive captures. It does
+        not navigate, recurse into links, or perform crawler-style expansion.
+        It snapshots the page as it exists when called, then runs the normal
+        single-page finalization steps. If the caller already ran
+        setup_page_capture, pass its state so download and favicon bookkeeping are
+        finalized consistently.
+        """
+
+        to_return: CaptureResponse = {}
+        errors: list[str] = []
+
+        try:
+            # Build frame tree and extract main HTML / URL, similar to capture_page
+            to_return['frames'] = await self.make_frame_tree(page.main_frame)
+
+            if frames := to_return.get('frames'):
+                # The first content is what we call rendered HTML, keep it as-is
+                if content := frames.get('content'):
+                    to_return['html'] = content
+                if u := frames.get('url'):
+                    if not u:
+                        self.logger.error('Unable to get the URL of the main frame.')
+                        u = '/!\\ Unknown /!\\'
+                    to_return['last_redirected_url'] = u
+
+            if 'html' in to_return and to_return['html'] is not None and with_favicon:
+                # We're probably (?) safe only looking for favicons in the main frame.
+                # TODO: check that?
+                try:
+                    to_return['potential_favicons'] = await self.get_favicons(page.url, to_return['html'])
+                    if page_capture_state is not None:
+                        page_capture_state['mark_favicons_done']()
+                except (TimeoutError, asyncio.TimeoutError) as e:
+                    self.logger.warning(f'[Timeout] Unable to get favicons on current page: {e}')
+                except Exception as e:
+                    self.logger.warning(f'Unable to get favicons on current page: {e}')
+
+            if with_screenshot:
+                to_return['png'] = await self._failsafe_get_screenshot(page)
+
+            # Keep that all the way down there in case the capture failed.
+            if url := to_return.get('last_redirected_url'):
+                self._already_captured.add(url)
+            else:
+                self._already_captured.add(page.url)
+
+        except PlaywrightTimeoutError as e:
+            errors.append(f"The capture took too long while capturing current page - {e.message}")
+            self.should_retry = True
+        except (asyncio.TimeoutError, TimeoutError):
+            errors.append("Something in the capture of the current page took too long")
+            self.should_retry = True
+        except TargetClosedError as e:
+            errors.append(f"The target was closed while capturing current page - {e}")
+            self.should_retry = True
+        except Error as e:
+            # NOTE: there are a lot of errors that look like duplicates and they are triggered at different times in the process.
+            # it is tricky to figure our which one should (and should not) trigger a retry. Below is our best guess and it will change over time.
+            self._update_exceptions(e)
+            errors.append(e.message)
+            to_return['error_name'] = e.name
+            # TODO: check e.message and figure out if it is worth retrying or not.
+            # NOTE: e.name is generally (always?) "Error"
+            if self._fatal_network_error(e) or self._fatal_auth_error(e) or self.fatal_browser_error(e):
+                self.logger.info(f'Unable to process current page: {e.name}')
+            elif self._retry_network_error(e) or self._retry_browser_error(e):
+                # this one sounds like something we can retry...
+                self.logger.info(f'Issue while capturing current page (retrying): {e.message}')
+                errors.append(f'Issue while capturing current page: {e.message}')
+                self.should_retry = True
+            else:
+                # Unexpected ones
+                self.logger.exception(f'Something went poorly while capturing current page: "{e.name}" - {e.message}')
+        except Exception as e:
+            # we may get a non-playwright exception to.
+            # The ones we try to handle here should be treated as if they were.
+            errors.append(str(e))
+            if str(e) in ['Connection closed while reading from the driver']:
+                self.logger.info(f'Issue while capturing current page (retrying): {e}')
+                errors.append(f'Issue while capturing current page: {e}')
+                self.should_retry = True
+            else:
+                raise e
+
+        await self._finalize_capture(
+            page=page,
+            store_request=page_capture_state['store_request'] if page_capture_state is not None else None,
+            multiple_downloads=page_capture_state['multiple_downloads'] if page_capture_state is not None else None,
+            to_return=to_return,
+            errors=errors,
+            with_trusted_timestamps=with_trusted_timestamps,
+        )
+
+        self.logger.debug('Current-page capture done')
+        return to_return
+
     async def _get_trusted_timestamps(self, capture_response: CaptureResponse) -> None:
         """Get trusted timestamps for the relevant values in the response"""
         if not self.tt_settings:

From 0b113ca6b480457160a25c6bc18be01d86e49dda Mon Sep 17 00:00:00 2001
From: Cormac Doherty <25778167+DocArmoryTech@users.noreply.github.com>
Date: Wed, 1 Apr 2026 15:48:22 +0100
Subject: [PATCH 3/8] fix: add missing datetime import

---
 playwrightcapture/capture.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/playwrightcapture/capture.py b/playwrightcapture/capture.py
index 7b7f973..f347637 100644
--- a/playwrightcapture/capture.py
+++ b/playwrightcapture/capture.py
@@ -14,6 +14,7 @@
 import time
 
 from base64 import b64decode, b64encode
+from datetime import datetime
 from io import BytesIO
 from logging import LoggerAdapter, Logger
 from tempfile import NamedTemporaryFile

From 18ca96ae158b017fa91f7e53ee82a2136c9abe47 Mon Sep 17 00:00:00 2001
From: Cormac Doherty <25778167+DocArmoryTech@users.noreply.github.com>
Date: Thu, 2 Apr 2026 08:27:05 +0100
Subject: [PATCH 4/8] fix(types): widen launch_env type to match Playwright
 BrowserType.launch signature

The env parameter of BrowserType.launch() expects
dict[str, str | float | bool] | None, not dict[str, str] | None.
Widen the launch_env annotation to satisfy mypy --strict.
---
 playwrightcapture/capture.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/playwrightcapture/capture.py b/playwrightcapture/capture.py
index f347637..f236e51 100644
--- a/playwrightcapture/capture.py
+++ b/playwrightcapture/capture.py
@@ -266,7 +266,7 @@ async def __aenter__(self) -> Capture:
 
         # Build a per-launch environment so concurrent captures each target
         # their own X11 display without mutating the process-global DISPLAY.
-        launch_env: dict[str, str] | None = None
+        launch_env: dict[str, str | float | bool] | None = None
         if self._display:
             launch_env = {**os.environ, 'DISPLAY': self._display}
             self.logger.info(f'Launching browser on DISPLAY {self._display}')

From 6589d1228ea6a83d79863a84497a9deab5daeb78 Mon Sep 17 00:00:00 2001
From: Cormac Doherty <25778167+DocArmoryTech@users.noreply.github.com>
Date: Thu, 2 Apr 2026 08:55:58 +0100
Subject: [PATCH 5/8] style: normalize default arg spacing in
 capture_current_page

Align keyword-only parameter defaults with the rest of the codebase
(no spaces around '=').
---
 playwrightcapture/capture.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/playwrightcapture/capture.py b/playwrightcapture/capture.py
index f236e51..0e8611c 100644
--- a/playwrightcapture/capture.py
+++ b/playwrightcapture/capture.py
@@ -1544,11 +1544,11 @@ async def capture_current_page(
         self,
         page: Page,
         *,
-        rendered_hostname_only: bool = True,
-        with_screenshot: bool = True,
-        with_favicon: bool = False,
-        with_trusted_timestamps: bool = False,
-        page_capture_state: PageCaptureState | None = None,
+        rendered_hostname_only: bool=True,
+        with_screenshot: bool=True,
+        with_favicon: bool=False,
+        with_trusted_timestamps: bool=False,
+        page_capture_state: PageCaptureState | None=None,
     ) -> CaptureResponse:
         """Capture the state of the current page only.
 

From d5507b9b16098e2ed3fa7b92e6a0a69e062b13a8 Mon Sep 17 00:00:00 2001
From: Cormac Doherty <25778167+DocArmoryTech@users.noreply.github.com>
Date: Thu, 2 Apr 2026 09:05:37 +0100
Subject: [PATCH 6/8] fix: ensure _finalize_capture runs on re-raised
 exceptions in capture_current_page

When the catch-all 'except Exception' handler re-raises, _finalize_capture
was skipped because it was called sequentially after the try/except
rather than in a finally block.  This left the page/context unclosed
and the HAR file unflushed.

Wrap the try/except in an outer try/finally, matching the pattern
already used by capture_page.
---
 playwrightcapture/capture.py | 149 ++++++++++++++++++-----------------
 1 file changed, 75 insertions(+), 74 deletions(-)

diff --git a/playwrightcapture/capture.py b/playwrightcapture/capture.py
index 0e8611c..7adb1ac 100644
--- a/playwrightcapture/capture.py
+++ b/playwrightcapture/capture.py
@@ -1564,86 +1564,87 @@ async def capture_current_page(
         errors: list[str] = []
 
         try:
-            # Build frame tree and extract main HTML / URL, similar to capture_page
-            to_return['frames'] = await self.make_frame_tree(page.main_frame)
+            try:
+                # Build frame tree and extract main HTML / URL, similar to capture_page
+                to_return['frames'] = await self.make_frame_tree(page.main_frame)
 
-            if frames := to_return.get('frames'):
-                # The first content is what we call rendered HTML, keep it as-is
-                if content := frames.get('content'):
-                    to_return['html'] = content
-                if u := frames.get('url'):
-                    if not u:
-                        self.logger.error('Unable to get the URL of the main frame.')
-                        u = '/!\\ Unknown /!\\'
-                    to_return['last_redirected_url'] = u
-
-            if 'html' in to_return and to_return['html'] is not None and with_favicon:
-                # We're probably (?) safe only looking for favicons in the main frame.
-                # TODO: check that?
-                try:
-                    to_return['potential_favicons'] = await self.get_favicons(page.url, to_return['html'])
-                    if page_capture_state is not None:
-                        page_capture_state['mark_favicons_done']()
-                except (TimeoutError, asyncio.TimeoutError) as e:
-                    self.logger.warning(f'[Timeout] Unable to get favicons on current page: {e}')
-                except Exception as e:
-                    self.logger.warning(f'Unable to get favicons on current page: {e}')
+                if frames := to_return.get('frames'):
+                    # The first content is what we call rendered HTML, keep it as-is
+                    if content := frames.get('content'):
+                        to_return['html'] = content
+                    if u := frames.get('url'):
+                        if not u:
+                            self.logger.error('Unable to get the URL of the main frame.')
+                            u = '/!\\ Unknown /!\\'
+                        to_return['last_redirected_url'] = u
 
-            if with_screenshot:
-                to_return['png'] = await self._failsafe_get_screenshot(page)
+                if 'html' in to_return and to_return['html'] is not None and with_favicon:
+                    # We're probably (?) safe only looking for favicons in the main frame.
+                    # TODO: check that?
+                    try:
+                        to_return['potential_favicons'] = await self.get_favicons(page.url, to_return['html'])
+                        if page_capture_state is not None:
+                            page_capture_state['mark_favicons_done']()
+                    except (TimeoutError, asyncio.TimeoutError) as e:
+                        self.logger.warning(f'[Timeout] Unable to get favicons on current page: {e}')
+                    except Exception as e:
+                        self.logger.warning(f'Unable to get favicons on current page: {e}')
 
-            # Keep that all the way down there in case the capture failed.
-            if url := to_return.get('last_redirected_url'):
-                self._already_captured.add(url)
-            else:
-                self._already_captured.add(page.url)
+                if with_screenshot:
+                    to_return['png'] = await self._failsafe_get_screenshot(page)
 
-        except PlaywrightTimeoutError as e:
-            errors.append(f"The capture took too long while capturing current page - {e.message}")
-            self.should_retry = True
-        except (asyncio.TimeoutError, TimeoutError):
-            errors.append("Something in the capture of the current page took too long")
-            self.should_retry = True
-        except TargetClosedError as e:
-            errors.append(f"The target was closed while capturing current page - {e}")
-            self.should_retry = True
-        except Error as e:
-            # NOTE: there are a lot of errors that look like duplicates and they are triggered at different times in the process.
-            # it is tricky to figure our which one should (and should not) trigger a retry. Below is our best guess and it will change over time.
-            self._update_exceptions(e)
-            errors.append(e.message)
-            to_return['error_name'] = e.name
-            # TODO: check e.message and figure out if it is worth retrying or not.
-            # NOTE: e.name is generally (always?) "Error"
-            if self._fatal_network_error(e) or self._fatal_auth_error(e) or self.fatal_browser_error(e):
-                self.logger.info(f'Unable to process current page: {e.name}')
-            elif self._retry_network_error(e) or self._retry_browser_error(e):
-                # this one sounds like something we can retry...
-                self.logger.info(f'Issue while capturing current page (retrying): {e.message}')
-                errors.append(f'Issue while capturing current page: {e.message}')
+                # Keep that all the way down there in case the capture failed.
+                if url := to_return.get('last_redirected_url'):
+                    self._already_captured.add(url)
+                else:
+                    self._already_captured.add(page.url)
+
+            except PlaywrightTimeoutError as e:
+                errors.append(f"The capture took too long while capturing current page - {e.message}")
                 self.should_retry = True
-            else:
-                # Unexpected ones
-                self.logger.exception(f'Something went poorly while capturing current page: "{e.name}" - {e.message}')
-        except Exception as e:
-            # we may get a non-playwright exception to.
-            # The ones we try to handle here should be treated as if they were.
-            errors.append(str(e))
-            if str(e) in ['Connection closed while reading from the driver']:
-                self.logger.info(f'Issue while capturing current page (retrying): {e}')
-                errors.append(f'Issue while capturing current page: {e}')
+            except (asyncio.TimeoutError, TimeoutError):
+                errors.append("Something in the capture of the current page took too long")
                 self.should_retry = True
-            else:
-                raise e
-
-        await self._finalize_capture(
-            page=page,
-            store_request=page_capture_state['store_request'] if page_capture_state is not None else None,
-            multiple_downloads=page_capture_state['multiple_downloads'] if page_capture_state is not None else None,
-            to_return=to_return,
-            errors=errors,
-            with_trusted_timestamps=with_trusted_timestamps,
-        )
+            except TargetClosedError as e:
+                errors.append(f"The target was closed while capturing current page - {e}")
+                self.should_retry = True
+            except Error as e:
+                # NOTE: there are a lot of errors that look like duplicates and they are triggered at different times in the process.
+                # it is tricky to figure our which one should (and should not) trigger a retry. Below is our best guess and it will change over time.
+                self._update_exceptions(e)
+                errors.append(e.message)
+                to_return['error_name'] = e.name
+                # TODO: check e.message and figure out if it is worth retrying or not.
+                # NOTE: e.name is generally (always?) "Error"
+                if self._fatal_network_error(e) or self._fatal_auth_error(e) or self.fatal_browser_error(e):
+                    self.logger.info(f'Unable to process current page: {e.name}')
+                elif self._retry_network_error(e) or self._retry_browser_error(e):
+                    # this one sounds like something we can retry...
+                    self.logger.info(f'Issue while capturing current page (retrying): {e.message}')
+                    errors.append(f'Issue while capturing current page: {e.message}')
+                    self.should_retry = True
+                else:
+                    # Unexpected ones
+                    self.logger.exception(f'Something went poorly while capturing current page: "{e.name}" - {e.message}')
+            except Exception as e:
+                # we may get a non-playwright exception to.
+                # The ones we try to handle here should be treated as if they were.
+                errors.append(str(e))
+                if str(e) in ['Connection closed while reading from the driver']:
+                    self.logger.info(f'Issue while capturing current page (retrying): {e}')
+                    errors.append(f'Issue while capturing current page: {e}')
+                    self.should_retry = True
+                else:
+                    raise e
+        finally:
+            await self._finalize_capture(
+                page=page,
+                store_request=page_capture_state['store_request'] if page_capture_state is not None else None,
+                multiple_downloads=page_capture_state['multiple_downloads'] if page_capture_state is not None else None,
+                to_return=to_return,
+                errors=errors,
+                with_trusted_timestamps=with_trusted_timestamps,
+            )
 
         self.logger.debug('Current-page capture done')
         return to_return

From b0b86d67622ac0736396a9a4adfe92bc8f04cb3c Mon Sep 17 00:00:00 2001
From: Cormac Doherty <25778167+DocArmoryTech@users.noreply.github.com>
Date: Sat, 4 Apr 2026 11:50:16 +0100
Subject: [PATCH 7/8] refactor(cookies): delegate coercion to
 Cookie.model_validate

Remove `_coerce_cookie_mapping()` and rely on the `lookyloo-models` `Cookie`
model to handle normalization.
---
 playwrightcapture/capture.py | 52 ++----------------------------------
 1 file changed, 2 insertions(+), 50 deletions(-)

diff --git a/playwrightcapture/capture.py b/playwrightcapture/capture.py
index 7adb1ac..c6b9640 100644
--- a/playwrightcapture/capture.py
+++ b/playwrightcapture/capture.py
@@ -19,7 +19,7 @@
 from logging import LoggerAdapter, Logger
 from tempfile import NamedTemporaryFile
 from typing import Any, Literal, TYPE_CHECKING
-from collections.abc import Awaitable, Callable, Mapping, MutableMapping
+from collections.abc import Awaitable, Callable, MutableMapping
 from urllib.parse import urlparse, unquote, urljoin, urlsplit, urlunsplit, parse_qs, unquote_plus
 from zipfile import ZipFile
 
@@ -480,49 +480,6 @@ def geolocation(self, geolocation: dict[str, str | int | float] | None) -> None:
     def cookies(self) -> list[Cookie]:
         return self._cookies
 
-    def _coerce_cookie_mapping(self, cookie: object) -> Mapping[str, Any] | None:
-        """Normalize supported cookie payload shapes to a mapping.
-
-        Accepts plain mappings, Pydantic-style models, and simple objects with
-        cookie attributes so older callers can keep passing their existing
-        cookie objects.
-        """
-        if isinstance(cookie, Mapping):
-            return cookie
-
-        model_dump = getattr(cookie, 'model_dump', None)
-        if callable(model_dump):
-            try:
-                dumped_cookie = model_dump(exclude_none=True)
-            except TypeError:
-                dumped_cookie = model_dump()
-            if isinstance(dumped_cookie, Mapping):
-                return dumped_cookie
-
-        dict_method = getattr(cookie, 'dict', None)
-        if callable(dict_method):
-            try:
-                dumped_cookie = dict_method(exclude_none=True)
-            except TypeError:
-                dumped_cookie = dict_method()
-            if isinstance(dumped_cookie, Mapping):
-                return dumped_cookie
-
-        cookie_name = getattr(cookie, 'name', None)
-        cookie_value = getattr(cookie, 'value', None)
-        if cookie_name is None or cookie_value is None:
-            return None
-
-        normalized_cookie: dict[str, Any] = {
-            'name': cookie_name,
-            'value': cookie_value,
-        }
-        for optional_key in ('url', 'domain', 'path', 'expires', 'httpOnly', 'secure', 'sameSite', 'partitionKey'):
-            optional_value = getattr(cookie, optional_key, None)
-            if optional_value is not None:
-                normalized_cookie[optional_key] = optional_value
-        return normalized_cookie
-
     @cookies.setter
     def cookies(self, cookies: list[Cookie | dict[str, Any] | object] | None) -> None:
         '''Cookies to send along to the initial request.
@@ -539,13 +496,8 @@ def cookies(self, cookies: list[Cookie | dict[str, Any] | object] | None) -> Non
             if isinstance(raw_cookie, Cookie):
                 self._cookies.append(raw_cookie)
                 continue
-
-            cookie = self._coerce_cookie_mapping(raw_cookie)
-            if cookie is None:
-                self.logger.warning(f'Ignoring unsupported cookie payload: {raw_cookie!r}')
-                continue
             try:
-                self._cookies.append(Cookie.model_validate(cookie))
+                self._cookies.append(Cookie.model_validate(raw_cookie))
             except Exception as e:
                 self.logger.warning(f'Invalid cookie: {e}')
 

From 32d3a717119ecae583f08dfae8a5e60b7633f9ff Mon Sep 17 00:00:00 2001
From: Cormac Doherty <25778167+DocArmoryTech@users.noreply.github.com>
Date: Sat, 4 Apr 2026 11:50:39 +0100
Subject: [PATCH 8/8] refactor(capture): move state to instance attrs & merge
 capture_current_page

- Replace the `PageCaptureState` `TypedDict` with instance attributes
  (`_multiple_downloads`, `_store_request`, `_mark_favicons_done`) on `Capture`.
- `setup_page_capture()` now returns `None`, and `_finalize_capture()` reads
  its state from `self`.
- Make `url` optional and add a `current_page_only` flag to `capture_page()`:
  when `True`, the method requires a `page` argument and snapshots that page
  as-is, without navigation or recursion.
- Delete the now-redundant `capture_current_page()` method.
---
 playwrightcapture/capture.py | 548 ++++++++++++++++-------------------
 1 file changed, 246 insertions(+), 302 deletions(-)

diff --git a/playwrightcapture/capture.py b/playwrightcapture/capture.py
index c6b9640..537196b 100644
--- a/playwrightcapture/capture.py
+++ b/playwrightcapture/capture.py
@@ -106,14 +106,6 @@ class CaptureResponse(TypedDict, total=False):
     potential_favicons: set[bytes] | None
 
 
-class PageCaptureState(TypedDict):
-    """Per-page runtime state shared between setup and finalization."""
-
-    multiple_downloads: list[tuple[str, bytes]]
-    store_request: Callable[[Request], Awaitable[None]]
-    mark_favicons_done: Callable[[], None]
-
-
 class PlaywrightCaptureLogAdapter(LoggerAdapter):  # type: ignore[type-arg]
     """
     Prepend log entry with the UUID of the capture
@@ -224,6 +216,11 @@ def __init__(self, browser: BROWSER | None=None, device_name: str | None=None,
         # concurrent capture gets its own display without mutating os.environ.
         self._display = display
 
+        # Per-page capture state populated by setup_page_capture().
+        self._multiple_downloads: list[tuple[str, bytes]] = []
+        self._store_request: Callable[[Request], Awaitable[None]] | None = None
+        self._mark_favicons_done: Callable[[], None] | None = None
+
         # Initialize the magic DB
         self.magicdb = MagicDb()
 
@@ -310,7 +307,7 @@ async def __aexit__(self, exc_type: Any, exc: Any, tb: Any) -> bool:
 
         return True
 
-    async def setup_page_capture(self, page: Page, *, allow_tracking: bool=False) -> PageCaptureState:
+    async def setup_page_capture(self, page: Page, *, allow_tracking: bool=False) -> None:
         """Prepare a page for a single-page capture without changing capture semantics.
 
         This method preserves the existing per-page setup used by capture_page:
@@ -326,7 +323,7 @@ async def setup_page_capture(self, page: Page, *, allow_tracking: bool=False) ->
         self.wait_for_download = 0
 
         # We may have multiple download triggered via JS
-        multiple_downloads: list[tuple[str, bytes]] = []
+        self._multiple_downloads = []
 
         async def handle_download(download: Download) -> None:
             # This method is called when a download event is triggered from JS in a page that also renders
@@ -338,7 +335,7 @@ async def handle_download(download: Download) -> None:
                     filename = download.suggested_filename
                     with open(tmp_f.name, "rb") as f:
                         file_content = f.read()
-                    multiple_downloads.append((filename, file_content))
+                    self._multiple_downloads.append((filename, file_content))
                     self.logger.info('Done with download.')
             except Exception as e:
                 if download.page.is_closed():
@@ -409,16 +406,13 @@ async def _override_content_disposition_handler(route: Route, request: Request)
 
         page.set_default_timeout((self._capture_timeout - 2) * 1000)
         # trigger a callback on each request to store it in a dict indexed by URL to get it back from the favicon fetcher
+        self._store_request = store_request
+        self._mark_favicons_done = mark_favicons_done
+
         page.on("requestfinished", store_request)
         page.on("dialog", lambda dialog: dialog.accept())
         page.on("download", handle_download)
 
-        return {
-            'multiple_downloads': multiple_downloads,
-            'store_request': store_request,
-            'mark_favicons_done': mark_favicons_done,
-        }
-
     @property
     def locale(self) -> str:
         return self._locale
@@ -483,8 +477,6 @@ def cookies(self) -> list[Cookie]:
     @cookies.setter
     def cookies(self, cookies: list[Cookie | dict[str, Any] | object] | None) -> None:
         '''Cookies to send along to the initial request.
-        Accepts Playwright cookie dictionaries as well as model/object wrappers
-        exposing equivalent fields.
 
         :param cookies: The cookies, in this format: https://playwright.dev/python/docs/api/class-browsercontext#browser-context-add-cookies
         '''
@@ -1107,27 +1099,19 @@ async def _finalize_capture(
         self,
         *,
         page: Page,
-        store_request: Callable[[Request], Awaitable[None]] | None,
-        multiple_downloads: list[tuple[str, bytes]] | None,
         to_return: CaptureResponse,
         errors: list[str],
         with_trusted_timestamps: bool,
     ) -> None:
-        """Common finalization logic for captures (downloads, cookies, storage, HAR, socks5, timestamps).
-
-        This helper centralizes the tail of a capture, which previously lived at the end
-        of capture_page. It is now also used by capture_current_page, consuming the
-        state returned by setup_page_capture when available, to avoid code duplication
-        while keeping single-page finalization behavior aligned.
-        """
+        """Common finalization logic for captures (downloads, cookies, storage, HAR, socks5, timestamps)."""
 
         self.logger.debug('Finishing up capture (helper).')
 
         # We may have multiple downloads triggered via JS; if so, deduplicate them and,
         # when there is more than one, bundle them into a zip stored in-memory.
         # This mirrors the behavior previously implemented at the end of capture_page.
-        if multiple_downloads is not None:
-            if multiple_dls := set(multiple_downloads):
+        if self._multiple_downloads:
+            if multiple_dls := set(self._multiple_downloads):
                 if len(multiple_dls) == 1:
                     dl = multiple_dls.pop()
                     to_return["downloaded_filename"] = dl[0]
@@ -1173,9 +1157,9 @@ async def _finalize_capture(
             if not page.is_closed():
                 # Remove request listener if we set one; best-effort only as it is
                 # primarily used for favicon extraction and should not break captures.
-                if store_request is not None:
+                if self._store_request is not None:
                     try:
-                        page.remove_listener("requestfinished", store_request)
+                        page.remove_listener("requestfinished", self._store_request)
                     except Exception:
                         # Best-effort only
                         pass
@@ -1241,7 +1225,7 @@ async def _finalize_capture(
             except Exception as e:
                 self.logger.warning(f'Unable to get trusted timestamps: {e}')
 
-    async def capture_page(self, url: str, *, max_depth_capture_time: int,
+    async def capture_page(self, url: str | None=None, *, max_depth_capture_time: int,
                            referer: str | None=None,
                            page: Page | None=None, depth: int=0,
                            rendered_hostname_only: bool=True,
@@ -1249,6 +1233,7 @@ async def capture_page(self, url: str, *, max_depth_capture_time: int,
                            with_favicon: bool=False,
                            allow_tracking: bool=False,
                            with_trusted_timestamps: bool=False,
+                           current_page_only: bool=False,
                            final_wait: int=5
                            ) -> CaptureResponse:
         """Capture a URL and optionally recurse into child links.
@@ -1257,13 +1242,21 @@ async def capture_page(self, url: str, *, max_depth_capture_time: int,
         performs the navigation, and finalizes the capture before returning.
         Recursive child captures reuse the existing page and therefore skip the
         outer setup/finalization path.
+
+        When `current_page_only` is True the method snapshots the page as-is
+        (no navigation, no recursion) and then finalizes.  This is the path
+        used by interactive captures after setup_page_capture has already been
+        called by the caller.
         """
 
         to_return: CaptureResponse = {}
         errors: list[str] = []
-        page_capture_state: PageCaptureState | None = None
 
-        if page is not None:
+        if current_page_only:
+            if page is None:
+                raise InvalidPlaywrightParameter('current_page_only requires a page argument')
+            capturing_sub = False
+        elif page is not None:
             capturing_sub = True
         else:
             capturing_sub = False
@@ -1275,172 +1268,234 @@ async def capture_page(self, url: str, *, max_depth_capture_time: int,
                 to_return['error'] = f'Unable to create new page: {e}'
                 return to_return
 
-            page_capture_state = await self.setup_page_capture(page, allow_tracking=allow_tracking)
+            await self.setup_page_capture(page, allow_tracking=allow_tracking)
 
         try:
-            try:
-                await page.goto(url, wait_until='domcontentloaded', referer=referer if referer else '')
-            except Error as initial_error:
-                self._update_exceptions(initial_error)
-                # So this one is really annoying: chromium raises a net::ERR_ABORTED when it hits a download
-                if initial_error.name in ['Download is starting', 'net::ERR_ABORTED']:
-                    # page.goto failed, but it triggered a download event.
-                    # Let's re-trigger it.
-                    try:
-                        async with page.expect_download() as download_info:
+            if current_page_only:
+                # Snapshot the current page state without navigation or recursion.
+                try:
+                    to_return['frames'] = await self.make_frame_tree(page.main_frame)
+
+                    if frames := to_return.get('frames'):
+                        if content := frames.get('content'):
+                            to_return['html'] = content
+                        if u := frames.get('url'):
+                            if not u:
+                                self.logger.error('Unable to get the URL of the main frame.')
+                                u = '/!\\ Unknown /!\\'
+                            to_return['last_redirected_url'] = u
+
+                    if 'html' in to_return and to_return['html'] is not None and with_favicon:
+                        try:
+                            to_return['potential_favicons'] = await self.get_favicons(page.url, to_return['html'])
+                            if self._mark_favicons_done is not None:
+                                self._mark_favicons_done()
+                        except (TimeoutError, asyncio.TimeoutError) as e:
+                            self.logger.warning(f'[Timeout] Unable to get favicons on current page: {e}')
+                        except Exception as e:
+                            self.logger.warning(f'Unable to get favicons on current page: {e}')
+
+                    if with_screenshot:
+                        to_return['png'] = await self._failsafe_get_screenshot(page)
+
+                    if captured_url := to_return.get('last_redirected_url'):
+                        self._already_captured.add(captured_url)
+                    else:
+                        self._already_captured.add(page.url)
+                except PlaywrightTimeoutError as e:
+                    errors.append(f"The capture took too long while capturing current page - {e.message}")
+                    self.should_retry = True
+                except (asyncio.TimeoutError, TimeoutError):
+                    errors.append("Something in the capture of the current page took too long")
+                    self.should_retry = True
+                except TargetClosedError as e:
+                    errors.append(f"The target was closed while capturing current page - {e}")
+                    self.should_retry = True
+                except Error as e:
+                    self._update_exceptions(e)
+                    errors.append(e.message)
+                    to_return['error_name'] = e.name
+                    if self._fatal_network_error(e) or self._fatal_auth_error(e) or self.fatal_browser_error(e):
+                        self.logger.info(f'Unable to process current page: {e.name}')
+                    elif self._retry_network_error(e) or self._retry_browser_error(e):
+                        self.logger.info(f'Issue while capturing current page (retrying): {e.message}')
+                        errors.append(f'Issue while capturing current page: {e.message}')
+                        self.should_retry = True
+                    else:
+                        self.logger.exception(f'Something went poorly while capturing current page: "{e.name}" - {e.message}')
+                except Exception as e:
+                    errors.append(str(e))
+                    if str(e) in ['Connection closed while reading from the driver']:
+                        self.logger.info(f'Issue while capturing current page (retrying): {e}')
+                        errors.append(f'Issue while capturing current page: {e}')
+                        self.should_retry = True
+                    else:
+                        raise e
+            else:
+                # Standard navigation + capture path.
+                assert url is not None
+                try:
+                    await page.goto(url, wait_until='domcontentloaded', referer=referer if referer else '')
+                except Error as initial_error:
+                    self._update_exceptions(initial_error)
+                    # So this one is really annoying: chromium raises a net::ERR_ABORTED when it hits a download
+                    if initial_error.name in ['Download is starting', 'net::ERR_ABORTED']:
+                        # page.goto failed, but it triggered a download event.
+                        # Let's re-trigger it.
+                        try:
+                            async with page.expect_download() as download_info:
+                                try:
+                                    await page.goto(url, referer=referer if referer else '')
+                                except Exception:
+                                    pass
+                                with NamedTemporaryFile() as tmp_f:
+                                    download = await download_info.value
+                                    await download.save_as(tmp_f.name)
+                                    filename = download.suggested_filename
+                                    with open(tmp_f.name, "rb") as f:
+                                        file_content = f.read()
+                                    self._multiple_downloads.append((filename, file_content))
+                        except PlaywrightTimeoutError:
+                            self.logger.debug('No download has been triggered.')
+                            raise initial_error
+                        except Error as e:
                             try:
-                                await page.goto(url, referer=referer if referer else '')
+                                error_msg = download.failure()
+                                if not error_msg:
+                                    raise e
+                                errors.append(f"Error while downloading: {error_msg}")
+                                self.logger.info(f'Error while downloading: {error_msg}')
+                                self.should_retry = True
                             except Exception:
-                                pass
-                            with NamedTemporaryFile() as tmp_f:
-                                download = await download_info.value
-                                await download.save_as(tmp_f.name)
-                                filename = download.suggested_filename
-                                with open(tmp_f.name, "rb") as f:
-                                    file_content = f.read()
-                                if page_capture_state is not None:
-                                    page_capture_state['multiple_downloads'].append((filename, file_content))
-                    except PlaywrightTimeoutError:
-                        self.logger.debug('No download has been triggered.')
-                        raise initial_error
-                    except Error as e:
-                        try:
-                            error_msg = download.failure()
-                            if not error_msg:
                                 raise e
-                            errors.append(f"Error while downloading: {error_msg}")
-                            self.logger.info(f'Error while downloading: {error_msg}')
-                            self.should_retry = True
-                        except Exception:
-                            raise e
+                    else:
+                        raise initial_error
                 else:
-                    raise initial_error
-            else:
-                await self._wait_for_random_timeout(page, 5)  # Wait 5 sec after document loaded
-                try:
-                    await page.bring_to_front()
-                    self.logger.debug('Page moved to front.')
-                except Error as e:
-                    self.logger.warning(f'Unable to bring the page to the front: {e}.')
+                    await self._wait_for_random_timeout(page, 5)  # Wait 5 sec after document loaded
+                    try:
+                        await page.bring_to_front()
+                        self.logger.debug('Page moved to front.')
+                    except Error as e:
+                        self.logger.warning(f'Unable to bring the page to the front: {e}.')
 
-                try:
-                    if self.headless:
-                        await self.__instrumentation(page, url, allow_tracking, final_wait)
-                    else:
-                        self.logger.debug('Headed mode, skipping instrumentation.')
-                        await self._wait_for_random_timeout(page, self._capture_timeout - 5)
-                except Exception as e:
-                    self.logger.exception(f'Error during instrumentation: {e}')
-
-                # ### --------------------------------------
-                # NOTE 2025-11-12: disabling the offline setting as it doesn't seem
-                # to solve the issue with the frames, but causes some failure
-                # while getting the stored state
-
-                # Pass browser to offline mode to get content and make screenshot
-                # await self.context.set_offline(True)
-                # await self._safe_wait(page, 5)
-                # self.logger.info('Browser offline.')
-                # Abort everything
-                # await page.route("**/*", lambda route: route.abort())
-                # await self._safe_wait(page, 5)
-
-                to_return['frames'] = await self.make_frame_tree(page.main_frame)
-
-                # ### --------------------------------------
-
-                # The first content is what we call rendered HTML, keep it as-is
-                if frames := to_return.get('frames'):
-                    if content := frames.get('content'):
-                        to_return['html'] = content
-                    if u := frames.get('url'):
-                        if not u:
-                            self.logger.error('Unable to get the URL of the main frame.')
-                            u = '/!\\ Unknown /!\\'
-                        to_return['last_redirected_url'] = u
-
-                if 'html' in to_return and to_return['html'] is not None and with_favicon:
-                    # We're probably (?) safe only looking for favicons in the main frame.
-                    # TODO: check that?
                     try:
-                        to_return['potential_favicons'] = await self.get_favicons(page.url, to_return['html'])
-                        if page_capture_state is not None:
-                            page_capture_state['mark_favicons_done']()
-                    except (TimeoutError, asyncio.TimeoutError) as e:
-                        self.logger.warning(f'[Timeout] Unable to get favicons: {e}')
+                        if self.headless:
+                            await self.__instrumentation(page, url, allow_tracking, final_wait)
+                        else:
+                            self.logger.debug('Headed mode, skipping instrumentation.')
+                            await self._wait_for_random_timeout(page, self._capture_timeout - 5)
                     except Exception as e:
-                        self.logger.warning(f'Unable to get favicons: {e}')
-
-                if with_screenshot:
-                    to_return['png'] = await self._failsafe_get_screenshot(page)
-
-                # Keep that all the way down there in case the capture failed.
-                self._already_captured.add(url)
-
-                if depth > 0 and to_return.get('html') and to_return['html']:
-                    # TODO with children frames:
-                    # 1. if the frame hasa URL, use that as base URL/referer for the subsequent captures
-                    # 2. if it doesn't, the base URL is the url of the parent (which may or may not be the main frame)
-                    if child_urls := self._get_links_from_rendered_page(page.url, to_return['html'], rendered_hostname_only):
-                        to_return['children'] = []
-                        depth -= 1
-                        total_urls = len(child_urls)
-                        max_capture_time = max(int(max_depth_capture_time / total_urls), self._minimal_timeout)
-                        max_captures = int(max_depth_capture_time / max_capture_time)
-                        if max_captures < total_urls:
-                            self.logger.warning(f'Attempting to capture URLs from {page.url} but there are too many ({total_urls}) to capture in too little time. Only capturing the first {max_captures} URLs in the page.')
-                            if max_captures <= 0:
-                                # We don't really have time for even one capture, but let's try anyway.
-                                child_urls = child_urls[:1]
-                            else:
-                                child_urls = child_urls[:max_captures]
-                        self.logger.info(f'Capturing children, {max_captures} URLs')
-                        consecutive_errors = 0
-                        for index, url in enumerate(child_urls):
-                            self.logger.info(f'Capture child {url} - Timeout: {max_capture_time}s')
-                            start_time = time.time()
-                            if page.is_closed():
-                                self.logger.info('Page is closed, unable to capture children.')
-                                break
-                            try:
-                                async with timeout(max_capture_time + 1):  # just adding a bit of padding so playwright has the chance to raise the exception first
-                                    child_capture = await self.capture_page(
-                                        url=url, referer=page.url,
-                                        page=page, depth=depth,
-                                        rendered_hostname_only=rendered_hostname_only,
-                                        max_depth_capture_time=max_capture_time,
-                                        with_screenshot=with_screenshot,
-                                        final_wait=final_wait)
-                                    if with_trusted_timestamps:
-                                        try:
-                                            await self._get_trusted_timestamps(child_capture)
-                                        except Exception as e:
-                                            self.logger.warning(f'Unable to get the trusted timestamps for the clild capture : {e}.')
-                                    to_return['children'].append(child_capture)  # type: ignore[union-attr]
-                            except (TimeoutError, asyncio.TimeoutError):
-                                self.logger.info(f'Timeout error, took more than {max_capture_time}s. Unable to capture {url}.')
-                                consecutive_errors += 1
-                            except Exception as e:
-                                self.logger.warning(f'Error while capturing child "{url}": {e}. {len(child_urls) - index - 1} more to go.')
-                                consecutive_errors += 1
-                            else:
-                                consecutive_errors = 0
-                                runtime = int(time.time() - start_time)
-                                self.logger.info(f'Successfully captured child URL: {url} in {runtime}s. {len(child_urls) - index - 1} to go.')
-
-                            if consecutive_errors >= 5:
-                                # if we have more than 5 consecutive errors, the capture is most probably broken, breaking.
-                                self.logger.warning('Got more than 5 consecutive errors while capturing children, breaking.')
-                                errors.append("Got more than 5 consecutive errors while capturing children")
-                                self.should_retry = True
-                                break
+                        self.logger.exception(f'Error during instrumentation: {e}')
 
-                            try:
-                                await page.go_back()
-                            except PlaywrightTimeoutError:
-                                self.logger.info('Go back timed out, it is probably not a big deal.')
-                            except Exception as e:
-                                self.logger.info(f'Unable to go back: {e}.')
+                    # ### --------------------------------------
+                    # NOTE 2025-11-12: disabling the offline setting as it doesn't seem
+                    # to solve the issue with the frames, but causes some failure
+                    # while getting the stored state
+
+                    # Pass browser to offline mode to get content and make screenshot
+                    # await self.context.set_offline(True)
+                    # await self._safe_wait(page, 5)
+                    # self.logger.info('Browser offline.')
+                    # Abort everything
+                    # await page.route("**/*", lambda route: route.abort())
+                    # await self._safe_wait(page, 5)
+
+                    to_return['frames'] = await self.make_frame_tree(page.main_frame)
+
+                    # ### --------------------------------------
+
+                    # The first content is what we call rendered HTML, keep it as-is
+                    if frames := to_return.get('frames'):
+                        if content := frames.get('content'):
+                            to_return['html'] = content
+                        if u := frames.get('url'):
+                            if not u:
+                                self.logger.error('Unable to get the URL of the main frame.')
+                                u = '/!\\ Unknown /!\\'
+                            to_return['last_redirected_url'] = u
+
+                    if 'html' in to_return and to_return['html'] is not None and with_favicon:
+                        # We're probably (?) safe only looking for favicons in the main frame.
+                        # TODO: check that?
+                        try:
+                            to_return['potential_favicons'] = await self.get_favicons(page.url, to_return['html'])
+                            if self._mark_favicons_done is not None:
+                                self._mark_favicons_done()
+                        except (TimeoutError, asyncio.TimeoutError) as e:
+                            self.logger.warning(f'[Timeout] Unable to get favicons: {e}')
+                        except Exception as e:
+                            self.logger.warning(f'Unable to get favicons: {e}')
+
+                    if with_screenshot:
+                        to_return['png'] = await self._failsafe_get_screenshot(page)
+
+                    # Keep that all the way down there in case the capture failed.
+                    self._already_captured.add(url)
+
+                    if depth > 0 and to_return.get('html') and to_return['html']:
+                        # TODO with children frames:
+                        # 1. if the frame hasa URL, use that as base URL/referer for the subsequent captures
+                        # 2. if it doesn't, the base URL is the url of the parent (which may or may not be the main frame)
+                        if child_urls := self._get_links_from_rendered_page(page.url, to_return['html'], rendered_hostname_only):
+                            to_return['children'] = []
+                            depth -= 1
+                            total_urls = len(child_urls)
+                            max_capture_time = max(int(max_depth_capture_time / total_urls), self._minimal_timeout)
+                            max_captures = int(max_depth_capture_time / max_capture_time)
+                            if max_captures < total_urls:
+                                self.logger.warning(f'Attempting to capture URLs from {page.url} but there are too many ({total_urls}) to capture in too little time. Only capturing the first {max_captures} URLs in the page.')
+                                if max_captures <= 0:
+                                    # We don't really have time for even one capture, but let's try anyway.
+                                    child_urls = child_urls[:1]
+                                else:
+                                    child_urls = child_urls[:max_captures]
+                            self.logger.info(f'Capturing children, {max_captures} URLs')
+                            consecutive_errors = 0
+                            for index, url in enumerate(child_urls):
+                                self.logger.info(f'Capture child {url} - Timeout: {max_capture_time}s')
+                                start_time = time.time()
+                                if page.is_closed():
+                                    self.logger.info('Page is closed, unable to capture children.')
+                                    break
+                                try:
+                                    async with timeout(max_capture_time + 1):  # just adding a bit of padding so playwright has the chance to raise the exception first
+                                        child_capture = await self.capture_page(
+                                            url=url, referer=page.url,
+                                            page=page, depth=depth,
+                                            rendered_hostname_only=rendered_hostname_only,
+                                            max_depth_capture_time=max_capture_time,
+                                            with_screenshot=with_screenshot,
+                                            final_wait=final_wait)
+                                        if with_trusted_timestamps:
+                                            try:
+                                                await self._get_trusted_timestamps(child_capture)
+                                            except Exception as e:
+                                                self.logger.warning(f'Unable to get the trusted timestamps for the clild capture : {e}.')
+                                        to_return['children'].append(child_capture)  # type: ignore[union-attr]
+                                except (TimeoutError, asyncio.TimeoutError):
+                                    self.logger.info(f'Timeout error, took more than {max_capture_time}s. Unable to capture {url}.')
+                                    consecutive_errors += 1
+                                except Exception as e:
+                                    self.logger.warning(f'Error while capturing child "{url}": {e}. {len(child_urls) - index - 1} more to go.')
+                                    consecutive_errors += 1
+                                else:
+                                    consecutive_errors = 0
+                                    runtime = int(time.time() - start_time)
+                                    self.logger.info(f'Successfully captured child URL: {url} in {runtime}s. {len(child_urls) - index - 1} to go.')
+
+                                if consecutive_errors >= 5:
+                                    # if we have more than 5 consecutive errors, the capture is most probably broken, breaking.
+                                    self.logger.warning('Got more than 5 consecutive errors while capturing children, breaking.')
+                                    errors.append("Got more than 5 consecutive errors while capturing children")
+                                    self.should_retry = True
+                                    break
+
+                                try:
+                                    await page.go_back()
+                                except PlaywrightTimeoutError:
+                                    self.logger.info('Go back timed out, it is probably not a big deal.')
+                                except Exception as e:
+                                    self.logger.info(f'Unable to go back: {e}.')
 
         except PlaywrightTimeoutError as e:
             errors.append(f"The capture took too long - {e.message}")
@@ -1483,8 +1538,6 @@ async def capture_page(self, url: str, *, max_depth_capture_time: int,
             if not capturing_sub:
                 await self._finalize_capture(
                     page=page,
-                    store_request=page_capture_state['store_request'] if page_capture_state is not None else None,
-                    multiple_downloads=page_capture_state['multiple_downloads'] if page_capture_state is not None else None,
                     to_return=to_return,
                     errors=errors,
                     with_trusted_timestamps=with_trusted_timestamps,
@@ -1492,115 +1545,6 @@ async def capture_page(self, url: str, *, max_depth_capture_time: int,
         self.logger.debug('Capture done')
         return to_return
 
-    async def capture_current_page(
-        self,
-        page: Page,
-        *,
-        rendered_hostname_only: bool=True,
-        with_screenshot: bool=True,
-        with_favicon: bool=False,
-        with_trusted_timestamps: bool=False,
-        page_capture_state: PageCaptureState | None=None,
-    ) -> CaptureResponse:
-        """Capture the state of the current page only.
-
-        This method is the final-page path used by interactive captures. It does
-        not navigate, recurse into links, or perform crawler-style expansion.
-        It snapshots the page as it exists when called, then runs the normal
-        single-page finalization steps. If the caller already ran
-        setup_page_capture, pass its state so download and favicon bookkeeping are
-        finalized consistently.
-        """
-
-        to_return: CaptureResponse = {}
-        errors: list[str] = []
-
-        try:
-            try:
-                # Build frame tree and extract main HTML / URL, similar to capture_page
-                to_return['frames'] = await self.make_frame_tree(page.main_frame)
-
-                if frames := to_return.get('frames'):
-                    # The first content is what we call rendered HTML, keep it as-is
-                    if content := frames.get('content'):
-                        to_return['html'] = content
-                    if u := frames.get('url'):
-                        if not u:
-                            self.logger.error('Unable to get the URL of the main frame.')
-                            u = '/!\\ Unknown /!\\'
-                        to_return['last_redirected_url'] = u
-
-                if 'html' in to_return and to_return['html'] is not None and with_favicon:
-                    # We're probably (?) safe only looking for favicons in the main frame.
-                    # TODO: check that?
-                    try:
-                        to_return['potential_favicons'] = await self.get_favicons(page.url, to_return['html'])
-                        if page_capture_state is not None:
-                            page_capture_state['mark_favicons_done']()
-                    except (TimeoutError, asyncio.TimeoutError) as e:
-                        self.logger.warning(f'[Timeout] Unable to get favicons on current page: {e}')
-                    except Exception as e:
-                        self.logger.warning(f'Unable to get favicons on current page: {e}')
-
-                if with_screenshot:
-                    to_return['png'] = await self._failsafe_get_screenshot(page)
-
-                # Keep that all the way down there in case the capture failed.
-                if url := to_return.get('last_redirected_url'):
-                    self._already_captured.add(url)
-                else:
-                    self._already_captured.add(page.url)
-
-            except PlaywrightTimeoutError as e:
-                errors.append(f"The capture took too long while capturing current page - {e.message}")
-                self.should_retry = True
-            except (asyncio.TimeoutError, TimeoutError):
-                errors.append("Something in the capture of the current page took too long")
-                self.should_retry = True
-            except TargetClosedError as e:
-                errors.append(f"The target was closed while capturing current page - {e}")
-                self.should_retry = True
-            except Error as e:
-                # NOTE: there are a lot of errors that look like duplicates and they are triggered at different times in the process.
-                # it is tricky to figure our which one should (and should not) trigger a retry. Below is our best guess and it will change over time.
-                self._update_exceptions(e)
-                errors.append(e.message)
-                to_return['error_name'] = e.name
-                # TODO: check e.message and figure out if it is worth retrying or not.
-                # NOTE: e.name is generally (always?) "Error"
-                if self._fatal_network_error(e) or self._fatal_auth_error(e) or self.fatal_browser_error(e):
-                    self.logger.info(f'Unable to process current page: {e.name}')
-                elif self._retry_network_error(e) or self._retry_browser_error(e):
-                    # this one sounds like something we can retry...
-                    self.logger.info(f'Issue while capturing current page (retrying): {e.message}')
-                    errors.append(f'Issue while capturing current page: {e.message}')
-                    self.should_retry = True
-                else:
-                    # Unexpected ones
-                    self.logger.exception(f'Something went poorly while capturing current page: "{e.name}" - {e.message}')
-            except Exception as e:
-                # we may get a non-playwright exception to.
-                # The ones we try to handle here should be treated as if they were.
-                errors.append(str(e))
-                if str(e) in ['Connection closed while reading from the driver']:
-                    self.logger.info(f'Issue while capturing current page (retrying): {e}')
-                    errors.append(f'Issue while capturing current page: {e}')
-                    self.should_retry = True
-                else:
-                    raise e
-        finally:
-            await self._finalize_capture(
-                page=page,
-                store_request=page_capture_state['store_request'] if page_capture_state is not None else None,
-                multiple_downloads=page_capture_state['multiple_downloads'] if page_capture_state is not None else None,
-                to_return=to_return,
-                errors=errors,
-                with_trusted_timestamps=with_trusted_timestamps,
-            )
-
-        self.logger.debug('Current-page capture done')
-        return to_return
-
     async def _get_trusted_timestamps(self, capture_response: CaptureResponse) -> None:
         """Get trusted timestamps for the relevant values in the response"""
         if not self.tt_settings: