Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,18 @@ stages:
script:
- cd $CI_PROJECT_DIR/test/integration-test && uv sync && uv run pytest scenarios --module-path=$CI_PROJECT_DIR/artifacts/$ARCH/mod_datadog.so --bin-path=/httpd/httpd-build/bin/apachectl --log-dir=$CI_PROJECT_DIR/logs -m requires_rum -v

# Runs the full integration-test suite via the same entrypoint local
# developers use (`make test-integration` -> run-integration-tests.sh).
# The script is pointed at the artifact from build:$ARCH via MODULE_PATH so
# the cmake build is skipped and only pytest runs.
.test-integration-template:
  stage: test
  image: $DEVCONTAINER_IMAGE
  variables:
    # Absolute path to the prebuilt module produced by build:$ARCH;
    # run-integration-tests.sh uses it to bypass cmake and run pytest only.
    MODULE_PATH: $CI_PROJECT_DIR/artifacts/$ARCH/mod_datadog.so
  script:
    - .devcontainer/run-integration-tests.sh

build:amd64:
extends: .build-template
needs:
Expand Down Expand Up @@ -109,6 +121,34 @@ test-rum:arm64:
ARCH: arm64
tags: ["arch:arm64"]

test-integration:amd64:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be factorized:

test-integration:
  extends: .test-integration-template
  parallel:
    matrix:
      - ARCH: amd64
        TOOLCHAIN_ARCH: x86_64
      - ARCH: arm64
        TOOLCHAIN_ARCH: aarch64
  needs:
    - job: devcontainer-image
      parallel:
        matrix:
          - ARCH: $ARCH
            TOOLCHAIN_ARCH: $TOOLCHAIN_ARCH
      artifacts: true
    - job: "build:$ARCH"
  tags: ["arch:$ARCH"]

Note that this needs to update the needs in .upload-template (to be tested):

  • test-integration:amd64 → test-integration: [amd64]
  • test-integration:arm64 → test-integration: [arm64]

Similarly, we should try to factorize build:amd64 / build:arm64 and test-rum:amd64 / test-rum:arm64.

extends: .test-integration-template
needs:
- job: devcontainer-image
parallel:
matrix:
- ARCH: amd64
TOOLCHAIN_ARCH: x86_64
artifacts: true
- job: "build:amd64"
variables:
ARCH: amd64
tags: ["arch:amd64"]

# arm64 leg of the integration-test suite; mirrors test-integration:amd64.
test-integration:arm64:
  extends: .test-integration-template
  needs:
    # Pin the devcontainer-image matrix leg that matches this arch so only
    # the aarch64 image build gates this job.
    - job: devcontainer-image
      parallel:
        matrix:
          - ARCH: arm64
            TOOLCHAIN_ARCH: aarch64
      artifacts: true
    - job: "build:arm64"
  variables:
    ARCH: arm64
  tags: ["arch:arm64"]

.upload-template:
stage: aws
needs:
Expand All @@ -118,6 +158,8 @@ test-rum:arm64:
artifacts: true
- job: "test-rum:amd64"
- job: "test-rum:arm64"
- job: "test-integration:amd64"
- job: "test-integration:arm64"
tags: ["arch:amd64"]
image: registry.ddbuild.io/images/aws-cli:2.33.12
script:
Expand Down
40 changes: 25 additions & 15 deletions test/integration-test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,22 +94,22 @@ def load_configuration(self, conf_path: str) -> bool:
# Continue anyway - sometimes apachectl returns non-zero but Apache starts
# We'll verify below with HTTP requests

# Wait for Apache to fully start and verify it's running
# Give Apache up to 5 seconds to start
# Probe readiness with a bare TCP connect rather than an HTTP GET —
# mod_datadog's `DatadogTracing Off` inside a <Location> is silently
# dropped by the config merge in common_conf.cpp (child Off is ORed
# with parent On), so any HTTP probe would emit a trace and pollute
# the per-test agent session.
import socket
max_wait = 5
start_time = time.time()

while time.time() - start_time < max_wait:
# Check if Apache is responding by making a simple HTTP request
try:
import requests
response = requests.get(self.make_url("/"), timeout=1)
# If we get any response, Apache is running
print(f"[debug] Apache started successfully, responding with status {response.status_code}")
return True
except requests.exceptions.RequestException:
# Apache not ready yet, wait a bit
time.sleep(0.2)
with socket.create_connection((self.host, int(self.port)), timeout=0.5):
print(f"[debug] Apache accepting connections on {self.host}:{self.port}")
return True
except (ConnectionRefusedError, OSError):
time.sleep(0.1)

# If we get here, Apache didn't start properly
print(f"[error] Apache failed to respond after {max_wait} seconds")
Expand Down Expand Up @@ -198,19 +198,29 @@ def __init__(self, host: str, port: int) -> None:
def internal_run(self) -> None:
    """Run the aiohttp app on this object's event loop until stop() fires.

    This diff hunk contained both the removed and the added lines of the
    change; this is the resolved, post-change method body.
    """
    runner = web.AppRunner(self._app)
    self.loop.run_until_complete(runner.setup())
    # shutdown_timeout bounds how long aiohttp will wait for active
    # keep-alive connections to close on teardown. The default is 60s;
    # Apache workers that got SIGKILL leave sockets lingering and make
    # that timeout show up as a 60s hang at end-of-session.
    site = web.TCPSite(runner, self.host, self.port, shutdown_timeout=1.0)
    self.loop.run_until_complete(site.start())
    # Block until stop() sets the event from another thread.
    self.loop.run_until_complete(self._stop.wait())
    # runner.cleanup() stops sites, closes the TCP listener, and runs the
    # app's cleanup handlers. Calling only app.cleanup() leaves the
    # listener open and the event loop blocks forever on exit.
    self.loop.run_until_complete(runner.cleanup())
    self.loop.close()

def run(self) -> None:
    """Start internal_run() on a background daemon thread.

    daemon=True so a stuck event loop (e.g. aiohttp refusing to close)
    can't block interpreter shutdown after pytest_sessionfinish. The
    explicit stop() path is still the intended cleanup. (The diff hunk
    showed both the old non-daemon line and the new one; this keeps the
    added daemon=True version.)
    """
    self._thread = threading.Thread(target=self.internal_run, daemon=True)
    self._thread.start()

def stop(self) -> None:
    """Signal the agent's event loop to shut down and join its thread.

    The join is bounded so a wedged aiohttp shutdown can't hang the whole
    test session; per review feedback, a thread that survives the timeout
    is surfaced instead of being silently ignored.
    """
    try:
        self.loop.call_soon_threadsafe(self._stop.set)
    except RuntimeError:
        # The loop may already be closed if internal_run() failed early;
        # fall through to the join, which returns immediately then.
        pass
    self._thread.join(timeout=3.0)
    if self._thread.is_alive():
        # Daemon thread, so this won't block interpreter exit, but make
        # the stuck shutdown visible to avoid masking port flakiness.
        print("[warning] TestAgent thread did not stop within 3s; continuing shutdown")
Comment on lines 221 to +223
Copy link

Copilot AI Apr 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TestAgent.stop() now uses join(timeout=3.0) but doesn’t check whether the thread actually stopped. If the aiohttp loop is stuck (the scenario this change is trying to mitigate), the test run will continue while the agent thread is still running, which can lead to port-binding flakiness or a noisy shutdown. Consider checking self._thread.is_alive() after the timeout and surfacing a clear error/warning, and optionally guard call_soon_threadsafe() in case the loop has already been closed due to an internal_run error.

Copilot uses AI. Check for mistakes.

def new_session(self, token=None) -> AgentSession:
if token is None:
Expand Down
5 changes: 0 additions & 5 deletions test/integration-test/scenarios/conf/proxy.conf
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,3 @@ DatadogAddTag foo bar
DatadogPropagationStyle datadog

ProxyPass "/http" "${upstream_url}"

<Location "/delegate">
DatadogDelegateSampling On
ProxyPass "${upstream_url}"
</Location>
36 changes: 23 additions & 13 deletions test/integration-test/scenarios/test_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,30 @@ def internal_run(self) -> None:
self.loop.close()

def run(self) -> None:
    """Start the server's internal_run() on a background daemon thread.

    daemon=True so an early test failure that skips stop() can't keep
    the interpreter alive at pytest shutdown — the __exit__ path is the
    intended cleanup, this is just a safety net. (The diff hunk showed
    both the old non-daemon line and the new one; this keeps the added
    daemon=True version.)
    """
    self._thread = threading.Thread(target=self.internal_run, daemon=True)
    self._thread.start()

def stop(self) -> None:
    """Signal the server's loop to shut down and join the worker thread.

    Per review feedback: a thread that survives the bounded join is
    reported rather than silently ignored, so a stuck aiohttp shutdown
    (and the port it keeps bound) doesn't go unnoticed in later tests.
    """
    try:
        self.loop.call_soon_threadsafe(self._stop.set)
    except RuntimeError:
        # Loop already closed (internal_run() exited early); nothing to signal.
        pass
    if self._thread is not None:
        self._thread.join(timeout=3.0)
        if self._thread.is_alive():
            # Daemon thread, so interpreter exit isn't blocked, but make
            # the failed teardown visible.
            print("[warning] AioHTTPServer thread did not stop within 3s")
Comment on lines 43 to +46
Copy link

Copilot AI Apr 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AioHTTPServer.internal_run() still calls app.cleanup() and never calls runner.cleanup() / site.stop(). That can leave the listening socket open and may prevent the thread from exiting cleanly (same teardown issue fixed for TestAgent in conftest.py). Consider switching to runner.cleanup() (and/or explicitly stopping the site) before closing the loop so the context-manager shutdown reliably releases the TCP listener.

Copilot uses AI. Check for mistakes.
Copy link

Copilot AI Apr 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

stop() joins the server thread with a timeout but then silently proceeds even if the thread is still alive. That can mask teardown failures and leave a background aiohttp server bound to the port, making later tests flaky. After join(timeout=…), consider checking is_alive() and surfacing a clear failure (or at least logging a warning) so a stuck shutdown doesn’t go unnoticed.

Suggested change
self._thread.join(timeout=3.0)
self._thread.join(timeout=3.0)
if self._thread.is_alive():
raise RuntimeError(
f"AioHTTPServer failed to stop within 3.0 seconds for "
f"{self._host}:{self._port}; the server thread is still running."
)

Copilot uses AI. Check for mistakes.

def __enter__(self) -> "AioHTTPServer":
    # Context-manager entry: start the server's background thread and
    # return this instance for use in the with-block.
    self.run()
    return self

def __exit__(self, exc_type, exc, tb) -> None:
    # Context-manager exit: always stop the server, even when the with-body
    # raised. Exception info is ignored (returns None), so exceptions
    # propagate to the caller.
    self.stop()


def test_http_proxy(server, agent, log_dir, module_path):
"""
Verify proxified HTTP requests propagate tracing context
"""

# TODO: support `with` syntax
def make_temporary_http_server(host: str, port: int):
q = Queue()

Expand All @@ -62,7 +73,6 @@ async def index(request):
host = "127.0.0.1"
port = 8081
queue, http_server = make_temporary_http_server(host, port)
http_server.run()

config = {
"path": relpath("conf/proxy.conf"),
Expand All @@ -72,20 +82,20 @@ async def index(request):
conf_path = os.path.join(log_dir, "httpd.conf")
save_configuration(make_configuration(config, log_dir, module_path), conf_path)

assert server.check_configuration(conf_path)
assert server.load_configuration(conf_path)
with http_server:
assert server.check_configuration(conf_path)
assert server.load_configuration(conf_path)

r = requests.get(server.make_url("/http"), timeout=2)
assert r.status_code == 200
r = requests.get(server.make_url("/http"), timeout=2)
assert r.status_code == 200

http_server.stop()
server.stop(conf_path)
server.stop(conf_path)

assert not queue.empty()
assert not queue.empty()

upstream_headers = queue.get()
assert "x-datadog-trace-id" in upstream_headers
assert "x-datadog-parent-id" in upstream_headers
upstream_headers = queue.get()
assert "x-datadog-trace-id" in upstream_headers
assert "x-datadog-parent-id" in upstream_headers

traces = agent.get_traces(timeout=5)
assert len(traces) == 1
Loading