diff --git a/docs/understand/weblogs/end-to-end_weblog.md b/docs/understand/weblogs/end-to-end_weblog.md index c5fb2a076cc..2b72952e9cf 100644 --- a/docs/understand/weblogs/end-to-end_weblog.md +++ b/docs/understand/weblogs/end-to-end_weblog.md @@ -899,6 +899,21 @@ It supports the following body fields: This endpoint is OPTIONAL and not related to any test, but to the testing process. When called, it should flush any remaining data from the library to the respective outputs, usually the agent. See more in `docs/edit/flushing.md`. +### GET /spawn_child + +This endpoint is used for telemetry session ID header tests (Stable Service Instance Identifier RFC). It must fork or exec a child process, pass in the required arguments, wait for the child, and return a response. Used to validate `DD-Session-ID`, `DD-Root-Session-ID`, and `DD-Parent-Session-ID` headers in instrumentation telemetry across forked and spawned (exec'd) child processes. +RFC: https://docs.google.com/document/d/1ECKj9_NnwaKYtFqm3p3Rlpicx5d-OQcdj9kI2jvRqVU/edit?tab=t.0#heading=h.ojliy5oytqgg + +Required query parameters: + +- `sleep`: non-negative integer (required) — number of seconds the child process should sleep before exiting +- `crash`: boolean (required) — `true` to kill the child with SIGSEGV after sleep, `false` to let it exit gracefully +- `fork`: boolean (required) — `true` to use fork (parent-child), `false` to spawn the child with exec. Runtimes that do not support fork (e.g. Java, C#, Go) return 400 if `fork=true` is passed. + +Returns 200 status code on success. Response body may contain a message such as `Child process {pid} exited`. Returns 400 if `sleep`, `crash`, or `fork` is missing or invalid, or if `fork=true` is passed on a runtime that does not support forking. + +Note: `/fork_and_crash` exists only in lib-injection weblogs, not in end-to-end weblogs. + ### \[GET,POST\] /rasp/lfi This endpoint is used to test for local file inclusion / path traversal attacks, consequently it must perform an operation on a file or directory, e.g. `open` with a relative path. 
The chosen operation must be injected with the `GET` or `POST` parameter. diff --git a/manifests/cpp.yml b/manifests/cpp.yml index 71007d8c424..6279255f297 100644 --- a/manifests/cpp.yml +++ b/manifests/cpp.yml @@ -310,5 +310,7 @@ manifest: tests/test_library_logs.py::Test_NoExceptions::test_dotnet: irrelevant (only for .NET) tests/test_library_logs.py::Test_NoExceptions::test_java_logs: irrelevant (only for Java) tests/test_library_logs.py::Test_NoExceptions::test_java_telemetry_logs: irrelevant (only for Java) + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_forks: missing_feature + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_spawned: missing_feature tests/test_telemetry.py::Test_Telemetry::test_telemetry_message_has_datadog_container_id: "irrelevant (cgroup in weblog is 0::/, so this test can't work)" tests/test_telemetry.py::Test_Telemetry::test_telemetry_message_required_headers: missing_feature diff --git a/manifests/cpp_httpd.yml b/manifests/cpp_httpd.yml index 5c4a70c7ee1..d57c93f6763 100644 --- a/manifests/cpp_httpd.yml +++ b/manifests/cpp_httpd.yml @@ -205,6 +205,8 @@ manifest: tests/test_telemetry.py::Test_Telemetry::test_app_started_sent_exactly_once: - declaration: flaky (APMAPI-1876) component_version: ">1.0.4" + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_forks: missing_feature + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_spawned: missing_feature tests/test_telemetry.py::Test_Telemetry::test_telemetry_message_has_datadog_container_id: "irrelevant (cgroup in weblog is 0::/, so this test can't work)" tests/test_telemetry.py::Test_TelemetryEnhancedConfigReporting: '>=1.0.3' # Modified by easy win activation script tests/test_telemetry.py::Test_TelemetryEnhancedConfigReporting::test_telemetry_enhanced_config_reporting_precedence: missing_feature # Created by easy win activation script diff --git a/manifests/cpp_kong.yml b/manifests/cpp_kong.yml 
index 07c1d7bdfef..3d1372ca2fa 100644 --- a/manifests/cpp_kong.yml +++ b/manifests/cpp_kong.yml @@ -36,5 +36,7 @@ manifest: tests/test_span_events.py: missing_feature tests/test_standard_tags.py: irrelevant tests/test_telemetry.py: missing_feature + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_forks: missing_feature + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_spawned: missing_feature tests/test_telemetry.py::Test_Telemetry::test_telemetry_message_has_datadog_container_id: "irrelevant (cgroup in weblog is 0::/, so this test can't work)" tests/test_v1_payloads.py: missing_feature diff --git a/manifests/cpp_nginx.yml b/manifests/cpp_nginx.yml index bafae7da243..c4d5ef56e15 100644 --- a/manifests/cpp_nginx.yml +++ b/manifests/cpp_nginx.yml @@ -427,6 +427,8 @@ manifest: component_version: <1.12.0 tests/test_telemetry.py::Test_Telemetry::test_app_product_change: missing_feature (Weblog GET/enable_product and app-product-change event is not implemented yet.) 
tests/test_telemetry.py::Test_Telemetry::test_proxy_forwarding: missing_feature # Created by easy win activation script + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_forks: '>=1.12.0' + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_spawned: '>=1.12.0' tests/test_telemetry.py::Test_Telemetry::test_telemetry_message_has_datadog_container_id: "irrelevant (cgroup in weblog is 0::/, so this test can't work)" tests/test_telemetry.py::Test_Telemetry::test_telemetry_proxy_enrichment: missing_feature # Created by easy win activation script tests/test_telemetry.py::Test_TelemetryEnhancedConfigReporting: '>=1.12.0' # Modified by easy win activation script diff --git a/manifests/dotnet.yml b/manifests/dotnet.yml index c1cbe2a901e..459558e6a41 100644 --- a/manifests/dotnet.yml +++ b/manifests/dotnet.yml @@ -1184,6 +1184,8 @@ manifest: tests/test_telemetry.py::Test_Telemetry::test_app_started_is_first_message: # Easy win for poc, uds and version 3.36.0 - declaration: bug (APMAPI-728) component_version: '>=3.4.0' + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_forks: irrelevant (.NET does not support fork) + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_spawned: v3.41.0 tests/test_telemetry.py::Test_Telemetry::test_telemetry_message_has_datadog_container_id: "irrelevant (cgroup in weblog is 0::/, so this test can't work)" tests/test_telemetry.py::Test_TelemetryEnhancedConfigReporting: v3.25.0 tests/test_telemetry.py::Test_TelemetrySCAEnvVar: missing_feature diff --git a/manifests/golang.yml b/manifests/golang.yml index 15781f0d552..b319bcf3046 100644 --- a/manifests/golang.yml +++ b/manifests/golang.yml @@ -1511,6 +1511,11 @@ manifest: tests/test_telemetry.py::Test_Telemetry::test_api_still_v1: irrelevant tests/test_telemetry.py::Test_Telemetry::test_app_dependencies_loaded: irrelevant tests/test_telemetry.py::Test_Telemetry::test_app_product_change: missing_feature (Weblog 
GET/enable_product and app-product-change event is not implemented yet.) + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_forks: irrelevant (Go does not support fork; use test_session_id_headers_across_spawned instead) + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_spawned: + - weblog_declaration: + '*': irrelevant + net-http: '>=2.8.0' tests/test_telemetry.py::Test_Telemetry::test_telemetry_message_has_datadog_container_id: "irrelevant (cgroup in weblog is 0::/, so this test can't work)" tests/test_telemetry.py::Test_TelemetryEnhancedConfigReporting: missing_feature tests/test_telemetry.py::Test_TelemetrySCAEnvVar: missing_feature diff --git a/manifests/java.yml b/manifests/java.yml index 8d4afd6f3c0..55f26a462e9 100644 --- a/manifests/java.yml +++ b/manifests/java.yml @@ -4338,6 +4338,11 @@ manifest: tests/test_telemetry.py::Test_Telemetry::test_seq_id: # Created by easy win activation script - weblog_declaration: spring-boot-3-native: missing_feature (GraalVM. Tracing support only) + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_forks: irrelevant (Java does not support fork) + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_spawned: + - weblog_declaration: + "*": irrelevant + spring-boot: '>=1.61.0' tests/test_telemetry.py::Test_Telemetry::test_status_ok: # Created by easy win activation script - weblog_declaration: spring-boot-3-native: missing_feature (GraalVM. 
Tracing support only) diff --git a/manifests/nodejs.yml b/manifests/nodejs.yml index 9802fcaabc3..fa7c4027be1 100644 --- a/manifests/nodejs.yml +++ b/manifests/nodejs.yml @@ -87,6 +87,7 @@ refs: - &ref_5_88_0 '>=5.88.0' - &ref_5_89_0 '>=5.89.0' - &ref_5_90_0 '>=5.90.0' + - &ref_5_93_0 '>=5.93.0' - &ref_6_0_0 '>=6.0.0-pre' manifest: tests/ai_guard/test_ai_guard_sdk.py::Test_ContentParts: @@ -2489,6 +2490,14 @@ manifest: tests/test_telemetry.py::Test_Telemetry::test_proxy_forwarding: - weblog_declaration: nextjs: missing_feature + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_forks: + - weblog_declaration: + "*": missing_feature (spawn_child endpoint not implemented) + express4: *ref_5_93_0 + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_spawned: + - weblog_declaration: + "*": missing_feature (spawn_child endpoint not implemented) + express4: *ref_5_93_0 tests/test_telemetry.py::Test_Telemetry::test_telemetry_message_has_datadog_container_id: "irrelevant (cgroup in weblog is 0::/, so this test can't work)" tests/test_telemetry.py::Test_Telemetry::test_telemetry_proxy_enrichment: - weblog_declaration: diff --git a/manifests/php.yml b/manifests/php.yml index a793b55f53d..564f5a71ad9 100644 --- a/manifests/php.yml +++ b/manifests/php.yml @@ -1470,6 +1470,8 @@ manifest: tests/test_telemetry.py::Test_Telemetry::test_app_started_client_configuration: missing_feature (Telemetry is not implemented yet.) 
tests/test_telemetry.py::Test_Telemetry::test_app_started_sent_exactly_once: irrelevant (PHP registers 2 telemetry services) tests/test_telemetry.py::Test_Telemetry::test_seq_id: irrelevant (PHP registers 2 telemetry services) + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_forks: missing_feature + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_spawned: missing_feature tests/test_telemetry.py::Test_Telemetry::test_telemetry_message_has_datadog_container_id: "irrelevant (cgroup in weblog is 0::/, so this test can't work)" tests/test_telemetry.py::Test_TelemetryEnhancedConfigReporting: missing_feature tests/test_telemetry.py::Test_TelemetrySCAEnvVar: v0.99.1 diff --git a/manifests/python.yml b/manifests/python.yml index fd107d87f20..88d216cbf35 100644 --- a/manifests/python.yml +++ b/manifests/python.yml @@ -2358,6 +2358,14 @@ manifest: - declaration: flaky (APMRP-360) component_version: <=1.20.2 - declaration: bug (APMAPI-1858) + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_forks: + - weblog_declaration: + "*": missing_feature (not implemented yet) + flask-poc: '>=4.8.0' + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_spawned: + - weblog_declaration: + flask-poc: missing_feature (not implemented yet) + "*": missing_feature (spawn_child endpoint not implemented) tests/test_telemetry.py::Test_Telemetry::test_telemetry_message_has_datadog_container_id: "irrelevant (cgroup in weblog is 0::/, so this test can't work)" tests/test_telemetry.py::Test_TelemetryEnhancedConfigReporting: - weblog_declaration: diff --git a/manifests/ruby.yml b/manifests/ruby.yml index 144872bdde7..c3b1210d6cc 100644 --- a/manifests/ruby.yml +++ b/manifests/ruby.yml @@ -2277,6 +2277,14 @@ manifest: tests/test_telemetry.py::Test_Telemetry::test_app_started_sent_exactly_once: - declaration: missing_feature (app-started not sent) component_version: <1.22.0 + 
tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_forks: + - weblog_declaration: + '*': missing_feature + rails72: '>=2.31.0' + tests/test_telemetry.py::Test_Telemetry::test_session_id_headers_across_spawned: + - weblog_declaration: + '*': missing_feature + rails72: '>=2.31.0' tests/test_telemetry.py::Test_Telemetry::test_telemetry_message_has_datadog_container_id: "irrelevant (cgroup in weblog is 0::/, so this test can't work)" tests/test_telemetry.py::Test_TelemetryEnhancedConfigReporting: missing_feature (Temporarily disabled, will be re-enabled once dd-trace-rb#5483 is merged) tests/test_telemetry.py::Test_TelemetrySCAEnvVar: missing_feature diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py index c4e31ae7ed5..578834e5680 100644 --- a/tests/test_telemetry.py +++ b/tests/test_telemetry.py @@ -580,6 +580,83 @@ def test_app_product_change(self): if app_product_change_event_found is False: raise Exception("app-product-change is not emitted when product change is enabled") + def setup_session_id_headers_across_forks(self): + """Trigger spawn_child endpoint to create a fork tree for session ID header validation.""" + weblog.get("/spawn_child", params={"sleep": 2, "crash": False, "fork": True}) + + def setup_session_id_headers_across_spawned(self): + """Trigger spawn_child endpoint with exec (fork=false) for session ID header validation.""" + weblog.get("/spawn_child", params={"sleep": 2, "crash": False, "fork": False}) + + def _validate_session_id_headers_across_processes(self) -> None: + """Validate DD-Session-ID, DD-Root-Session-ID, DD-Parent-Session-ID in telemetry. + + Stable Service Instance Identifier RFC: each app instance has one root runtime_id. + DD-Session-ID (instance id) must equal runtime_id. When only DD-Session-ID is sent + (no DD-Root-Session-ID), the process is treated as the root. A single shared runtime + is accepted; when several are captured (parent and child from spawn_child) they must share one root. 
+ """ + # Use lifecycle events only; metrics and log events from lib-datadog can contain + # runtime/session_ids that do not map to tracer-generated telemetry. + telemetry_data = list(interfaces.library.get_lifecycle_events()) + if not telemetry_data: + raise ValueError("No telemetry data to validate on") + + assert len(telemetry_data) > 1, ( + f"Expected multiple telemetry events to verify consistency, got {len(telemetry_data)}" + ) + + runtime_ids = set[str]() + parent_runtime_ids = set[str]() + root_runtime_ids = set[str]() + + for data in telemetry_data: + # Headers are not case sensitive + curr_sid = get_header(data, "request", "dd-session-id") + curr_rid = get_header(data, "request", "dd-root-session-id") + curr_pid = get_header(data, "request", "dd-parent-session-id") + curr_id = data["request"]["content"].get("runtime_id") + + # Instance id (DD-Session-ID) must be present in all lifecycle events and equal to runtime_id + assert curr_sid is not None, f"DD-Session-ID is required in telemetry data: {data}" + assert curr_sid == curr_id, f"DD-Session-ID must match runtime_id: {curr_sid} != {curr_id}" + + runtime_ids.add(curr_id) + if curr_pid is not None: + parent_runtime_ids.add(curr_pid) + if curr_rid is not None: + root_runtime_ids.add(curr_rid) + else: + # If dd-root-session-id is not set, dd-session-id is treated as root + root_runtime_ids.add(curr_id) + + # One root per app instance: all processes share the same root session ID + assert len(root_runtime_ids) == 1, f"Expected 1 root runtime_id, got {root_runtime_ids}" + + if len(runtime_ids) > 1: + # Multiple runtimes (per-process tracers): root must be consistent + # across all payloads from all processes + if parent_runtime_ids: + # DD-Parent-Session-ID is optional but must reference a known runtime if present + missing_parent_runtime_ids = parent_runtime_ids.difference(runtime_ids) + assert not missing_parent_runtime_ids, ( + f"Parent runtime_id with no telemetry data: {missing_parent_runtime_ids}" + ) + 
else: + # Single runtime (e.g. nginx workers sharing one tracer): session ID + # must be consistent across all events + sole_rid = next(iter(runtime_ids)) + sole_root = next(iter(root_runtime_ids)) + assert sole_rid == sole_root, f"Single runtime_id {sole_rid} does not match root {sole_root}" + + def test_session_id_headers_across_forks(self): + """Test session ID headers in telemetry (fork=true). Stable Service Instance Identifier RFC.""" + self._validate_session_id_headers_across_processes() + + def test_session_id_headers_across_spawned(self): + """Test session ID headers in telemetry (fork=false, exec). Stable Service Instance Identifier RFC.""" + self._validate_session_id_headers_across_processes() + @features.telemetry_app_started_event @scenarios.telemetry_enhanced_config_reporting diff --git a/utils/build/docker/cpp_nginx/nginx/backend.c b/utils/build/docker/cpp_nginx/nginx/backend.c index ad18e5a7c15..0c2b896d5c7 100644 --- a/utils/build/docker/cpp_nginx/nginx/backend.c +++ b/utils/build/docker/cpp_nginx/nginx/backend.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #define PORT 7778 @@ -310,6 +311,85 @@ static enum MHD_Result answer_to_connection(void *cls, struct MHD_Connection *co return ret; } + if (strcmp(url, "/spawn_child") == 0) { + const char *sleep_str = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "sleep"); + const char *crash_str = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "crash"); + const char *fork_str = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "fork"); + + if (!sleep_str || !crash_str || !fork_str) { + const char *msg = "sleep, crash, and fork parameters required"; + struct MHD_Response *response = MHD_create_response_from_buffer( + strlen(msg), (void *)msg, MHD_RESPMEM_PERSISTENT); + int ret = MHD_queue_response(connection, 400, response); + MHD_destroy_response(response); + return ret; + } + + int sleep_secs = atoi(sleep_str); + bool do_crash = strcmp(crash_str, 
"true") == 0; + bool use_fork = strcmp(fork_str, "true") == 0; + + if (use_fork) { + pid_t pid = fork(); + if (pid < 0) { + const char *msg = "fork failed"; + struct MHD_Response *response = MHD_create_response_from_buffer( + strlen(msg), (void *)msg, MHD_RESPMEM_PERSISTENT); + int ret = MHD_queue_response(connection, 500, response); + MHD_destroy_response(response); + return ret; + } + if (pid == 0) { + sleep(sleep_secs); + if (do_crash) { + raise(SIGSEGV); + } + _exit(0); + } + int wstatus; + waitpid(pid, &wstatus, 0); + char buf[128]; + snprintf(buf, sizeof(buf), "Child process %d exited with status %d", pid, WEXITSTATUS(wstatus)); + struct MHD_Response *response = MHD_create_response_from_buffer( + strlen(buf), buf, MHD_RESPMEM_MUST_COPY); + int ret = MHD_queue_response(connection, 200, response); + MHD_destroy_response(response); + return ret; + } + + /* exec path: fork + exec a child process */ + { + pid_t pid = fork(); + if (pid < 0) { + const char *msg = "fork failed"; + struct MHD_Response *response = MHD_create_response_from_buffer( + strlen(msg), (void *)msg, MHD_RESPMEM_PERSISTENT); + int ret = MHD_queue_response(connection, 500, response); + MHD_destroy_response(response); + return ret; + } + if (pid == 0) { + if (do_crash) { + execlp("sh", "sh", "-c", + sleep_str[0] ? 
"sleep $0 && kill -SEGV $$" : "kill -SEGV $$", + sleep_str, (char *)NULL); + } else { + execlp("sleep", "sleep", sleep_str, (char *)NULL); + } + _exit(1); + } + int wstatus; + waitpid(pid, &wstatus, 0); + char buf[128]; + snprintf(buf, sizeof(buf), "Child process %d exited with status %d", pid, WEXITSTATUS(wstatus)); + struct MHD_Response *response = MHD_create_response_from_buffer( + strlen(buf), buf, MHD_RESPMEM_MUST_COPY); + int ret = MHD_queue_response(connection, 200, response); + MHD_destroy_response(response); + return ret; + } + } + if (strcmp(url, "/content") != 0 || !status_str || !value) return MHD_NO; // Only respond to the correct URL and if all parameters are present diff --git a/utils/build/docker/cpp_nginx/nginx/nginx-waf.conf b/utils/build/docker/cpp_nginx/nginx/nginx-waf.conf index 3de9e6b716b..0cbea98a35a 100644 --- a/utils/build/docker/cpp_nginx/nginx/nginx-waf.conf +++ b/utils/build/docker/cpp_nginx/nginx/nginx-waf.conf @@ -1,6 +1,7 @@ error_log /var/log/nginx/error.log info; load_module modules/ngx_http_datadog_module.so; +worker_processes 2; thread_pool waf_thread_pool threads=2 max_queue=1000; events { diff --git a/utils/build/docker/cpp_nginx/nginx/nginx.conf b/utils/build/docker/cpp_nginx/nginx/nginx.conf index bedb6d6d4b3..3f5d6c3fe2e 100644 --- a/utils/build/docker/cpp_nginx/nginx/nginx.conf +++ b/utils/build/docker/cpp_nginx/nginx/nginx.conf @@ -1,5 +1,7 @@ load_module modules/ngx_http_datadog_module.so; +worker_processes 2; + events { worker_connections 1024; } @@ -44,6 +46,10 @@ http { proxy_pass http://127.0.0.1:7778; } + location /spawn_child { + proxy_pass http://127.0.0.1:7778; + } + location / { root /builds; try_files /hello.html =404; diff --git a/utils/build/docker/dotnet/weblog/Endpoints/SpawnChildEndpoint.cs b/utils/build/docker/dotnet/weblog/Endpoints/SpawnChildEndpoint.cs new file mode 100644 index 00000000000..afdd7d38ec8 --- /dev/null +++ b/utils/build/docker/dotnet/weblog/Endpoints/SpawnChildEndpoint.cs @@ -0,0 +1,83 @@ 
+using System; +using System.Diagnostics; +using System.Linq; +using Microsoft.AspNetCore.Builder; +using Microsoft.AspNetCore.Http; + +namespace weblog +{ + /// <summary> + /// Spawn child for telemetry session ID header tests. Inspired by lib-injection fork_and_crash: + /// fork=true is rejected with 400 (.NET cannot fork); fork=false re-executes the weblog binary with SPAWN_CHILD_* env vars. + /// </summary> + public class SpawnChildEndpoint : ISystemTestEndpoint + { + public void Register(Microsoft.AspNetCore.Routing.IEndpointRouteBuilder routeBuilder) + { + routeBuilder.MapGet("/spawn_child", async context => + { + var sleepStr = context.Request.Query["sleep"].ToString(); + var crashStr = (context.Request.Query["crash"].ToString() ?? "").ToLowerInvariant(); + var forkStr = (context.Request.Query["fork"].ToString() ?? "").ToLowerInvariant(); + + if (string.IsNullOrEmpty(sleepStr) || !int.TryParse(sleepStr, out int sleep) || sleep < 0) + { + context.Response.StatusCode = 400; + await context.Response.WriteAsync("sleep required"); + return; + } + if (crashStr != "true" && crashStr != "false") + { + context.Response.StatusCode = 400; + await context.Response.WriteAsync("crash required (boolean)"); + return; + } + if (forkStr != "true" && forkStr != "false") + { + context.Response.StatusCode = 400; + await context.Response.WriteAsync("fork required (boolean)"); + return; + } + + if (forkStr == "true") + { + context.Response.StatusCode = 400; + await context.Response.WriteAsync("fork not supported in .NET"); + return; + } + + var crash = crashStr == "true"; + + // Re-exec the weblog binary as a child process. The CLR profiler + // auto-attaches dd-trace-dotnet, so the child emits its own telemetry. + var cmdArgs = Environment.GetCommandLineArgs(); + var args = cmdArgs.Length > 1 ? string.Join(" ", cmdArgs.Skip(1)) : "app.dll"; + var startInfo = new ProcessStartInfo + { + FileName = Environment.ProcessPath ?? 
"/usr/share/dotnet/dotnet", + Arguments = args, + WorkingDirectory = Environment.CurrentDirectory, + }; + startInfo.Environment["SPAWN_CHILD_FORKED"] = "1"; + startInfo.Environment["SPAWN_CHILD_SLEEP"] = sleep.ToString(); + startInfo.Environment["SPAWN_CHILD_CRASH"] = crash ? "1" : "0"; + + var process = Process.Start(startInfo); + + if (process == null) + { + context.Response.StatusCode = 500; + await context.Response.WriteAsync("Failed to start child process"); + return; + } + + using (process) + { + await process.WaitForExitAsync(); + context.Response.ContentType = "text/plain"; + await context.Response.WriteAsync($"Process {process.Id} has exited with code {process.ExitCode}"); + } + }); + } + } +} diff --git a/utils/build/docker/dotnet/weblog/Program.cs b/utils/build/docker/dotnet/weblog/Program.cs index d6b1cba06d0..684ffb681ab 100644 --- a/utils/build/docker/dotnet/weblog/Program.cs +++ b/utils/build/docker/dotnet/weblog/Program.cs @@ -1,4 +1,5 @@ using System; +using System.Threading; using Microsoft.AspNetCore.Hosting; using Microsoft.Extensions.Hosting; using Microsoft.Extensions.Http; @@ -11,6 +12,24 @@ public class Program { public static void Main(string[] args) { + // Spawn-child forked mode (inspired by lib-injection fork_and_crash): sleep then optionally crash + if (Environment.GetEnvironmentVariable("SPAWN_CHILD_FORKED") != null) + { + var sleepSec = int.TryParse(Environment.GetEnvironmentVariable("SPAWN_CHILD_SLEEP"), out var s) ? 
s : 0; + var doCrash = Environment.GetEnvironmentVariable("SPAWN_CHILD_CRASH") == "1"; + if (sleepSec > 0) + { + Thread.Sleep(sleepSec * 1000); + } + if (doCrash) + { + var t = new Thread(() => throw new BadImageFormatException("spawn_child crash")); + t.Start(); + t.Join(); + } + return; + } + // Enable Datadog log injection only if CONFIG_CHAINING_TEST is set to "true" if (Environment.GetEnvironmentVariable("CONFIG_CHAINING_TEST") == "true") { diff --git a/utils/build/docker/golang/app/_shared/common/spawn_child.go b/utils/build/docker/golang/app/_shared/common/spawn_child.go new file mode 100644 index 00000000000..6c167073733 --- /dev/null +++ b/utils/build/docker/golang/app/_shared/common/spawn_child.go @@ -0,0 +1,80 @@ +package common + +import ( + "fmt" + "net/http" + "os" + "os/exec" + "strconv" + "strings" + "syscall" + "time" + + "github.com/DataDog/dd-trace-go/v2/ddtrace/tracer" +) + +// RunAsChildIfRequested checks if the process was re-exec'd in child mode. +// If so, it initializes the tracer, sleeps, optionally crashes, then exits. +// Call this at the top of main() before any other initialization. +func RunAsChildIfRequested() { + sleepStr := os.Getenv("DD_SYSTEM_TEST_CHILD_SLEEP") + if sleepStr == "" { + return + } + sleep, _ := strconv.Atoi(sleepStr) + crash := os.Getenv("DD_SYSTEM_TEST_CHILD_CRASH") == "true" + + tracer.Start() + time.Sleep(time.Duration(sleep) * time.Second) + tracer.Stop() + + if crash { + syscall.Kill(syscall.Getpid(), syscall.SIGSEGV) + } + os.Exit(0) +} + +// SpawnChild handles GET /spawn_child for telemetry session ID header tests. +// Go does not support fork; returns 400 when fork=true. Otherwise re-execs +// the current binary in child mode, which initializes dd-trace-go and emits +// its own telemetry. The SDK propagates _DD_ROOT_GO_SESSION_ID via the process +// environment so that child processes inherit the root session ID automatically. 
+func SpawnChild(w http.ResponseWriter, r *http.Request) { + sleepStr := r.URL.Query().Get("sleep") + crashStr := strings.ToLower(r.URL.Query().Get("crash")) + forkStr := strings.ToLower(r.URL.Query().Get("fork")) + + sleep, err := strconv.Atoi(sleepStr) + if err != nil || sleep < 0 { + http.Error(w, "sleep required", http.StatusBadRequest) + return + } + if crashStr != "true" && crashStr != "false" { + http.Error(w, "crash required (boolean)", http.StatusBadRequest) + return + } + if forkStr != "true" && forkStr != "false" { + http.Error(w, "fork required (boolean)", http.StatusBadRequest) + return + } + if forkStr == "true" { + http.Error(w, "fork not supported", http.StatusBadRequest) + return + } + + cmd := exec.Command(os.Args[0]) + cmd.Env = append(os.Environ(), + "DD_SYSTEM_TEST_CHILD_SLEEP="+sleepStr, + "DD_SYSTEM_TEST_CHILD_CRASH="+crashStr, + ) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + _ = cmd.Run() + status := 0 + if cmd.ProcessState != nil { + status = cmd.ProcessState.ExitCode() + } + w.Header().Set("Content-Type", "text/plain") + w.Write([]byte(fmt.Sprintf("Child process exited with status %d", status))) +} diff --git a/utils/build/docker/golang/app/net-http/main.go b/utils/build/docker/golang/app/net-http/main.go index 66674e90132..f1456570712 100644 --- a/utils/build/docker/golang/app/net-http/main.go +++ b/utils/build/docker/golang/app/net-http/main.go @@ -42,6 +42,8 @@ import ( ) func main() { + common.RunAsChildIfRequested() + logrus.SetFormatter(&logrus.JSONFormatter{}) logrus.SetOutput(os.Stdout) logrus.SetLevel(logrus.DebugLevel) @@ -187,6 +189,8 @@ func main() { w.Write([]byte("OK")) }) + mux.HandleFunc("/spawn_child", common.SpawnChild) + mux.HandleFunc("/make_distant_call", func(w http.ResponseWriter, r *http.Request) { url := r.URL.Query().Get("url") if url == "" { diff --git a/utils/build/docker/java/spring-boot/src/main/java/com/datadoghq/system_tests/springboot/App.java 
b/utils/build/docker/java/spring-boot/src/main/java/com/datadoghq/system_tests/springboot/App.java index 1e371c34983..db98aca09de 100644 --- a/utils/build/docker/java/spring-boot/src/main/java/com/datadoghq/system_tests/springboot/App.java +++ b/utils/build/docker/java/spring-boot/src/main/java/com/datadoghq/system_tests/springboot/App.java @@ -321,6 +321,39 @@ ResponseEntity status(@RequestParam Integer code) { return new ResponseEntity<>(HttpStatus.valueOf(code)); } + @GetMapping("/spawn_child") + ResponseEntity spawnChild( + @RequestParam(required = false) Integer sleep, + @RequestParam(required = false) String crash, + @RequestParam(required = false) String fork) { + if (sleep == null || sleep < 0) { + return ResponseEntity.badRequest().body("sleep required"); + } + if (crash == null || (!crash.equalsIgnoreCase("true") && !crash.equalsIgnoreCase("false"))) { + return ResponseEntity.badRequest().body("crash required (boolean)"); + } + if (fork == null || (!fork.equalsIgnoreCase("true") && !fork.equalsIgnoreCase("false"))) { + return ResponseEntity.badRequest().body("fork required (boolean)"); + } + if (fork.equalsIgnoreCase("true")) { + return ResponseEntity.badRequest().body("fork not supported"); + } + try { + ProcessBuilder pb = new ProcessBuilder( + "java", "-Xmx128m", + "-javaagent:/app/dd-java-agent.jar", + "-jar", "/app/app.jar"); + pb.environment().put("DD_SYSTEM_TEST_CHILD_SLEEP", String.valueOf(sleep)); + pb.environment().put("DD_SYSTEM_TEST_CHILD_CRASH", crash.toLowerCase()); + pb.inheritIO(); + Process p = pb.start(); + int exitCode = p.waitFor(); + return ResponseEntity.ok("Process " + p.pid() + " has exited with code " + exitCode); + } catch (Exception e) { + return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body("Failed: " + e.getMessage()); + } + } + @RequestMapping("/stats-unique") ResponseEntity statsUnique(@RequestParam(defaultValue = "200") Integer code) { return new ResponseEntity<>(HttpStatus.valueOf(code)); @@ -1440,7 +1473,16 
/**
 * Handler for GET /spawn_child.
 *
 * Starts `fork_child.js` as a child process, waits for it to terminate and
 * reports its pid, exit code and terminating signal.
 *
 * Query parameters (all mandatory, HTTP 400 when missing or malformed):
 *   - sleep: non-negative integer number of seconds, passed to the child as argv
 *   - crash: 'true' | 'false' (case-insensitive), passed to the child as argv
 *   - fork:  'true' uses child_process.fork, 'false' uses child_process.spawn
 *            on the current Node executable with inherited stdio
 *
 * @param {object} req Express request
 * @param {object} res Express response
 */
function subprocessAndExitHandler (req, res) {
  const path = require('path')
  const childProcess = require('child_process')

  const sleep = req.query.sleep != null ? String(req.query.sleep).trim() : ''
  const crashStr = String(req.query.crash || '').toLowerCase()
  const forkStr = String(req.query.fork || '').toLowerCase()

  // Digits only: rejects a missing value, negatives, and non-numeric input
  // that the child would otherwise parse to NaN.
  if (!/^\d+$/.test(sleep)) {
    res.status(400).send('sleep required')
    return
  }
  if (crashStr !== 'true' && crashStr !== 'false') {
    res.status(400).send('crash required (boolean)')
    return
  }
  if (forkStr !== 'true' && forkStr !== 'false') {
    res.status(400).send('fork required (boolean)')
    return
  }

  const childScript = path.join(__dirname, 'fork_child.js')
  const child = forkStr === 'true'
    ? childProcess.fork(childScript, [sleep, crashStr])
    : childProcess.spawn(process.execPath, [childScript, sleep, crashStr], { stdio: 'inherit' })

  // Without an 'error' listener a failed spawn is thrown as an unhandled
  // 'error' event and the request would never be answered.
  child.once('error', (err) => {
    if (!res.headersSent) {
      res.status(500).send(`Failed to start child process: ${err.message}`)
    }
  })
  // 'close' may still fire after 'error'; the headersSent guard prevents a second response.
  child.once('close', (code, signal) => {
    if (!res.headersSent) {
      res.send(`Child process ${child.pid} exited with code ${code}, signal ${signal}`)
    }
  })
}
app.get('/spawn_child', subprocessAndExitHandler)
@app.route("/spawn_child")
def spawn_child():
    """Spawn a child process via fork or exec and wait for it to terminate.

    Used for telemetry session ID header tests.

    Query parameters (all mandatory; HTTP 400 when missing or invalid):
        sleep: non-negative integer, seconds the child sleeps before exiting.
        crash: "true"/"false" (case-insensitive); when true the child sends
            itself SIGSEGV after sleeping, otherwise it exits with code 0.
        fork: "true" to use ``os.fork``, "false" to start a fresh interpreter
            with ``subprocess.run``.

    Returns:
        A plain-text message with the child's wait status (fork path) or
        return code (exec path), or an ``(error message, 400)`` tuple.
    """
    sleep_arg = request.args.get("sleep", type=int)
    crash_arg = request.args.get("crash", "").lower()
    fork_arg = (request.args.get("fork") or "").lower()
    # Negative values are rejected up front: time.sleep() would raise ValueError,
    # and in the fork path that exception would surface inside the forked copy
    # of this web worker.
    if sleep_arg is None or sleep_arg < 0:
        return "sleep required", 400
    if crash_arg not in ("true", "false"):
        return "crash required (boolean)", 400
    if fork_arg not in ("true", "false"):
        return "fork required (boolean)", 400
    crash = crash_arg == "true"
    use_fork = fork_arg == "true"

    if use_fork:
        pid = os.fork()
        if pid > 0:
            # Parent: block until the child is gone, then report its raw wait status.
            _, status = os.waitpid(pid, 0)
            return f"Child process {pid} exited with status {status}"
        # Child: must never return into the request handler.
        try:
            time.sleep(sleep_arg)
            if crash:
                os.kill(os.getpid(), signal.SIGSEGV)
            # SystemExit is not an Exception subclass, so it is not intercepted below.
            sys.exit(0)
        except Exception:
            # Unexpected failure in the child: terminate immediately instead of
            # letting a second copy of the server answer this request.
            os._exit(1)

    # exec path: run the same sleep/crash sequence in a fresh interpreter.
    child_code = (
        "import time, sys, os, signal; "
        f"time.sleep({sleep_arg}); "
        f"os.kill(os.getpid(), signal.SIGSEGV) if {crash} else sys.exit(0)"
    )
    # The timeout leaves a 5 second margin on top of the requested sleep.
    proc = subprocess.run([sys.executable, "-c", child_code], timeout=sleep_arg + 5)
    return f"Child process exited with status {proc.returncode}"
+ render plain: 'sleep required', status: 400 + return + end + unless %w[true false].include?(crash) + render plain: 'crash required (boolean)', status: 400 + return + end + unless %w[true false].include?(fork_param) + render plain: 'fork required (boolean)', status: 400 + return + end + do_crash = crash == 'true' + use_fork = fork_param == 'true' + + if use_fork + pid = Process.fork do + sleep(sleep_sec) + do_crash ? Process.kill('SEGV', Process.pid) : exit(0) + end + Process.wait(pid) + render plain: "Child process #{pid} exited" + else + pid = Process.spawn('ruby', '-e', "sleep(#{sleep_sec}); #{do_crash ? "Process.kill('SEGV', Process.pid)" : 'exit(0)'}") + Process.wait(pid) + render plain: "Child process #{pid} exited" + end + end + def waf render plain: 'Hello, world!' end diff --git a/utils/build/docker/ruby/rails72/config/routes.rb b/utils/build/docker/ruby/rails72/config/routes.rb index 3301f2cb357..31c45233a38 100644 --- a/utils/build/docker/ruby/rails72/config/routes.rb +++ b/utils/build/docker/ruby/rails72/config/routes.rb @@ -16,6 +16,7 @@ def call(_env) get '/healthcheck' => 'internal#healthcheck' get '/flush' => 'internal#flush' + get '/spawn_child' => 'system_test#spawn_child' get '/waf' => 'system_test#waf' post '/waf' => 'system_test#waf' diff --git a/utils/interfaces/_library/core.py b/utils/interfaces/_library/core.py index 3a15879d376..94f89ef8d34 100644 --- a/utils/interfaces/_library/core.py +++ b/utils/interfaces/_library/core.py @@ -22,6 +22,15 @@ from utils._weblog import HttpResponse, GrpcResponse from utils.interfaces._misc_validators import HeadersPresenceValidator +LIFECYCLE_EVENTS = [ + "app-started", + "app-closing", + "app-integrations-change", + "app-dependencies-loaded", + "app-client-configuration-change", + "app-product-change", +] + class LibraryInterfaceValidator(ProxyBasedInterfaceValidator): """Validate library/agent interface""" @@ -217,6 +226,13 @@ def get_telemetry_data(self, *, flatten_message_batches: bool = True): else: 
def get_lifecycle_events(self):
    """Yield telemetry entries whose request_type is listed in LIFECYCLE_EVENTS.

    Entries come from ``get_telemetry_data(flatten_message_batches=True)``;
    each entry's ``["request"]["content"]`` mapping is inspected, and entries
    with a missing or unlisted ``request_type`` are skipped.
    """
    for entry in self.get_telemetry_data(flatten_message_batches=True):
        request_type = entry["request"]["content"].get("request_type")
        if request_type in LIFECYCLE_EVENTS:
            yield entry