From 7e1a83391a6a55c6db7d054fb0f96b8d2497459d Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Wed, 19 Jun 2024 16:24:02 +0200 Subject: [PATCH] fix: regression in sampling delegation handling This fixes a regression introduced by Sampling Delegation (#59). When the sampling delegation header is present during the trace context extraction, the sampling priority is discarded even if the feature is disabled. This change of behaviour can potentially result in an increase of ingested spans. Additionnally, the tracer should behave as before the introduction of this feature when it is not enabled. Changes: - Sampling priority is not longer discard when `x-datadog-delegate-trace-sampling` is present. - Update sampling delegation tests. --- src/datadog/extraction_util.cpp | 40 +++++-------- src/datadog/tracer.cpp | 81 +++++++++++++------------ test/test_tracer.cpp | 101 ++++++++++++++++++++------------ 3 files changed, 125 insertions(+), 97 deletions(-) diff --git a/src/datadog/extraction_util.cpp b/src/datadog/extraction_util.cpp index 4eedcd7b..f8aeb521 100644 --- a/src/datadog/extraction_util.cpp +++ b/src/datadog/extraction_util.cpp @@ -17,9 +17,6 @@ namespace datadog { namespace tracing { namespace { -constexpr StringView sampling_delegation_request_header = - "x-datadog-delegate-trace-sampling"; - // Decode the specified `trace_tags` and integrate them into the specified // `result`. If an error occurs, add a `tags::internal::propagation_error` tag // to the specified `span_tags` and log a diagnostic using the specified @@ -129,29 +126,24 @@ Expected extract_datadog( } result.parent_id = *parent_id; + if (auto found = headers.lookup("x-datadog-sampling-priority")) { + auto sampling_priority = parse_int(*found, 10); + if (auto* error = sampling_priority.if_error()) { + std::string prefix; + prefix += + "Could not extract Datadog-style sampling priority from " + "x-datadog-sampling-priority: "; + append(prefix, *found); + prefix += ' '; + return error->with_prefix(prefix); + } + + result.sampling_priority = *sampling_priority; + } + if (auto sampling_delegation_header = - headers.lookup(sampling_delegation_request_header)) { + headers.lookup("x-datadog-delegate-trace-sampling")) { result.delegate_sampling_decision = true; - // If the trace sampling decision is being delegated to us, then we don't - // interpret the sampling priority (if any) included in the request. - } else { - result.delegate_sampling_decision = false; - - const StringView sampling_priority_header = "x-datadog-sampling-priority"; - if (auto found = headers.lookup(sampling_priority_header)) { - auto sampling_priority = parse_int(*found, 10); - if (auto* error = sampling_priority.if_error()) { - std::string prefix; - prefix += "Could not extract Datadog-style sampling priority from "; - append(prefix, sampling_priority_header); - prefix += ": "; - append(prefix, *found); - prefix += ' '; - return error->with_prefix(prefix); - } - - result.sampling_priority = *sampling_priority; - } } auto origin = headers.lookup("x-datadog-origin"); diff --git a/src/datadog/tracer.cpp b/src/datadog/tracer.cpp index 76bb19a5..ebc37bf6 100644 --- a/src/datadog/tracer.cpp +++ b/src/datadog/tracer.cpp @@ -176,10 +176,7 @@ Expected Tracer::extract_span(const DictReader& reader, extracted_contexts.back().headers_examined = audited_reader.entries_found; } - auto [trace_id, parent_id, origin, trace_tags, delegate_sampling_decision, - sampling_priority, datadog_w3c_parent_id, additional_w3c_tracestate, - additional_datadog_w3c_tracestate, style, headers_examined] = - merge(extracted_contexts); + auto merged_context = merge(extracted_contexts); // Some information might be missing. // Here are the combinations considered: @@ -196,56 +193,60 @@ Expected Tracer::extract_span(const DictReader& reader, // - trace ID and parent ID means we're extracting a child span // - if trace ID is zero, then that's an error. - if (!trace_id && !parent_id) { + if (!merged_context.trace_id && !merged_context.parent_id) { return Error{Error::NO_SPAN_TO_EXTRACT, "There's neither a trace ID nor a parent span ID to extract."} - .with_prefix(extraction_error_prefix(style, headers_examined)); + .with_prefix(extraction_error_prefix(merged_context.style, + merged_context.headers_examined)); } - if (!trace_id) { + if (!merged_context.trace_id) { std::string message; message += "There's no trace ID to extract, but there is a parent span ID: "; - message += std::to_string(*parent_id); + message += std::to_string(*merged_context.parent_id); return Error{Error::MISSING_TRACE_ID, std::move(message)}.with_prefix( - extraction_error_prefix(style, headers_examined)); + extraction_error_prefix(merged_context.style, + merged_context.headers_examined)); } - if (!parent_id && !origin) { + if (!merged_context.parent_id && !merged_context.origin) { std::string message; message += "There's no parent span ID to extract, but there is a trace ID: "; message += "[hexadecimal = "; - message += trace_id->hex_padded(); - if (trace_id->high == 0) { + message += merged_context.trace_id->hex_padded(); + if (merged_context.trace_id->high == 0) { message += ", decimal = "; - message += std::to_string(trace_id->low); + message += std::to_string(merged_context.trace_id->low); } message += ']'; return Error{Error::MISSING_PARENT_SPAN_ID, std::move(message)}.with_prefix( - extraction_error_prefix(style, headers_examined)); + extraction_error_prefix(merged_context.style, + merged_context.headers_examined)); } - if (!parent_id) { + if (!merged_context.parent_id) { // We have a trace ID, but not parent ID. We're meant to be the root, and // whoever called us already created a trace ID for us (to correlate with // whatever they're doing). - parent_id = 0; + merged_context.parent_id = 0; } - assert(parent_id); - assert(trace_id); + assert(merged_context.parent_id); + assert(merged_context.trace_id); - if (*trace_id == 0) { + if (*merged_context.trace_id == 0) { return Error{Error::ZERO_TRACE_ID, "extracted zero value for trace ID, which is invalid"} - .with_prefix(extraction_error_prefix(style, headers_examined)); + .with_prefix(extraction_error_prefix(merged_context.style, + merged_context.headers_examined)); } // We're done extracting fields. Now create the span. // This is similar to what we do in `create_span`. span_data->apply_config(*config_manager_->span_defaults(), config, clock_); span_data->span_id = generator_->span_id(); - span_data->trace_id = *trace_id; - span_data->parent_id = *parent_id; + span_data->trace_id = *merged_context.trace_id; + span_data->parent_id = *merged_context.parent_id; if (span_data->trace_id.high) { // The trace ID has some bits set in the higher 64 bits. Set the @@ -255,12 +256,14 @@ Expected Tracer::extract_span(const DictReader& reader, // First, though, if the `trace_id_high` tag is already set and has a // bogus value or a value inconsistent with the trace ID, tag an error. const auto hex_high = hex_padded(span_data->trace_id.high); - const auto extant = std::find_if( - trace_tags.begin(), trace_tags.end(), [&](const auto& pair) { - return pair.first == tags::internal::trace_id_high; - }); - if (extant == trace_tags.end()) { - trace_tags.emplace_back(tags::internal::trace_id_high, hex_high); + const auto extant = + std::find_if(merged_context.trace_tags.begin(), + merged_context.trace_tags.end(), [&](const auto& pair) { + return pair.first == tags::internal::trace_id_high; + }); + if (extant == merged_context.trace_tags.end()) { + merged_context.trace_tags.emplace_back(tags::internal::trace_id_high, + hex_high); } else { // There is already a `trace_id_high` tag. `hex_high` is its proper // value. Check if the extant value is malformed or different from @@ -279,14 +282,18 @@ Expected Tracer::extract_span(const DictReader& reader, } } - if (datadog_w3c_parent_id) { - span_data->tags[tags::internal::w3c_parent_id] = *datadog_w3c_parent_id; + if (merged_context.datadog_w3c_parent_id) { + span_data->tags[tags::internal::w3c_parent_id] = + *merged_context.datadog_w3c_parent_id; } + const bool delegate_sampling_decision = + sampling_delegation_enabled_ && merged_context.delegate_sampling_decision; + Optional sampling_decision; - if (sampling_priority) { + if (!delegate_sampling_decision && merged_context.sampling_priority) { SamplingDecision decision; - decision.priority = *sampling_priority; + decision.priority = *merged_context.sampling_priority; // `decision.mechanism` is null. We might be able to infer it once we // extract `trace_tags`, but we would have no use for it, so we won't. decision.origin = SamplingDecision::Origin::EXTRACTED; @@ -300,10 +307,12 @@ Expected Tracer::extract_span(const DictReader& reader, logger_, collector_, tracer_telemetry_, config_manager_->trace_sampler(), span_sampler_, config_manager_->span_defaults(), config_manager_, runtime_id_, sampling_delegation_enabled_, delegate_sampling_decision, - injection_styles_, hostname_, std::move(origin), tags_header_max_size_, - std::move(trace_tags), std::move(sampling_decision), - std::move(additional_w3c_tracestate), - std::move(additional_datadog_w3c_tracestate), std::move(span_data)); + injection_styles_, hostname_, std::move(merged_context.origin), + tags_header_max_size_, std::move(merged_context.trace_tags), + std::move(sampling_decision), + std::move(merged_context.additional_w3c_tracestate), + std::move(merged_context.additional_datadog_w3c_tracestate), + std::move(span_data)); Span span{span_data_ptr, segment, [generator = generator_]() { return generator->span_id(); }, clock_}; diff --git a/test/test_tracer.cpp b/test/test_tracer.cpp index e108d100..e8285cf5 100644 --- a/test/test_tracer.cpp +++ b/test/test_tracer.cpp @@ -445,16 +445,6 @@ TEST_CASE("span extraction") { TraceID(123), 456, 2}, - {__LINE__, - "datadog style with delegate header", - {PropagationStyle::DATADOG}, - {{"x-datadog-trace-id", "123"}, - {"x-datadog-parent-id", "456"}, - {"x-datadog-delegate-trace-sampling", "delegate"}, - {"x-datadog-sampling-priority", "3"}}, - TraceID(123), - 456, - nullopt}, {__LINE__, "datadog style without sampling priority", {PropagationStyle::DATADOG}, @@ -1204,7 +1194,12 @@ TEST_CASE("span extraction") { MockDictReader reader{headers}; auto span = tracer.extract_span(reader); REQUIRE(span); - REQUIRE(!span->trace_segment().sampling_decision()); + + if (*config.delegate_trace_sampling) { + REQUIRE(!span->trace_segment().sampling_decision()); + } else { + REQUIRE(span->trace_segment().sampling_decision()); + } MockDictWriter writer; span->inject(writer); @@ -1409,6 +1404,49 @@ TEST_CASE( test_case.expected_error_prefix + test_case.tid_tag_value); } +TEST_CASE("sampling delegation extraction") { + const bool enable_sampling_delegation = GENERATE(true, false); + + CAPTURE(enable_sampling_delegation); + + const auto logger = std::make_shared(); + const auto collector = std::make_shared(); + + TracerConfig config; + config.service = "test-sampling-delegation"; + config.logger = logger; + config.collector = collector; + config.extraction_styles = {PropagationStyle::DATADOG}; + config.trace_sampler.sample_rate = 1.; + config.delegate_trace_sampling = enable_sampling_delegation; + + auto validated_config = finalize_config(config); + REQUIRE(validated_config); + + Tracer tracer(*validated_config); + + const std::unordered_map headers{ + {"x-datadog-trace-id", "17491188783264004180"}, + {"x-datadog-parent-id", "3390700340160032468"}, + {"x-datadog-sampling-priority", "-1"}, + {"x-datadog-tags", "_dd.p.tid=66718e8c00000000"}, + {"x-datadog-delegate-trace-sampling", "delegate"}, + }; + + MockDictReader propagation_reader{headers}; + const auto maybe_span = tracer.extract_span(propagation_reader); + REQUIRE(maybe_span); + + auto sampling_decision = maybe_span->trace_segment().sampling_decision(); + if (enable_sampling_delegation) { + CHECK(!sampling_decision.has_value()); + } else { + REQUIRE(sampling_decision.has_value()); + CHECK(sampling_decision->origin == SamplingDecision::Origin::EXTRACTED); + CHECK(sampling_decision->priority == int(SamplingPriority::USER_DROP)); + } +} + TEST_CASE("_dd.is_sampling_decider") { // This test involves three tracers: "service1", "service2", and "service3". // Each calls the next, and each produces two spans: "local_root" and "child". @@ -1451,6 +1489,7 @@ TEST_CASE("_dd.is_sampling_decider") { config2.collector = collector; config2.logger = logger; config2.service = "service2"; + config2.trace_sampler.sample_rate = 1; // keep all traces config2.delegate_trace_sampling = true; TracerConfig config3; @@ -1580,7 +1619,9 @@ TEST_CASE("_dd.is_sampling_decider") { } REQUIRE(span.service != "service1"); if (span.service == "service2" && span.name == "local_root") { - REQUIRE(span.tags.count(tags::internal::sampling_decider) == 0); + const bool made_the_decision = service3_delegation_enabled ? 0 : 1; + REQUIRE(span.tags.count(tags::internal::sampling_decider) == + made_the_decision); REQUIRE(span.numeric_tags.count(tags::internal::sampling_priority) == 1); REQUIRE(span.numeric_tags.at(tags::internal::sampling_priority) == @@ -1595,8 +1636,9 @@ TEST_CASE("_dd.is_sampling_decider") { } REQUIRE(span.service != "service2"); if (span.service == "service3" && span.name == "local_root") { - REQUIRE(span.tags.count(tags::internal::sampling_decider) == 1); - REQUIRE(span.tags.at(tags::internal::sampling_decider) == "1"); + const bool made_the_decision = service3_delegation_enabled ? 1 : 0; + REQUIRE(span.tags.count(tags::internal::sampling_decider) == + made_the_decision); REQUIRE(span.numeric_tags.count(tags::internal::sampling_priority) == 1); REQUIRE(span.numeric_tags.at(tags::internal::sampling_priority) == @@ -1649,27 +1691,13 @@ TEST_CASE("sampling delegation is not an override") { // // The idea is that service2 does not perform delegation when service1 already // made a decision and did not request delegation. - auto service1_sampling_priority = - GENERATE(values>({nullopt, -1, 0, 1, 2})); auto service1_delegate = GENERATE(true, false); auto service3_sample_rate = GENERATE(0.0, 1.0); - int expected_sampling_priority; - if (service1_sampling_priority.has_value() && !service1_delegate) { - // Service1 made a decision and didn't ask for delegation, so that's the - // decision. - expected_sampling_priority = *service1_sampling_priority; - } else { - // Service1 either didn't make a decision or did request delegation, so the - // decision will be delegated through service2 to service3, whose decision - // depends on its configured sample rate. - expected_sampling_priority = service3_sample_rate == 0.0 ? -1 : 2; - } + const int service3_sampling_priority = service3_sample_rate == 0.0 ? -1 : 2; - CAPTURE(service1_sampling_priority); CAPTURE(service1_delegate); CAPTURE(service3_sample_rate); - CAPTURE(expected_sampling_priority); const auto collector = std::make_shared(); const auto logger = std::make_shared(); @@ -1697,6 +1725,7 @@ TEST_CASE("sampling delegation is not an override") { config3.logger = logger; config3.extraction_styles = config1.injection_styles = styles; config3.service = "service3"; + config3.delegate_trace_sampling = true; config3.trace_sampler.sample_rate = service3_sample_rate; auto valid_config = finalize_config(config1); @@ -1725,18 +1754,16 @@ TEST_CASE("sampling delegation is not an override") { span_config.name = "local_root"; Span span1 = tracer1.create_span(span_config); span1.inject(propagation_writer); - if (service1_sampling_priority.has_value()) { - propagation_writer.items["x-datadog-sampling-priority"] = - std::to_string(*service1_sampling_priority); - } else { - propagation_writer.items.erase("x-datadog-sampling-priority"); - } { auto span2 = tracer2.extract_span(propagation_reader, span_config); REQUIRE(span2); propagation_writer.items.clear(); span2->inject(propagation_writer); + const bool expected_delegate_header = service1_delegate ? 1 : 0; + CHECK( + propagation_writer.items.count("x-datadog-delegate-trace-sampling") == + expected_delegate_header); { auto span3 = tracer3.extract_span(propagation_reader, span_config); @@ -1767,11 +1794,11 @@ TEST_CASE("sampling delegation is not an override") { // If `service1_delegate` is false, then service1's sampling decision was // made by the service1's sampler, which will result in priority 2. // Otherwise, it's the same priority expected for the other spans. - if (span.service == "service1" && !service1_delegate) { + if (!service1_delegate) { REQUIRE(span.numeric_tags.at(tags::internal::sampling_priority) == 2); } else { REQUIRE(span.numeric_tags.at(tags::internal::sampling_priority) == - expected_sampling_priority); + service3_sampling_priority); } } }