From b1e1df84f071f8f0a69f332bebfb7cffd08923e3 Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Mon, 25 Mar 2024 11:51:05 -0400 Subject: [PATCH 1/3] fix: split DD_TAGS either by space or comma --- src/datadog/curl.cpp | 1 - src/datadog/parse_util.cpp | 78 +++++++++++++++++++++++---- src/datadog/parse_util.h | 9 ++-- src/datadog/string_util.cpp | 7 +++ src/datadog/string_util.h | 3 ++ test/test_parse_util.cpp | 104 +++++++++++++++++++++++++++++++++++- test/test_tracer_config.cpp | 42 +++++++++++---- 7 files changed, 216 insertions(+), 28 deletions(-) diff --git a/src/datadog/curl.cpp b/src/datadog/curl.cpp index a13557f0..dd4c4e79 100644 --- a/src/datadog/curl.cpp +++ b/src/datadog/curl.cpp @@ -225,7 +225,6 @@ class CurlImpl { void *user_data); static bool is_non_whitespace(unsigned char); static char to_lower(unsigned char); - static StringView trim(StringView); public: explicit CurlImpl(const std::shared_ptr &, const Clock &, diff --git a/src/datadog/parse_util.cpp b/src/datadog/parse_util.cpp index 7632419f..8738ded4 100644 --- a/src/datadog/parse_util.cpp +++ b/src/datadog/parse_util.cpp @@ -9,6 +9,7 @@ #include #include "error.h" +#include "string_util.h" namespace datadog { namespace tracing { @@ -150,26 +151,27 @@ std::vector parse_list(StringView input) { return items; } +// TODO: normalize key and value Expected> parse_tags( std::vector list) { std::unordered_map tags; + std::string key; + std::string value; + for (const StringView &token : list) { const auto separator = std::find(token.begin(), token.end(), ':'); + if (separator == token.end()) { - std::string message; - message += "Unable to parse a key/value from the tag text \""; - append(message, token); - // message += - // "\" because it does not contain the separator character \":\". " - // "Error occurred in list of tags \""; - // append(message, input); - message += "."; - return Error{Error::TAG_MISSING_SEPARATOR, std::move(message)}; + key = std::string{trim(token)}; + } else { + key = std::string{trim(range(token.begin(), separator))}; + if (key.empty()) { + continue; + } + value = std::string{trim(range(separator + 1, token.end()))}; } - std::string key{token.begin(), separator}; - std::string value{separator + 1, token.end()}; // If there are duplicate values, then the last one wins. tags.insert_or_assign(std::move(key), std::move(value)); } @@ -177,5 +179,59 @@ Expected> parse_tags( return tags; } +// This function scans the input string to identify a separator (',' or ' '). +// Then, split tags using the identified separator and call `parse_tags` with +// the resulting unordered_map. +// +// Why? +// RFC: DD_TAGS - support space separation +// The trace agent parses DD_TAGS as a space separated list of tags. The +// tracers parse this as a comma-separated list. We need to have the tracers +// parse DD_TAGS as a space-separated list when possible so that the agent and +// tracers can use the same DD_TAGS strings while maintaining backwards +// compatibility with comma-separated lists. +Expected> parse_tags( + StringView input) { + std::vector tags; + + size_t beg = 0; + const size_t end = input.size(); + + char separator = 0; + size_t i = beg; + + for (; i < end; ++i) { + if (input[i] == ',') { + separator = ','; + break; + } else if (input[i] == ' ') { + separator = ' '; + break; + } + } + + if (separator == 0) { + goto capture_all; + } + + for (; i < end; ++i) { + if (input[i] == separator) { + auto tag = input.substr(beg, i - beg); + if (tag.size() > 0) { + tags.emplace_back(tag); + } + beg = i + 1; + } + } + +capture_all: + auto tag = input.substr(beg, i - beg); + if (tag.size() > 0) { + tags.emplace_back(tag); + } + + return parse_tags(tags); +} + } // namespace tracing } // namespace datadog diff --git a/src/datadog/parse_util.h b/src/datadog/parse_util.h index c2104447..063e307e 100644 --- a/src/datadog/parse_util.h +++ b/src/datadog/parse_util.h @@ -41,6 +41,8 @@ bool starts_with(StringView subject, StringView prefix); // Convert the specified `text` to lower case in-place. void to_lower(std::string& text); +std::vector parse_tags_env(StringView input); + // List items are separated by an optional comma (",") and any amount of // whitespace. // Leading and trailing whitespace are ignored. @@ -49,11 +51,8 @@ std::vector parse_list(StringView input); Expected> parse_tags( std::vector list); -inline Expected> parse_tags( - StringView input) { - // Within a tag, the key and value are separated by a colon (":"). - return parse_tags(parse_list(input)); -} +Expected> parse_tags( + StringView input); } // namespace tracing } // namespace datadog diff --git a/src/datadog/string_util.cpp b/src/datadog/string_util.cpp index 9384c81f..0cf67b59 100644 --- a/src/datadog/string_util.cpp +++ b/src/datadog/string_util.cpp @@ -70,5 +70,12 @@ std::string join_tags( }); } +StringView trim(StringView str) { + str.remove_prefix(std::min(str.find_first_not_of(' '), str.size())); + const auto pos = str.find_last_not_of(' '); + if (pos != str.npos) str.remove_suffix(str.size() - pos - 1); + return str; +} + } // namespace tracing } // namespace datadog diff --git a/src/datadog/string_util.h b/src/datadog/string_util.h index 255a4b3c..a9985b0a 100644 --- a/src/datadog/string_util.h +++ b/src/datadog/string_util.h @@ -4,6 +4,7 @@ #include #include "propagation_style.h" +#include "string_view.h" namespace datadog { namespace tracing { @@ -25,5 +26,7 @@ std::string join_propagation_styles(const std::vector&); std::string join_tags( const std::unordered_map& values); +StringView trim(StringView); + } // namespace tracing } // namespace datadog diff --git a/test/test_parse_util.cpp b/test/test_parse_util.cpp index c8ca3434..40b76e09 100644 --- a/test/test_parse_util.cpp +++ b/test/test_parse_util.cpp @@ -11,7 +11,9 @@ using namespace datadog::tracing; -TEST_CASE("parse_int") { +#define PARSE_UTIL_TEST(x) TEST_CASE(x, "[parse_util]") + +PARSE_UTIL_TEST("parse_int") { struct TestCase { int line; std::string name; @@ -74,7 +76,7 @@ TEST_CASE("parse_int") { // This test case is similar to the one above, except that negative numbers are // not supported, and the underflow and overflow values are different. -TEST_CASE("parse_uint64") { +PARSE_UTIL_TEST("parse_uint64") { struct TestCase { int line; std::string name; @@ -133,3 +135,101 @@ TEST_CASE("parse_uint64") { REQUIRE(result.error().code == expected); } } + +PARSE_UTIL_TEST("parse_tags") { + struct TestCase { + int line; + StringView name; + StringView input; + std::unordered_map expected; + }; + + auto test_case = GENERATE(values({ + {__LINE__, + "space separated tags", + "env:test aKey:aVal bKey:bVal cKey:", + { + {"env", "test"}, + {"aKey", "aVal"}, + {"bKey", "bVal"}, + {"cKey", ""}, + }}, + {__LINE__, + "comma separated tags", + "env:test aKey:aVal bKey:bVal cKey:", + { + {"env", "test"}, + {"aKey", "aVal"}, + {"bKey", "bVal"}, + {"cKey", ""}, + }}, + {__LINE__, + "mixed separator but comma first", + "env:test,aKey:aVal bKey:bVal cKey:", + { + {"env", "test"}, + {"aKey", "aVal bKey:bVal cKey:"}, + }}, + {__LINE__, + "mixed separator but space first", + "env:test bKey :bVal dKey: dVal cKey:", + { + {"env", "test"}, + {"bKey", ""}, + {"dKey", ""}, + {"dVal", ""}, + {"cKey", ""}, + }}, + {__LINE__, + "mixed separator but space first", + "env:keyWithA:Semicolon bKey:bVal cKey", + { + {"env", "keyWithA:Semicolon"}, + {"bKey", "bVal"}, + {"cKey", ""}, + }}, + // {__LINE__, + // "mixed separator edge case", + // "env:keyWith: , , Lots:Of:Semicolons ", + // { + // {"env", "keyWith:"}, + // {"Lots", "Of:Semicolons"}, + // }}, + {__LINE__, + "comma separated but some tags without value", + "a:b,c,d", + { + {"a", "b"}, + {"c", ""}, + {"d", ""}, + }}, + {__LINE__, + "one separator without value", + "a,1", + { + {"a", ""}, + {"1", ""}, + }}, + {__LINE__, + "no separator", + "a:b:c:d", + { + {"a", "b:c:d"}, + }}, + {__LINE__, + "input is trimed", + "key1:val1, key2 : val2 ", + { + {"key1", "val1"}, + {"key2", "val2"}, + }}, + })); + + CAPTURE(test_case.line); + CAPTURE(test_case.name); + CAPTURE(test_case.input); + + const auto tags = parse_tags(test_case.input); + REQUIRE(tags); + CHECK(*tags == test_case.expected); +} diff --git a/test/test_tracer_config.cpp b/test/test_tracer_config.cpp index 736dec9c..fb9f72c0 100644 --- a/test/test_tracer_config.cpp +++ b/test/test_tracer_config.cpp @@ -213,21 +213,45 @@ TEST_CASE("TracerConfig::defaults") { auto test_case = GENERATE(values({ {"empty", "", {}, nullopt}, - {"missing colon", "foo", {}, Error::TAG_MISSING_SEPARATOR}, + {"missing colon", + "foo", + { + {"foo", ""}, + }, + nullopt}, {"trailing comma", "foo:bar, baz:123,", - {}, - Error::TAG_MISSING_SEPARATOR}, - {"overwrite value", "foo:baz", {{"foo", "baz"}}, nullopt}, + { + {"foo", "bar"}, + {"baz", "123"}, + }, + nullopt}, + {"overwrite value", + "foo:baz", + { + {"foo", "baz"}, + }, + nullopt}, {"additional values", "baz:123, bam:three", - {{"baz", "123"}, {"bam", "three"}}, + { + {"baz", "123"}, + {"bam", "three"}, + }, nullopt}, {"commas optional", "baz:123 bam:three", - {{"baz", "123"}, {"bam", "three"}}, + { + {"baz", "123"}, + {"bam", "three"}, + }, + nullopt}, + {"last one wins", + "baz:123 baz:three", + { + {"baz", "three"}, + }, nullopt}, - {"last one wins", "baz:123 baz:three", {{"baz", "three"}}, nullopt}, })); // This will be overriden by the DD_TAGS environment variable. @@ -1053,8 +1077,8 @@ TEST_CASE("TracerConfig::span_sampler") { REQUIRE(file.is_open()); // We could do any of the failures tested in the "must be valid" // section, since it's the same parser. Instead, just to cover the - // code path specific to DD_SPAN_SAMPLING_RULES_FILE, pick any error, - // e.g. invalid JSON. + // code path specific to DD_SPAN_SAMPLING_RULES_FILE, pick any + // error, e.g. invalid JSON. file << "this is clearly not JSON"; file.close(); const EnvGuard guard{"DD_SPAN_SAMPLING_RULES_FILE", From 5342235d01456f6ebd4787937e2d828ad765ea6f Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Mon, 25 Mar 2024 15:00:25 -0400 Subject: [PATCH 2/3] Update src/datadog/parse_util.cpp --- src/datadog/parse_util.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/datadog/parse_util.cpp b/src/datadog/parse_util.cpp index 8738ded4..dae6e595 100644 --- a/src/datadog/parse_util.cpp +++ b/src/datadog/parse_util.cpp @@ -151,7 +151,6 @@ std::vector parse_list(StringView input) { return items; } -// TODO: normalize key and value Expected> parse_tags( std::vector list) { std::unordered_map tags; From b42e82da6d9443fce2cb026fa50d977537393cc7 Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Tue, 26 Mar 2024 15:26:05 -0400 Subject: [PATCH 3/3] code review --- src/datadog/parse_util.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/datadog/parse_util.h b/src/datadog/parse_util.h index 063e307e..9a98b7d9 100644 --- a/src/datadog/parse_util.h +++ b/src/datadog/parse_util.h @@ -41,8 +41,6 @@ bool starts_with(StringView subject, StringView prefix); // Convert the specified `text` to lower case in-place. void to_lower(std::string& text); -std::vector parse_tags_env(StringView input); - // List items are separated by an optional comma (",") and any amount of // whitespace. // Leading and trailing whitespace are ignored.