diff --git a/.github/workflows/test_workflow.yml b/.github/workflows/test_workflow.yml index 67f3ef944..73820fb89 100644 --- a/.github/workflows/test_workflow.yml +++ b/.github/workflows/test_workflow.yml @@ -57,7 +57,7 @@ jobs: uses: actions/cache@v4 with: path: ddprof-lib/build/async-profiler - key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/ap-lock.properties') }} + key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/lock.properties') }} enableCrossOsArchive: true restore-keys: | async-profiler-${{ runner.os }}- @@ -156,7 +156,7 @@ jobs: uses: actions/cache@v4 with: path: ddprof-lib/build/async-profiler - key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/ap-lock.properties') }} + key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/lock.properties') }} enableCrossOsArchive: true restore-keys: | async-profiler-${{ runner.os }}- @@ -276,7 +276,7 @@ jobs: uses: actions/cache@v4 with: path: ddprof-lib/build/async-profiler - key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/ap-lock.properties') }} + key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/lock.properties') }} enableCrossOsArchive: true restore-keys: | async-profiler-${{ runner.os }}- @@ -372,7 +372,7 @@ jobs: uses: actions/cache@v4 with: path: ddprof-lib/build/async-profiler - key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/ap-lock.properties') }} + key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/lock.properties') }} enableCrossOsArchive: true restore-keys: | async-profiler-${{ runner.os }}- diff --git a/README.md b/README.md index 42fd214bd..36eedbf56 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ The resulting artifact will be in `ddprof-lib/build/libs/ddprof-.jar` To smoothen the absorption of the upstream changes, we are using parts of the upstream codebase in (mostly) vanilla form. 
For this, we have four new gradle tasks in [ddprof-lib/build.gradle](ddprof-lib/build.gradle): -- `cloneAsyncProfiler` - clones the [DataDog/async-profiler](https://github.com/DataDog/async-profiler) repository into `ddprof-lib/build/async-profiler` using the commit lock specified in [gradle/ap-lock.properties](gradle/ap-lock.properties) +- `cloneAsyncProfiler` - clones the [DataDog/async-profiler](https://github.com/DataDog/async-profiler) repository into `ddprof-lib/build/async-profiler` using the commit lock specified in [gradle/lock.properties](gradle/lock.properties) - in that repository, we are maintainin a branch called `dd/master` where we keep the upstream code in sync with the 'safe' changes from the upstream `master` branch - cherry-picks into that branch should be rare and only done for critical fixes that are needed in the project - otherwise, we should wait for the next upstream release to avoid conflicts diff --git a/ddprof-lib/build.gradle b/ddprof-lib/build.gradle index 275c4e313..0e2c6432a 100644 --- a/ddprof-lib/build.gradle +++ b/ddprof-lib/build.gradle @@ -181,12 +181,12 @@ description = "Datadog Java Profiler Library" def component_version = project.hasProperty("ddprof_version") ? 
project.ddprof_version : project.version def props = new Properties() -file("${rootDir}/gradle/ap-lock.properties").withInputStream { stream -> +file("${rootDir}/gradle/lock.properties").withInputStream { stream -> props.load(stream) } -def branch_lock = props.getProperty("branch") -def commit_lock = props.getProperty("commit") +def ap_branch_lock = props.getProperty("ap.branch") +def ap_commit_lock = props.getProperty("ap.commit") // this feels weird but it is the only way invoking `./gradlew :ddprof-lib:*` tasks will work if (rootDir.toString().endsWith("ddprof-lib")) { @@ -267,7 +267,7 @@ tasks.register('copyExternalLibs', Copy) { def cloneAPTask = tasks.register('cloneAsyncProfiler') { description = 'Clones async-profiler repo if directory is missing or updates it if commit hash differs' - inputs.file("${rootDir}/gradle/ap-lock.properties") + inputs.file("${rootDir}/gradle/lock.properties") outputs.dir("${projectDir}/build/async-profiler") outputs.upToDateWhen { def targetDir = file("${projectDir}/build/async-profiler") @@ -284,7 +284,7 @@ def cloneAPTask = tasks.register('cloneAsyncProfiler') { } currentCommit = os.toString().trim() } - return currentCommit == commit_lock + return currentCommit == ap_commit_lock } catch (Exception e) { return false } @@ -300,11 +300,11 @@ def cloneAPTask = tasks.register('cloneAsyncProfiler') { if (!targetDir.exists()) { println "Cloning missing async-profiler git subdirectory..." 
exec { - commandLine 'git', 'clone', '--branch', branch_lock, 'https://github.com/datadog/async-profiler.git', targetDir.absolutePath + commandLine 'git', 'clone', '--branch', ap_branch_lock, 'https://github.com/datadog/async-profiler.git', targetDir.absolutePath } exec { workingDir targetDir.absolutePath - commandLine 'git', 'checkout', commit_lock + commandLine 'git', 'checkout', ap_commit_lock } } else { // Also fix git ownership for existing directory @@ -324,18 +324,18 @@ def cloneAPTask = tasks.register('cloneAsyncProfiler') { currentCommit = os.toString().trim() } - if (currentCommit != commit_lock) { - println "async-profiler commit hash differs (current: ${currentCommit}, expected: ${commit_lock}), updating..." + if (currentCommit != ap_commit_lock) { + println "async-profiler commit hash differs (current: ${currentCommit}, expected: ${ap_commit_lock}), updating..." exec { workingDir targetDir.absolutePath commandLine 'rm', '-rf', targetDir.absolutePath } exec { - commandLine 'git', 'clone', '--branch', branch_lock, 'https://github.com/datadog/async-profiler.git', targetDir.absolutePath + commandLine 'git', 'clone', '--branch', ap_branch_lock, 'https://github.com/datadog/async-profiler.git', targetDir.absolutePath } exec { workingDir targetDir.absolutePath - commandLine 'git', 'checkout', commit_lock + commandLine 'git', 'checkout', ap_commit_lock } } else { println "async-profiler git subdirectory present with correct commit hash." 
@@ -373,7 +373,7 @@ def patchStackFrame = tasks.register("patchStackFrame") { configure { dependsOn copyUpstreamFiles } - inputs.files copyUpstreamFiles + inputs.file("${projectDir}/src/main/cpp-external/stackFrame_x64.cpp") outputs.file("${projectDir}/src/main/cpp-external/stackFrame_x64.cpp") doLast { @@ -428,7 +428,7 @@ def patchStackWalker = tasks.register("patchStackWalker") { configure { dependsOn copyUpstreamFiles, patchStackFrame } - inputs.files copyUpstreamFiles + inputs.file("${projectDir}/src/main/cpp-external/stackWalker.cpp") outputs.file("${projectDir}/src/main/cpp-external/stackWalker.cpp") doLast { diff --git a/ddprof-lib/src/main/cpp/javaApi.cpp b/ddprof-lib/src/main/cpp/javaApi.cpp index 017f23c45..6033255de 100644 --- a/ddprof-lib/src/main/cpp/javaApi.cpp +++ b/ddprof-lib/src/main/cpp/javaApi.cpp @@ -21,6 +21,7 @@ #include "engine.h" #include "incbin.h" #include "os.h" +#include "otel_process_ctx.h" #include "profiler.h" #include "thread.h" #include "tsc.h" @@ -110,7 +111,7 @@ Java_com_datadoghq_profiler_JavaProfiler_execute0(JNIEnv *env, jobject unused, } extern "C" DLLEXPORT jstring JNICALL -Java_com_datadoghq_profiler_JavaProfiler_getStatus0(JNIEnv* env, +Java_com_datadoghq_profiler_JavaProfiler_getStatus0(JNIEnv* env, jobject unused) { char msg[2048]; int ret = Profiler::instance()->status((char*)msg, sizeof(msg) - 1); @@ -427,3 +428,93 @@ Java_com_datadoghq_profiler_ActiveBitmap_getActiveCountAddr0(JNIEnv *env, jclass unused) { return (jlong)Profiler::instance()->threadFilter()->addressOfSize(); } + +// Static variable to track the current published context +static otel_process_ctx_result* current_published_context = nullptr; + +extern "C" DLLEXPORT void JNICALL +Java_com_datadoghq_profiler_OTelContext_setProcessCtx0(JNIEnv *env, + jclass unused, + jstring env_data, + jstring hostname, + jstring runtime_id, + jstring service, + jstring version, + jstring tracer_version + ) { + JniString env_str(env, env_data); + JniString hostname_str(env, 
hostname); + JniString runtime_id_str(env, runtime_id); + JniString service_str(env, service); + JniString version_str(env, version); + JniString tracer_version_str(env, tracer_version); + + otel_process_ctx_data data = { + .deployment_environment_name = const_cast<char*>(env_str.c_str()), + .host_name = const_cast<char*>(hostname_str.c_str()), + .service_instance_id = const_cast<char*>(runtime_id_str.c_str()), + .service_name = const_cast<char*>(service_str.c_str()), + .service_version = const_cast<char*>(version_str.c_str()), + .telemetry_sdk_language = const_cast<char*>("java"), + .telemetry_sdk_version = const_cast<char*>(tracer_version_str.c_str()), + .telemetry_sdk_name = const_cast<char*>("dd-trace-java"), + .resources = NULL // TODO: Arbitrary tags not supported yet for Java + }; + + otel_process_ctx_result result = otel_process_ctx_publish(&data); +} + +extern "C" DLLEXPORT jobject JNICALL +Java_com_datadoghq_profiler_OTelContext_readProcessCtx0(JNIEnv *env, jclass unused) { +#ifndef OTEL_PROCESS_CTX_NO_READ + otel_process_ctx_read_result result = otel_process_ctx_read(); + + if (!result.success) { + // Return null if reading failed + return nullptr; + } + + // Convert C strings to Java strings + jstring jDeploymentEnvironmentName = result.data.deployment_environment_name ? + env->NewStringUTF(result.data.deployment_environment_name) : nullptr; + jstring jHostName = result.data.host_name ? + env->NewStringUTF(result.data.host_name) : nullptr; + jstring jServiceInstanceId = result.data.service_instance_id ? + env->NewStringUTF(result.data.service_instance_id) : nullptr; + jstring jServiceName = result.data.service_name ? + env->NewStringUTF(result.data.service_name) : nullptr; + jstring jServiceVersion = result.data.service_version ? + env->NewStringUTF(result.data.service_version) : nullptr; + jstring jTelemetrySdkLanguage = result.data.telemetry_sdk_language ? + env->NewStringUTF(result.data.telemetry_sdk_language) : nullptr; + jstring jTelemetrySdkVersion = result.data.telemetry_sdk_version ? 
+ env->NewStringUTF(result.data.telemetry_sdk_version) : nullptr; + jstring jTelemetrySdkName = result.data.telemetry_sdk_name ? + env->NewStringUTF(result.data.telemetry_sdk_name) : nullptr; + // TODO: result.data.resources not supported yet for Java + + otel_process_ctx_read_drop(&result); + + // Find the ProcessContext class + jclass processContextClass = env->FindClass("com/datadoghq/profiler/OTelContext$ProcessContext"); + if (!processContextClass) { + return nullptr; + } + + // Find the constructor + jmethodID constructor = env->GetMethodID(processContextClass, "<init>", + "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V"); + if (!constructor) { + return nullptr; + } + + // Create the ProcessContext object + jobject processContext = env->NewObject(processContextClass, constructor, + jDeploymentEnvironmentName, jHostName, jServiceInstanceId, jServiceName, jServiceVersion, jTelemetrySdkLanguage, jTelemetrySdkVersion, jTelemetrySdkName); + + return processContext; +#else + // If OTEL_PROCESS_CTX_NO_READ is defined, return null + return nullptr; +#endif +} diff --git a/ddprof-lib/src/main/cpp/otel_process_ctx.cpp b/ddprof-lib/src/main/cpp/otel_process_ctx.cpp new file mode 100644 index 000000000..d1cd08c18 --- /dev/null +++ b/ddprof-lib/src/main/cpp/otel_process_ctx.cpp @@ -0,0 +1,510 @@ +#include "otel_process_ctx.h" + +#include +#ifdef __cplusplus + #include + using std::atomic_thread_fence; + using std::memory_order_seq_cst; +#else + #include +#endif +#include +#include +#include +#include +#include +#include +#include + +#define ADD_QUOTES_HELPER(x) #x +#define ADD_QUOTES(x) ADD_QUOTES_HELPER(x) + +#ifndef PR_SET_VMA + #define PR_SET_VMA 0x53564d41 + #define PR_SET_VMA_ANON_NAME 0 +#endif + +static const otel_process_ctx_data empty_data = { + .deployment_environment_name = NULL, + .host_name = NULL, + .service_instance_id = NULL, + .service_name = NULL, + 
.service_version = NULL, + .telemetry_sdk_language = NULL, + .telemetry_sdk_version = NULL, + .telemetry_sdk_name = NULL, + .resources = NULL +}; + +#if (defined(OTEL_PROCESS_CTX_NOOP) && OTEL_PROCESS_CTX_NOOP) || !defined(__linux__) + // NOOP implementations when OTEL_PROCESS_CTX_NOOP is defined or not on Linux + + otel_process_ctx_result otel_process_ctx_publish(const otel_process_ctx_data *data) { + (void) data; // Suppress unused parameter warning + return (otel_process_ctx_result) {.success = false, .error_message = "OTEL_PROCESS_CTX_NOOP mode is enabled - no-op implementation (" __FILE__ ":" ADD_QUOTES(__LINE__) ")"}; + } + + bool otel_process_ctx_drop(void) { + return true; // Nothing to do, this always succeeds + } + + #ifndef OTEL_PROCESS_CTX_NO_READ + otel_process_ctx_read_result otel_process_ctx_read(void) { + return (otel_process_ctx_read_result) {.success = false, .error_message = "OTEL_PROCESS_CTX_NOOP mode is enabled - no-op implementation (" __FILE__ ":" ADD_QUOTES(__LINE__) ")", .data = empty_data}; + } + + bool otel_process_ctx_read_drop(otel_process_ctx_read_result *result) { + (void) result; // Suppress unused parameter warning + return false; + } + #endif // OTEL_PROCESS_CTX_NO_READ +#else // OTEL_PROCESS_CTX_NOOP + +/** + * The process context data that's written into the published anonymous mapping. + * + * An outside-of-process reader will read this struct + otel_process_payload to get the data. + */ +typedef struct __attribute__((packed, aligned(8))) { + char otel_process_ctx_signature[8]; // Always "OTEL_CTX" + // TODO: Is version useful? Should we just get rid of it? + uint32_t otel_process_ctx_version; // Always > 0, incremented when the data structure changes + // TODO: Is size useful? Should we just get rid of it? + uint32_t otel_process_payload_size; // Always > 0, size of storage + // TODO: Should we just inline the data in the mapping itself? 
+ char *otel_process_payload; // Always non-null, points to the storage for the data; expected to be a msgpack map of string key/value pairs, null-terminated +} otel_process_ctx_mapping; + +/** + * The full state of a published process context. + * + * This is returned as an opaque type to the caller. + * + * It is used to store all the data for the process context that needs to be kept around while the context is published. + */ +typedef struct { + // The pid of the process that published the context. + pid_t publisher_pid; + // The actual mapping of the process context. Note that because we `madvise(..., MADV_DONTFORK)` this mapping is not + // propagated to child processes and thus `mapping` is only valid on the process that published the context. + otel_process_ctx_mapping *mapping; + // The process context payload. + char *payload; +} otel_process_ctx_state; + +/** + * Only one context is active, so we keep its state as a global. + */ +static otel_process_ctx_state published_state; + +static otel_process_ctx_result otel_process_ctx_encode_payload(char **out, uint32_t *out_size, otel_process_ctx_data data); + +// We use a mapping size of 2 pages explicitly as a hint when running on legacy kernels that don't support the +// PR_SET_VMA_ANON_NAME prctl call; see below for more details. +static long size_for_mapping(void) { + long page_size_bytes = sysconf(_SC_PAGESIZE); + if (page_size_bytes < 4096) { + return -1; + } + return page_size_bytes * 2; +} + +// The process context is designed to be read by an outside-of-process reader. Thus, for concurrency purposes the steps +// on this method are ordered in a way to avoid races, or if not possible to avoid, to allow the reader to detect if there was a race. +otel_process_ctx_result otel_process_ctx_publish(const otel_process_ctx_data *data) { + // Step: Drop any previous context if it exists + // No state should be around anywhere after this step. 
+ if (!otel_process_ctx_drop_current()) { + return (otel_process_ctx_result) {.success = false, .error_message = "Failed to drop previous context (" __FILE__ ":" ADD_QUOTES(__LINE__) ")"}; + } + + // Step: Determine size for mapping + long mapping_size = size_for_mapping(); + if (mapping_size == -1) { + return (otel_process_ctx_result) {.success = false, .error_message = "Failed to get page size (" __FILE__ ":" ADD_QUOTES(__LINE__) ")"}; + } + + // Step: Prepare the payload to be published + // The payload SHOULD be ready and valid before trying to actually create the mapping. + if (!data) return (otel_process_ctx_result) {.success = false, .error_message = "otel_process_ctx_data is NULL (" __FILE__ ":" ADD_QUOTES(__LINE__) ")"}; + uint32_t payload_size = 0; + otel_process_ctx_result result = otel_process_ctx_encode_payload(&published_state.payload, &payload_size, *data); + if (!result.success) return result; + + // Step: Create the mapping + published_state.publisher_pid = getpid(); // This allows us to detect in forks that we shouldn't touch the mapping + published_state.mapping = (otel_process_ctx_mapping *) + mmap(NULL, mapping_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (published_state.mapping == MAP_FAILED) { + otel_process_ctx_drop_current(); + return (otel_process_ctx_result) {.success = false, .error_message = "Failed to allocate mapping (" __FILE__ ":" ADD_QUOTES(__LINE__) ")"}; + } + + // Step: Setup MADV_DONTFORK + // This ensures that the mapping is not propagated to child processes (they should call update/publish again). 
+ if (madvise(published_state.mapping, mapping_size, MADV_DONTFORK) == -1) { + if (otel_process_ctx_drop_current()) { + return (otel_process_ctx_result) {.success = false, .error_message = "Failed to setup MADV_DONTFORK (" __FILE__ ":" ADD_QUOTES(__LINE__) ")"}; + } else { + return (otel_process_ctx_result) {.success = false, .error_message = "Failed to drop previous context (" __FILE__ ":" ADD_QUOTES(__LINE__) ")"}; + } + } + + // Step: Populate the mapping + // The payload and any extra fields must come first and not be reordered with the signature by the compiler. + *published_state.mapping = (otel_process_ctx_mapping) { + .otel_process_ctx_signature = {0}, // Set in "Step: Populate the signature into the mapping" below + .otel_process_ctx_version = 1, + .otel_process_payload_size = payload_size, + .otel_process_payload = published_state.payload + }; + + // Step: Synchronization - Mapping has been filled and is missing signature + // Make sure the initialization of the mapping + payload above does not get reordered with setting the signature below. Setting + // the signature is what tells an outside reader that the context is fully published. + atomic_thread_fence(memory_order_seq_cst); + + // Step: Populate the signature into the mapping + // The signature must come last and not be reordered with the fields above by the compiler. After this step, external readers + // can read the signature and know that the payload is ready to be read. + memcpy(published_state.mapping->otel_process_ctx_signature, "OTEL_CTX", sizeof(published_state.mapping->otel_process_ctx_signature)); + + // Step: Change permissions on the mapping to only read permission + // We've observed the combination of anonymous mapping + a given number of pages + read-only permission is not very common, + // so this is left as a hint for when running on older kernels and the naming the mapping feature below isn't available. + // For modern kernels, doing this is harmless so we do it unconditionally. 
+ if (mprotect(published_state.mapping, mapping_size, PROT_READ) == -1) { + if (otel_process_ctx_drop_current()) { + return (otel_process_ctx_result) {.success = false, .error_message = "Failed to change permissions on mapping (" __FILE__ ":" ADD_QUOTES(__LINE__) ")"}; + } else { + return (otel_process_ctx_result) {.success = false, .error_message = "Failed to drop previous context (" __FILE__ ":" ADD_QUOTES(__LINE__) ")"}; + } + } + + // Step: Name the mapping so outside readers can: + // * Find it by name + // * Hook on prctl to detect when new mappings are published + if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, published_state.mapping, mapping_size, "OTEL_CTX") == -1) { + // Naming an anonymous mapping is a Linux 5.17+ feature. On earlier versions, this method call can fail. Thus it's OK + // for this to fail because: + // 1. Things that hook on prctl are still able to see this call, even though it's not supported (TODO: Confirm this is actually the case) + // 2. As a fallback, on older kernels, it's possible to scan the mappings and look for the "OTEL_CTX" signature in the memory itself, + // after observing the mapping has the expected number of pages and permissions. + } + + // All done! + + return (otel_process_ctx_result) {.success = true, .error_message = NULL}; +} + +bool otel_process_ctx_drop_current(void) { + otel_process_ctx_state state = published_state; + + // Zero out the state and make sure no operations below are reordered with zeroing + published_state = (otel_process_ctx_state) {.publisher_pid = 0, .mapping = NULL, .payload = NULL}; + atomic_thread_fence(memory_order_seq_cst); + + // The mapping only exists if it was created by the current process; if it was inherited by a fork it doesn't exist anymore + // (due to the MADV_DONTFORK) and we don't need to do anything to it. 
+ if (state.mapping != NULL && state.mapping != MAP_FAILED && getpid() == state.publisher_pid) { + long mapping_size = size_for_mapping(); + if (mapping_size == -1 || munmap(state.mapping, mapping_size) == -1) return false; // unmap the saved copy: published_state.mapping was already reset to NULL above + } + + // The payload may have been inherited from a parent. This is a regular malloc so we need to free it so we don't leak. + if (state.payload) free(state.payload); + + return true; +} + +static otel_process_ctx_result validate_and_calculate_payload_size(size_t *out_pairs_size, size_t *out_num_pairs, char **pairs) { + size_t num_entries = 0; + for (size_t i = 0; pairs[i] != NULL; i++) num_entries++; + if (num_entries % 2 != 0) { + return (otel_process_ctx_result) {.success = false, .error_message = "Value in otel_process_ctx_data is NULL (" __FILE__ ":" ADD_QUOTES(__LINE__) ")"}; + } + *out_num_pairs = num_entries / 2; + + *out_pairs_size = 0; + for (size_t i = 0; i < *out_num_pairs; i++) { + size_t key_len = strlen(pairs[i * 2]); + if (key_len > INT16_MAX) { + return (otel_process_ctx_result) {.success = false, .error_message = "Length of key in otel_process_ctx_data exceeds INT16_MAX limit (" __FILE__ ":" ADD_QUOTES(__LINE__) ")"}; + } + size_t value_len = strlen(pairs[i * 2 + 1]); + if (value_len > INT16_MAX) { + return (otel_process_ctx_result) {.success = false, .error_message = "Length of value in otel_process_ctx_data exceeds INT16_MAX limit (" __FILE__ ":" ADD_QUOTES(__LINE__) ")"}; + } + *out_pairs_size += 1 + 2 + key_len; // str 16 for key + *out_pairs_size += 1 + 2 + value_len; // str 16 for value + } + + return (otel_process_ctx_result) {.success = true, .error_message = NULL}; +} + +static void write_msgpack_string(char **ptr, const char *str) { + size_t len = strlen(str); + // Write str 16 header + *(*ptr)++ = 0xda; + *(*ptr)++ = (len >> 8) & 0xFF; // high byte of length + *(*ptr)++ = len & 0xFF; // low byte of length + memcpy(*ptr, str, len); + *ptr += len; +} + +// TODO: The serialization format is still under 
discussion and is not considered stable yet. +// Comments **very** welcome: Should we use JSON instead? Or protobuf? +// +// Encode the payload as a msgpack map of string key/value pairs. +// +// This method implements an extremely compact but limited msgpack encoder. This encoder supports only encoding a single +// flat key-value map where every key and value is a string. +// For extra compact code, it uses only a "map 16" encoding format with only "str 16" strings, rather than attempting to +// use some of the other encoding alternatives. +static otel_process_ctx_result otel_process_ctx_encode_payload(char **out, uint32_t *out_size, otel_process_ctx_data data) { + const char *pairs[] = { + "deployment.environment.name", data.deployment_environment_name, + "host.name", data.host_name, + "service.instance.id", data.service_instance_id, + "service.name", data.service_name, + "service.version", data.service_version, + "telemetry.sdk.language", data.telemetry_sdk_language, + "telemetry.sdk.version", data.telemetry_sdk_version, + "telemetry.sdk.name", data.telemetry_sdk_name, + NULL + }; + + size_t num_pairs = 0, pairs_size = 0; + otel_process_ctx_result validation_result = validate_and_calculate_payload_size(&pairs_size, &num_pairs, (char **) pairs); + if (!validation_result.success) return validation_result; + + size_t resources_pairs_size = 0, resources_num_pairs = 0; + if (data.resources != NULL) { + validation_result = validate_and_calculate_payload_size(&resources_pairs_size, &resources_num_pairs, data.resources); + if (!validation_result.success) return validation_result; + } + + size_t total_pairs = num_pairs + resources_num_pairs; + size_t total_size = pairs_size + resources_pairs_size + 1 + 2; // map 16 header (1 byte + 2 bytes for count) + + if (total_pairs > INT16_MAX) { + return (otel_process_ctx_result) {.success = false, .error_message = "Total number of pairs exceeds INT16_MAX limit (" __FILE__ ":" ADD_QUOTES(__LINE__) ")"}; + } + + char *encoded = 
(char *) calloc(total_size, 1); + if (!encoded) { + return (otel_process_ctx_result) {.success = false, .error_message = "Failed to allocate memory for payload (" __FILE__ ":" ADD_QUOTES(__LINE__) ")"}; + } + char *ptr = encoded; + + // Write map 16 header (0xde) followed by count + *ptr++ = 0xde; + *ptr++ = (total_pairs >> 8) & 0xFF; // high byte of count + *ptr++ = total_pairs & 0xFF; // low byte of count + + for (size_t i = 0; i < num_pairs; i++) { + write_msgpack_string(&ptr, pairs[i * 2]); // Write key + write_msgpack_string(&ptr, pairs[i * 2 + 1]); // Write value + } + + if (data.resources != NULL) { + for (size_t i = 0; i < resources_num_pairs; i++) { + write_msgpack_string(&ptr, data.resources[i * 2]); // Write key + write_msgpack_string(&ptr, data.resources[i * 2 + 1]); // Write value + } + } + + *out = encoded; + *out_size = (uint32_t) total_size; + + return (otel_process_ctx_result) {.success = true, .error_message = NULL}; +} + +#ifndef OTEL_PROCESS_CTX_NO_READ + // Note: The below parsing code is only for otel_process_ctx_read and is only provided for debugging + // and testing purposes. 
+ + static bool is_otel_process_ctx_mapping(char *line) { + size_t name_len = sizeof("[anon:OTEL_CTX]") - 1; + size_t line_len = strlen(line); + if (line_len < name_len) return false; + if (line[line_len-1] == '\n') line[--line_len] = '\0'; + return memcmp(line + (line_len - name_len), "[anon:OTEL_CTX]", name_len) == 0; + } + + static void *parse_mapping_start(char *line) { + char *endptr = NULL; + unsigned long long start = strtoull(line, &endptr, 16); + if (start == 0 || start == ULLONG_MAX) return NULL; + return (void *)(uintptr_t) start; + } + + static otel_process_ctx_mapping *try_finding_mapping(void) { + char line[8192]; + otel_process_ctx_mapping *result = NULL; + + FILE *fp = fopen("/proc/self/maps", "r"); + if (!fp) return result; + + while (fgets(line, sizeof(line), fp)) { + if (is_otel_process_ctx_mapping(line)) { + result = (otel_process_ctx_mapping *)parse_mapping_start(line); + break; + } + } + + fclose(fp); + return result; + } + + // Simplified msgpack decoder to match the exact encoder above. If the msgpack string doesn't match the encoder, this will + // return false. 
+ static bool otel_process_ctx_decode_payload(char *payload, otel_process_ctx_data *data_out) { + char *ptr = payload; + + // Check map 16 header (0xde) + if ((unsigned char)*ptr++ != 0xde) return false; + + // Read count (2 bytes, big endian) + uint16_t count = ((uint8_t)*ptr << 8) | (uint8_t)*(ptr + 1); + ptr += 2; + + // We expect at least 8 pairs (the standard fields) + if (count < 8) return false; + + // Initialize output data + data_out->deployment_environment_name = NULL; + data_out->host_name = NULL; + data_out->service_instance_id = NULL; + data_out->service_name = NULL; + data_out->service_version = NULL; + data_out->telemetry_sdk_language = NULL; + data_out->telemetry_sdk_version = NULL; + data_out->telemetry_sdk_name = NULL; + data_out->resources = NULL; + + // Allocate resources array with space for all pairs as a simplification (2 entries per pair + 1 for NULL terminator) + data_out->resources = (char **) calloc(count * 2 + 1, sizeof(char *)); + if (!data_out->resources) return false; + + int resources_index = 0; + + // Decode each key-value pair + for (int i = 0; i < count; i++) { + // Check str 16 header for key (0xda) + if ((unsigned char)*ptr++ != 0xda) return false; + + // Read key length (2 bytes, big endian) + uint16_t key_len = ((uint8_t)*ptr << 8) | (uint8_t)*(ptr + 1); + ptr += 2; + + // Get pointer to key (not null-terminated) + char *key_not_terminated = ptr; + ptr += key_len; + + // Check str 16 header for value (0xda) + if ((unsigned char)*ptr++ != 0xda) return false; + + // Read value length (2 bytes, big endian) + uint16_t value_len = ((uint8_t)*ptr << 8) | (uint8_t)*(ptr + 1); + ptr += 2; + + // Read value + char *value = (char *) calloc(value_len + 1, 1); + if (!value) return false; + memcpy(value, ptr, value_len); + value[value_len] = '\0'; + ptr += value_len; + + // Assign to appropriate field based on key + if (key_len == strlen("deployment.environment.name") && memcmp(key_not_terminated, "deployment.environment.name", 
strlen("deployment.environment.name")) == 0) { + data_out->deployment_environment_name = value; + } else if (key_len == strlen("host.name") && memcmp(key_not_terminated, "host.name", strlen("host.name")) == 0) { + data_out->host_name = value; + } else if (key_len == strlen("service.instance.id") && memcmp(key_not_terminated, "service.instance.id", strlen("service.instance.id")) == 0) { + data_out->service_instance_id = value; + } else if (key_len == strlen("service.name") && memcmp(key_not_terminated, "service.name", strlen("service.name")) == 0) { + data_out->service_name = value; + } else if (key_len == strlen("service.version") && memcmp(key_not_terminated, "service.version", strlen("service.version")) == 0) { + data_out->service_version = value; + } else if (key_len == strlen("telemetry.sdk.language") && memcmp(key_not_terminated, "telemetry.sdk.language", strlen("telemetry.sdk.language")) == 0) { + data_out->telemetry_sdk_language = value; + } else if (key_len == strlen("telemetry.sdk.version") && memcmp(key_not_terminated, "telemetry.sdk.version", strlen("telemetry.sdk.version")) == 0) { + data_out->telemetry_sdk_version = value; + } else if (key_len == strlen("telemetry.sdk.name") && memcmp(key_not_terminated, "telemetry.sdk.name", strlen("telemetry.sdk.name")) == 0) { + data_out->telemetry_sdk_name = value; + } else { + // Unknown key, put it into resources + char *key = (char *) calloc(key_len + 1, 1); + if (!key) { + free(value); + return false; + } + memcpy(key, key_not_terminated, key_len); + key[key_len] = '\0'; + + data_out->resources[resources_index++] = key; + data_out->resources[resources_index++] = value; + } + } + + // Verify all required fields were found + return data_out->deployment_environment_name != NULL && + data_out->host_name != NULL && + data_out->service_instance_id != NULL && + data_out->service_name != NULL && + data_out->service_version != NULL && + data_out->telemetry_sdk_language != NULL && + data_out->telemetry_sdk_version != 
NULL && + data_out->telemetry_sdk_name != NULL; + } + + void otel_process_ctx_read_data_drop(otel_process_ctx_data data) { + if (data.deployment_environment_name) free(data.deployment_environment_name); + if (data.host_name) free(data.host_name); + if (data.service_instance_id) free(data.service_instance_id); + if (data.service_name) free(data.service_name); + if (data.service_version) free(data.service_version); + if (data.telemetry_sdk_language) free(data.telemetry_sdk_language); + if (data.telemetry_sdk_version) free(data.telemetry_sdk_version); + if (data.telemetry_sdk_name) free(data.telemetry_sdk_name); + if (data.resources) { + for (int i = 0; data.resources[i] != NULL; i++) free(data.resources[i]); + free(data.resources); + } + } + + otel_process_ctx_read_result otel_process_ctx_read(void) { + otel_process_ctx_mapping *mapping = try_finding_mapping(); + if (!mapping) { + return (otel_process_ctx_read_result) {.success = false, .error_message = "No OTEL_CTX mapping found (" __FILE__ ":" ADD_QUOTES(__LINE__) ")", .data = empty_data}; + } + + if (strncmp(mapping->otel_process_ctx_signature, "OTEL_CTX", sizeof(mapping->otel_process_ctx_signature)) != 0 || mapping->otel_process_ctx_version != 1) { + return (otel_process_ctx_read_result) {.success = false, .error_message = "Invalid OTEL_CTX signature or version (" __FILE__ ":" ADD_QUOTES(__LINE__) ")", .data = empty_data}; + } + + otel_process_ctx_data data = empty_data; + + if (!otel_process_ctx_decode_payload(mapping->otel_process_payload, &data)) { + otel_process_ctx_read_data_drop(data); + return (otel_process_ctx_read_result) {.success = false, .error_message = "Failed to decode payload (" __FILE__ ":" ADD_QUOTES(__LINE__) ")", .data = empty_data}; + } + + return (otel_process_ctx_read_result) {.success = true, .error_message = NULL, .data = data}; + } + + bool otel_process_ctx_read_drop(otel_process_ctx_read_result *result) { + if (!result || !result->success) return false; + + // Free allocated strings in 
the data + otel_process_ctx_read_data_drop(result->data); + + // Reset the result to empty state + *result = (otel_process_ctx_read_result) {.success = false, .error_message = "Data dropped", .data = empty_data}; + + return true; + } +#endif // OTEL_PROCESS_CTX_NO_READ + +#endif // OTEL_PROCESS_CTX_NOOP diff --git a/ddprof-lib/src/main/cpp/otel_process_ctx.h b/ddprof-lib/src/main/cpp/otel_process_ctx.h new file mode 100644 index 000000000..fbb1b0a30 --- /dev/null +++ b/ddprof-lib/src/main/cpp/otel_process_ctx.h @@ -0,0 +1,143 @@ +#pragma once + +#define OTEL_PROCESS_CTX_VERSION_MAJOR 0 +#define OTEL_PROCESS_CTX_VERSION_MINOR 0 +#define OTEL_PROCESS_CTX_VERSION_PATCH 5 +#define OTEL_PROCESS_CTX_VERSION_STRING "0.0.5" + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/** + * # OpenTelemetry Process Context reference implementation + * + * `otel_process_ctx.h` and `otel_process_ctx.c` provide a reference implementation for the OpenTelemetry + * process-level context sharing specification. (TODO Link) + * + * This reference implementation is Linux-only, as the specification currently only covers Linux. + * On non-Linux OS's (or when OTEL_PROCESS_CTX_NOOP is defined) no-op versions of functions are supplied. + */ + +/** + * Data that can be published as a process context. + * + * Every string MUST be valid for the duration of the call to `otel_process_ctx_publish`. + * Strings will be copied into the context. 
+ * + * Strings MUST be: + * * Non-NULL + * * UTF-8 encoded + * * Not longer than INT16_MAX bytes + * + * Strings MAY be: + * * Empty + * + * The below fields map to usual datadog attributes as follows (TODO: Remove this once we share the header publicly) + * * deployment_environment_name -> env + * * host_name -> hostname + * * service_instance_id -> runtime-id + * * service_name -> service + * * service_version -> version + * * telemetry_sdk_language -> tracer_language + * * telemetry_sdk_version -> tracer_version + * * telemetry_sdk_name -> name of library (e.g. dd-trace-java) + */ +typedef struct { + // https://opentelemetry.io/docs/specs/semconv/registry/attributes/deployment/#deployment-environment-name + char *deployment_environment_name; + // https://opentelemetry.io/docs/specs/semconv/registry/attributes/host/#host-name + char *host_name; + // https://opentelemetry.io/docs/specs/semconv/registry/attributes/service/#service-instance-id + char *service_instance_id; + // https://opentelemetry.io/docs/specs/semconv/registry/attributes/service/#service-name + char *service_name; + // https://opentelemetry.io/docs/specs/semconv/registry/attributes/service/#service-version + char *service_version; + // https://opentelemetry.io/docs/specs/semconv/registry/attributes/telemetry/#telemetry-sdk-language + char *telemetry_sdk_language; + // https://opentelemetry.io/docs/specs/semconv/registry/attributes/telemetry/#telemetry-sdk-version + char *telemetry_sdk_version; + // https://opentelemetry.io/docs/specs/semconv/registry/attributes/telemetry/#telemetry-sdk-name + char *telemetry_sdk_name; + // Additional key/value pairs as resources https://opentelemetry.io/docs/specs/otel/resource/sdk/ + // Can be NULL if no resources are needed; if non-NULL, this array MUST be terminated with a NULL entry. + // Every even entry is a key, every odd entry is a value (E.g. "key1", "value1", "key2", "value2", NULL). 
+ char **resources; +} otel_process_ctx_data; + +/** Number of entries in the `otel_process_ctx_data` struct. Can be used to easily detect when the struct is updated. */ +#define OTEL_PROCESS_CTX_DATA_ENTRIES sizeof(otel_process_ctx_data) / sizeof(char *) + +typedef struct { + bool success; + const char *error_message; // Static strings only, non-NULL if success is false +} otel_process_ctx_result; + +/** + * Publishes a OpenTelemetry process context with the given data. + * + * The context should remain alive until the application exits (or is just about to exit). + * This method is NOT thread-safe. + * + * Calling `publish` multiple times is supported and will replace a previous context (only one is published at any given + * time). Calling `publish` multiple times usually happens when: + * * Some of the `otel_process_ctx_data` changes due to a live system reconfiguration for the same process + * * The process is forked (to provide a new `service_instance_id`) + * + * This API can be called in a fork of the process that published the previous context, even though + * the context is not carried over into forked processes (although part of its memory allocations are). + * + * @param data Pointer to the data to publish. This data is copied into the context and only needs to be valid for the duration of + * the call. Must not be `NULL`. + * @return The result of the operation. + */ +otel_process_ctx_result otel_process_ctx_publish(const otel_process_ctx_data *data); + +/** + * Drops the current OpenTelemetry process context, if any. + * + * This method is safe to call even there's no current context. + * This method is NOT thread-safe. + * + * This API can be called in a fork of the process that published the current context to clean memory allocations + * related to the parent's context (even though the context itself is not carried over into forked processes). + * + * @return `true` if the context was successfully dropped or no context existed, `false` otherwise. 
+ */ +bool otel_process_ctx_drop_current(void); + +/** This can be disabled if no read support is required. */ +#ifndef OTEL_PROCESS_CTX_NO_READ + typedef struct { + bool success; + const char *error_message; // Static strings only, non-NULL if success is false + otel_process_ctx_data data; // Strings are allocated using `malloc` and the caller is responsible for `free`ing them + } otel_process_ctx_read_result; + + /** + * Reads the current OpenTelemetry process context, if any. + * + * Useful for debugging and testing purposes. Underlying returned strings in `data` are dynamically allocated using + * `malloc` and `otel_process_ctx_read_drop` must be called to free them. + * + * Thread-safety: This function assumes there is no concurrent mutation of the process context. + * + * @return The result of the operation. If successful, `data` contains the retrieved context data. + */ + otel_process_ctx_read_result otel_process_ctx_read(void); + + /** + * Drops the data resulting from a previous call to `otel_process_ctx_read`. + * + * @param result The result of a previous call to `otel_process_ctx_read`. Must not be `NULL`. + * @return `true` if the data was successfully dropped, `false` otherwise. + */ + bool otel_process_ctx_read_drop(otel_process_ctx_read_result *result); +#endif + +#ifdef __cplusplus +} +#endif diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/OTelContext.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/OTelContext.java new file mode 100644 index 000000000..f76cfd578 --- /dev/null +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/OTelContext.java @@ -0,0 +1,228 @@ +package com.datadoghq.profiler; + +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.function.Consumer; + +/** + * OpenTelemetry Process Context API for sharing process-level context information. + * + *

This class provides functionality to publish OpenTelemetry semantic conventions + * compliant process context information that can be discovered and read by external + * monitoring tools and profilers. The context is shared via platform-specific + * mechanisms (currently Linux-only) and includes service identification metadata. + * + *

Platform Support: + *

    + *
  • Linux: Full support using anonymous memory mappings with prctl naming
  • + *
  • Others: Limited support - API calls are no-ops
  • + *
+ * + *

Thread Safety: This class is thread-safe. All public methods can be + * called concurrently from multiple threads. + * + *

Usage Example: + *

{@code
+ * // Get the singleton instance
+ * OTelContext context = OTelContext.getInstance();
+ * 
+ * // Set process context for external discovery
+ * context.setProcessContext(...);
+ * }
+ * + *

External Discovery: Once published, the process context can be + * discovered by external tools by scanning /proc/&#42;/maps for mappings named + * [anon:OTEL_CTX] on Linux systems. + * + * @since 1.30.0 + */ +public final class OTelContext { + private static final class SingletonHolder { + static final OTelContext INSTANCE = new OTelContext(); + } + + /** + * Represents the OpenTelemetry process context data. + */ + public static final class ProcessContext { + public final String deploymentEnvironmentName; + public final String hostName; + public final String serviceInstanceId; + public final String serviceName; + public final String serviceVersion; + public final String telemetrySdkLanguage; + public final String telemetrySdkVersion; + public final String telemetrySdkName; + + public ProcessContext(String deploymentEnvironmentName, String hostName, String serviceInstanceId, String serviceName, String serviceVersion, String telemetrySdkLanguage, String telemetrySdkVersion, String telemetrySdkName) { + this.deploymentEnvironmentName = deploymentEnvironmentName; + this.hostName = hostName; + this.serviceInstanceId = serviceInstanceId; + this.serviceName = serviceName; + this.serviceVersion = serviceVersion; + this.telemetrySdkLanguage = telemetrySdkLanguage; + this.telemetrySdkVersion = telemetrySdkVersion; + this.telemetrySdkName = telemetrySdkName; + } + + @Override + public String toString() { + return String.format("ProcessContext{deploymentEnvironmentName='%s', hostName='%s', serviceInstanceId='%s', serviceName='%s', serviceVersion='%s', telemetrySdkLanguage='%s', telemetrySdkVersion='%s', telemetrySdkName='%s'}", + deploymentEnvironmentName, hostName, serviceInstanceId, serviceName, serviceVersion, telemetrySdkLanguage, telemetrySdkVersion, telemetrySdkName); + } + } + + /** + * Returns the singleton instance of the OpenTelemetry process context. + * + *

This method provides access to the globally shared OTelContext instance + * using the lazy initialization pattern. The instance is created on first access + * and reused for all subsequent calls. + * + *

Note: If library loading fails during initialization, a warning + * will be printed to System.out, but a valid (though non-functional) instance + * will still be returned. + * + * @return the singleton OTelContext instance, never null + */ + public static OTelContext getInstance() { + return SingletonHolder.INSTANCE; + } + + private final LibraryLoader.Result libraryLoadResult; + private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + + /** + * Private constructor for singleton instance. + * + *

Initializes the native library and handles any loading failures gracefully + * by printing warnings to System.out. + */ + private OTelContext() { + LibraryLoader.Result result = LibraryLoader.builder().load(); + if (!result.succeeded ) { + System.out.println("[WARNING] Failed to obtain OTel context.\n" + result.error); + } + libraryLoadResult = result; + } + + /** + * Creates a custom OTelContext instance with specific library loading configuration. + * + *

This constructor allows for advanced configuration of the native library loading + * process, including custom library locations and error handling. Most users should + * use {@link #getInstance()} instead. + * + *

Warning: Creating multiple instances may lead to undefined behavior + * as the underlying native library maintains global state. + * + * @param libLocation the custom library location, or null to use default discovery + * @param scratchDir the scratch directory for temporary files, or null for system default + * @param errorHandler custom error handler for library loading failures, or null + * to print warnings to System.out + */ + // @VisibleForTesting + OTelContext(String libLocation, String scratchDir, Consumer errorHandler) { + LibraryLoader.Result result = LibraryLoader.builder().withLibraryLocation(libLocation).withScratchDir(scratchDir).load(); + if (!result.succeeded && result.error != null) { + if (errorHandler != null) { + errorHandler.accept(result.error); + } else { + System.out.println("[WARNING] Failed to obtain OTelContext access.\n" + result.error); + } + } + libraryLoadResult = result; + } + + /** + * Reads the currently published OpenTelemetry process context, if any. + * + *

This method attempts to read back the process context that was previously + * published via {@link #setProcessContext(String, String, String, String, String, String)}. This is + * primarily useful for debugging and testing purposes. + * + *

Platform Support: Currently only supported on Linux. On other + * platforms, this method will return null. + * + * @return a ProcessContext object containing the current context data if + * successfully read, or null if no context is published or reading failed + * @since 1.30.0 + */ + public ProcessContext readProcessContext() { + if (!libraryLoadResult.succeeded) { + return null; + } + try { + lock.readLock().lock(); + return readProcessCtx0(); + } finally { + lock.readLock().unlock(); + } + } + + /** + * Sets the OpenTelemetry process context for external discovery and monitoring. + * + *

This method publishes process-level context information following OpenTelemetry + * semantic conventions. The context is made available to external monitoring tools + * and profilers through platform-specific mechanisms. + * + *

On Linux: Creates a named anonymous memory mapping that can be + * discovered by external tools scanning /proc/&#42;/maps for [anon:OTEL_CTX] + * entries. + * + *

On other platforms: This method is a no-op as process context + * sharing is not currently supported. + * + *

Context Lifecycle: The published context remains active until + * the process exits. Calling this method multiple times will replace the previous + * context with the new values. + * + *

Usage Example: + *

{@code
+     * OTelContext.getInstance().setProcessContext(
+     *     "staging",           // env
+     *     "my-hostname",       // hostname
+     *     "instance-12345",    // runtime-id
+     *     "my-service",        // service
+     *     "1.0.0",             // version
+     *     "3.5.0"              // tracer-version
+     * );
+     * }
+ * + * @param env the deployment environment name as defined by OpenTelemetry + * semantic conventions (deployment.environment.name). Must not be null. + * Examples: "production", "staging", "development", "test" + * @param hostname the hostname of the service as defined by OpenTelemetry + * semantic conventions (host.name). Must not be null. + * Examples: "my-hostname", "my-hostname.example.com" + * @param runtimeId the unique identifier for this specific instance of the service + * as defined by OpenTelemetry semantic conventions (service.instance.id). + * Must not be null. + * @param service the logical name of the service as defined by OpenTelemetry + * semantic conventions (service.name). Must not be null. + * Examples: "order-service", "user-management", "payment-processor" + * @param version the version of the service as defined by OpenTelemetry + * semantic conventions (service.version). Must not be null. + * Examples: "1.0.0", "2.3.4" + * @param tracerVersion the version of the tracer as defined by OpenTelemetry + * semantic conventions (telemetry.sdk.version). Must not be null. 
+ * Examples: "3.5.0", "4.2.0" + * * + * @see OpenTelemetry Service Attributes + * @see OpenTelemetry Deployment Attributes + */ + public void setProcessContext(String env, String hostname, String runtimeId, String service, String version, String tracerVersion) { + if (!libraryLoadResult.succeeded) { + return; + } + try { + lock.writeLock().lock(); + setProcessCtx0(env, hostname, runtimeId, service, version, tracerVersion); + } finally { + lock.writeLock().unlock(); + } + } + + private static native void setProcessCtx0(String env, String hostname, String runtimeId, String service, String version, String tracerVersion); + private static native ProcessContext readProcessCtx0(); +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/context/ProcessContextTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/context/ProcessContextTest.java new file mode 100644 index 000000000..fac9421a1 --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/context/ProcessContextTest.java @@ -0,0 +1,110 @@ +package com.datadoghq.profiler.context; + +import com.datadoghq.profiler.OTelContext; +import com.datadoghq.profiler.Platform; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.Test; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static org.junit.jupiter.api.Assertions.*; + +public class ProcessContextTest { + + @Test + public void testProcessContextMappingCreation() throws IOException { + Assumptions.assumeTrue(Platform.isLinux()); + + String env = "test-env"; + String hostname = "test-hostname"; + String runtimeId = "test-instance-123"; + String service = "test-service"; + String version = "1.0.0"; + String tracerVersion = "3.5.0"; + + OTelContext.getInstance().setProcessContext(env, hostname, runtimeId, service, 
version, tracerVersion); + + OtelMappingInfo mapping = findOtelMapping(); + assertNotNull(mapping, "OTEL mapping should exist after setProcessContext"); + + verifyMappingPermissions(mapping); + } + + private static class OtelMappingInfo { + final String startAddress; + final String endAddress; + final String permissions; + + OtelMappingInfo(String startAddress, String endAddress, String permissions) { + this.startAddress = startAddress; + this.endAddress = endAddress; + this.permissions = permissions; + } + } + + private OtelMappingInfo findOtelMapping() throws IOException { + Path mapsFile = Paths.get("/proc/self/maps"); + if (!Files.exists(mapsFile)) { + return null; + } + + Pattern otelPattern = Pattern.compile("^([0-9a-f]+)-([0-9a-f]+)\\s+(\\S+)\\s+\\S+\\s+\\S+\\s+\\S+\\s*\\[anon:OTEL_CTX\\].*$"); + + try (BufferedReader reader = Files.newBufferedReader(mapsFile)) { + String line; + while ((line = reader.readLine()) != null) { + Matcher matcher = otelPattern.matcher(line); + if (matcher.matches()) { + return new OtelMappingInfo( + matcher.group(1), + matcher.group(2), + matcher.group(3) + ); + } + } + } + return null; + } + + private void verifyMappingPermissions(OtelMappingInfo mapping) { + assertTrue(mapping.permissions.contains("r"), + "OTEL mapping should have read permission, got: " + mapping.permissions); + assertFalse(mapping.permissions.contains("w"), + "OTEL mapping should not have write permission, got: " + mapping.permissions); + assertFalse(mapping.permissions.contains("x"), + "OTEL mapping should not have execute permission, got: " + mapping.permissions); + } + + @Test + public void testNativeReadBackFunctionality() { + Assumptions.assumeTrue(Platform.isLinux()); + + String env = "test-env"; + String hostname = "test-hostname"; + String runtimeId = "test-instance-123"; + String service = "test-service"; + String version = "1.0.0"; + String tracerVersion = "3.5.0"; + + OTelContext context = OTelContext.getInstance(); + context.setProcessContext(env, 
hostname, runtimeId, service, version, tracerVersion); + + OTelContext.ProcessContext readContext = context.readProcessContext(); + + assertEquals(env, readContext.deploymentEnvironmentName, "Environment name should match"); + assertEquals(hostname, readContext.hostName, "Host name should match"); + assertEquals(runtimeId, readContext.serviceInstanceId, "Service instance ID should match"); + assertEquals(service, readContext.serviceName, "Service name should match"); + assertEquals(version, readContext.serviceVersion, "Service version should match"); + assertEquals("java", readContext.telemetrySdkLanguage, "Tracer language should match"); + assertEquals(tracerVersion, readContext.telemetrySdkVersion, "Tracer version should match"); + assertEquals("dd-trace-java", readContext.telemetrySdkName, "Tracer name should match"); + } +} diff --git a/gradle/ap-lock.properties b/gradle/ap-lock.properties deleted file mode 100644 index 84c171865..000000000 --- a/gradle/ap-lock.properties +++ /dev/null @@ -1,2 +0,0 @@ -branch=dd/master -commit=5930966a92860f6e5d2d89ab6faab5815720bad9 \ No newline at end of file diff --git a/gradle/lock.properties b/gradle/lock.properties new file mode 100644 index 000000000..7842d7640 --- /dev/null +++ b/gradle/lock.properties @@ -0,0 +1,5 @@ +ap.branch=dd/master +ap.commit=5930966a92860f6e5d2d89ab6faab5815720bad9 + +ctx_branch=main +ctx_commit=b33673d801b85a6c38fa0e9f1a139cb246737ce8