From bb000b7e2f092b4f5ce2ed5b06aa630450f2eb92 Mon Sep 17 00:00:00 2001 From: cloud-profiler-team Date: Tue, 2 Jul 2019 03:29:22 -0700 Subject: [PATCH] Update profiler Java agent to the latest version. PiperOrigin-RevId: 256133819 --- Dockerfile | 45 ++- Makefile | 15 +- src/cloud_env.cc | 65 +++- src/cloud_profiler_java_agent.lds | 10 +- src/entry.cc | 55 ++- src/globals.h | 1 + src/http.cc | 1 + src/{config_dataflow_jni.cc => jni.cc} | 21 +- src/pem_roots.cc | 23 -- src/profiler.cc | 24 +- src/profiler.h | 2 +- src/proto.cc | 21 +- src/proto.h | 3 +- src/throttler.h | 5 + src/throttler_api.cc | 82 +++- src/throttler_api.h | 19 +- src/throttler_timed.cc | 54 ++- src/throttler_timed.h | 10 +- src/worker.cc | 93 +++-- src/worker.h | 1 + third_party/javaprofiler/accessors.cc | 54 +++ third_party/javaprofiler/accessors.h | 104 +++++ .../javaprofiler/async_ref_counted_string.cc | 192 +++++++++ .../javaprofiler/async_ref_counted_string.h | 112 ++++++ third_party/javaprofiler/clock.cc | 10 + third_party/javaprofiler/clock.h | 15 +- third_party/javaprofiler/display.cc | 71 ++-- third_party/javaprofiler/display.h | 10 +- third_party/javaprofiler/globals.h | 52 +-- third_party/javaprofiler/heap_sampler.cc | 365 ++++++++++++++++++ third_party/javaprofiler/heap_sampler.h | 205 ++++++++++ third_party/javaprofiler/native.h | 2 +- .../javaprofiler/profile_proto_builder.cc | 106 +++-- .../javaprofiler/profile_proto_builder.h | 155 +++++--- third_party/javaprofiler/profile_test_lib.cc | 55 +++ third_party/javaprofiler/profile_test_lib.h | 42 +- third_party/javaprofiler/stacktraces.cc | 65 ++-- third_party/javaprofiler/stacktraces.h | 32 +- third_party/javaprofiler/tags.cc | 195 ++++++++++ third_party/javaprofiler/tags.h | 92 +++++ .../perftools/profiles/proto/builder.cc | 27 +- .../perftools/profiles/proto/builder.h | 43 ++- .../perftools/profiles/proto/profile.proto | 22 +- 43 files changed, 2146 insertions(+), 430 deletions(-) rename src/{config_dataflow_jni.cc => jni.cc} (68%) 
create mode 100644 third_party/javaprofiler/accessors.cc create mode 100644 third_party/javaprofiler/accessors.h create mode 100644 third_party/javaprofiler/async_ref_counted_string.cc create mode 100644 third_party/javaprofiler/async_ref_counted_string.h create mode 100644 third_party/javaprofiler/heap_sampler.cc create mode 100644 third_party/javaprofiler/heap_sampler.h create mode 100644 third_party/javaprofiler/tags.cc create mode 100644 third_party/javaprofiler/tags.h diff --git a/Dockerfile b/Dockerfile index 8164065c8..30f686dfd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,24 +15,34 @@ # # Base image # -FROM debian:jessie +FROM ubuntu:trusty # # Dependencies # +# Install JDK 11 as sampling heap profiler depends on the new JVMTI APIs. +RUN apt-get update && apt-get install -y software-properties-common +RUN add-apt-repository -y ppa:openjdk-r/ppa +RUN apt-get update + +# Installing openjdk-11-jdk-headless can be very slow due to repo download +# speed. + # Everything we can get through apt-get RUN apt-get update && apt-get -y -q install \ apt-utils \ autoconf \ automake \ + cmake \ curl \ g++ \ git \ + libcurl4-openssl-dev \ libssl-dev \ libtool \ make \ - openjdk-7-jdk \ + openjdk-11-jdk-headless \ python \ unzip \ zlib1g-dev @@ -47,13 +57,6 @@ RUN git clone --depth=1 -b curl-7_55_1 https://github.com/curl/curl.git /tmp/cur make -j && make install && \ cd ~ && rm -rf /tmp/curl -# cmake -RUN git clone -b v3.6.2 https://github.com/Kitware/CMake.git /tmp/cmake && \ - cd /tmp/cmake && \ - ./bootstrap && \ - make -j && make install && \ - cd ~ && rm -rf /tmp/cmake - # gflags RUN git clone --depth=1 -b v2.1.2 https://github.com/gflags/gflags.git /tmp/gflags && \ cd /tmp/gflags && \ @@ -90,12 +93,26 @@ RUN mkdir /tmp/openssl && cd /tmp/openssl && \ # process of gRPC puts the OpenSSL static object files into the gRPC archive # which causes link errors later when the agent is linked with the static # OpenSSL library itself. 
-RUN git clone --depth=1 --recursive -b v1.15.0 https://github.com/grpc/grpc.git /tmp/grpc && \ +RUN git clone --depth=1 --recursive -b v1.21.0 https://github.com/grpc/grpc.git /tmp/grpc && \ cd /tmp/grpc && \ + +# TODO: Remove patch when GKE Istio versions support HTTP 1.0 or +# gRPC http_cli supports HTTP 1.1 +# This sed command is needed until GKE provides Istio 1.1+ exclusively which +# supports HTTP 1.0. + sed -i 's/1\.0/1.1/g' src/core/lib/http/format_request.cc && \ + +# TODO: Remove patch when GKE Istio versions support unambiguous +# FQDNs in rule sets. +# https://github.com/istio/istio/pull/14405 is merged but wait till GKE +# Istio versions includes this PR + sed -i 's/metadata\.google\.internal\./metadata.google.internal/g' src/core/lib/security/credentials/google_default/google_default_credentials.cc && \ + sed -i 's/metadata\.google\.internal\./metadata.google.internal/g' src/core/lib/security/credentials/credentials.h && \ cd third_party/protobuf && \ - ./autogen.sh && ./configure --with-pic CXXFLAGS=-Os && make -j && make install && ldconfig && \ + ./autogen.sh && \ + ./configure --with-pic CXXFLAGS="$(pkg-config --cflags protobuf)" LIBS="$(pkg-config --libs protobuf)" LDFLAGS="-Wl,--no-as-needed" && \ + make -j && make install && ldconfig && \ cd ../.. 
&& \ - CPPFLAGS="-I /usr/local/ssl/include" make -j CONFIG=opt EMBED_OPENSSL=false V=1 HAS_SYSTEM_OPENSSL_NPN=0 && \ - CPPFLAGS="-I /usr/local/ssl/include" make CONFIG=opt EMBED_OPENSSL=false V=1 HAS_SYSTEM_OPENSSL_NPN=0 install && \ + CPPFLAGS="-I /usr/local/ssl/include" LDFLAGS="-Wl,--no-as-needed" make -j CONFIG=opt EMBED_OPENSSL=false V=1 HAS_SYSTEM_OPENSSL_NPN=0 && \ + CPPFLAGS="-I /usr/local/ssl/include" LDFLAGS="-Wl,--no-as-needed" make CONFIG=opt EMBED_OPENSSL=false V=1 HAS_SYSTEM_OPENSSL_NPN=0 install && \ rm -rf /tmp/grpc - diff --git a/Makefile b/Makefile index cea17785a..75a144a81 100644 --- a/Makefile +++ b/Makefile @@ -29,10 +29,11 @@ CFLAGS = \ -g0 \ -DSTANDALONE_BUILD \ -D_GNU_SOURCE \ + -DENABLE_HEAP_SAMPLING SRC_ROOT_PATH=. -JAVA_PATH ?= /usr/lib/jvm/java-7-openjdk-amd64 +JAVA_PATH ?= /usr/lib/jvm/java-11-openjdk-amd64 PROTOC ?= /usr/local/bin/protoc PROTOC_GEN_GRPC ?= /usr/local/bin/grpc_cpp_plugin @@ -70,23 +71,33 @@ PROFILER_API_SOURCES = \ $(GENFILES_PATH)/google/rpc/error_details.pb.cc \ JAVAPROFILER_LIB_SOURCES = \ + $(JAVAPROFILER_LIB_PATH)/accessors.cc \ + $(JAVAPROFILER_LIB_PATH)/async_ref_counted_string.cc \ $(JAVAPROFILER_LIB_PATH)/clock.cc \ $(JAVAPROFILER_LIB_PATH)/display.cc \ + $(JAVAPROFILER_LIB_PATH)/heap_sampler.cc \ $(JAVAPROFILER_LIB_PATH)/native.cc \ + $(JAVAPROFILER_LIB_PATH)/profile_proto_builder.cc \ $(JAVAPROFILER_LIB_PATH)/stacktrace_fixer.cc \ $(JAVAPROFILER_LIB_PATH)/stacktraces.cc \ + $(JAVAPROFILER_LIB_PATH)/tags.cc \ # Add any header not already as a .cc in JAVAPROFILER_LIB_SOURCES. 
JAVAPROFILER_LIB_HEADERS = \ + $(JAVAPROFILER_LIB_PATH)/accessors.h \ + $(JAVAPROFILER_LIB_PATH)/async_ref_counted_string.h \ + $(JAVAPROFILER_LIB_PATH)/heap_sampler.h \ $(JAVAPROFILER_LIB_PATH)/jvmti_error.h \ + $(JAVAPROFILER_LIB_PATH)/profile_proto_builder.h \ $(JAVAPROFILER_LIB_PATH)/stacktrace_decls.h \ $(JAVAPROFILER_LIB_PATH)/stacktraces.h \ + $(JAVAPROFILER_LIB_PATH)/tags.h \ SOURCES = \ $(JAVA_AGENT_PATH)/cloud_env.cc \ - $(JAVA_AGENT_PATH)/config_dataflow_jni.cc \ $(JAVA_AGENT_PATH)/entry.cc \ $(JAVA_AGENT_PATH)/http.cc \ + $(JAVA_AGENT_PATH)/jni.cc \ $(JAVA_AGENT_PATH)/pem_roots.cc \ $(JAVA_AGENT_PATH)/profiler.cc \ $(JAVA_AGENT_PATH)/proto.cc \ diff --git a/src/cloud_env.cc b/src/cloud_env.cc index 607380ced..2e68d1b30 100644 --- a/src/cloud_env.cc +++ b/src/cloud_env.cc @@ -18,15 +18,19 @@ #include #include +#include "src/clock.h" #include "src/http.h" #include "src/string.h" +DEFINE_int32(cprof_gce_metadata_server_retry_count, 3, + "Number of retries to Google Compute Engine metadata host"); +DEFINE_int32( + cprof_gce_metadata_server_retry_sleep_sec, 1, + "Seconds to sleep between retries to Google Compute Engine metadata host"); DEFINE_string(cprof_gce_metadata_server_address, "169.254.169.254:80", "Google Compute Engine metadata host to use"); - DEFINE_string(cprof_access_token_test_only, "", "override OAuth2 access token for testing"); - DEFINE_string(cprof_project_id, "", "cloud project ID"); DEFINE_string(cprof_zone_name, "", "zone name"); DEFINE_string(cprof_service, "", "deployment service name"); @@ -47,23 +51,40 @@ const char kNoData[] = ""; namespace { string GceMetadataRequest(HTTPRequest* req, const string& path) { + Clock* clock = DefaultClock(); req->AddHeader("Metadata-Flavor", "Google"); req->SetTimeout(2); // seconds string url = FLAGS_cprof_gce_metadata_server_address + path, resp; - if (!req->DoGet(url, &resp)) { - LOG(ERROR) << "Error making HTTP request to the GCE metadata server"; - return kNoData; - } - int resp_code = 
req->GetResponseCode(); - if (resp_code != kHTTPStatusOK) { - LOG(ERROR) << "Request to the GCE metadata server failed, status code: " - << resp_code; - return kNoData; + int retry_sleep_sec = FLAGS_cprof_gce_metadata_server_retry_sleep_sec; + int retry_count = FLAGS_cprof_gce_metadata_server_retry_count; + struct timespec retry_ts = NanosToTimeSpec(kNanosPerSecond * retry_sleep_sec); + + for (int i = 0; i <= retry_count; i++) { + if (!req->DoGet(url, &resp)) { + if (i < retry_count) { + LOG(ERROR) << "Error making HTTP request for " << url + << " to the GCE metadata server. " + << "Will retry in " << retry_ts.tv_sec << "s"; + clock->SleepFor(retry_ts); + } else { + LOG(ERROR) << "Error making HTTP request for " << url + << " to the GCE metadata server."; + } + continue; + } + int resp_code = req->GetResponseCode(); + if (resp_code != kHTTPStatusOK) { + LOG(ERROR) << "Request to the GCE metadata server for " << url + << " failed, status code: " << resp_code; + return kNoData; + } + return resp; } - - return resp; + LOG(ERROR) << "Unable to contact GCE metadata server for " << url << " after " + << retry_count << " retries."; + return kNoData; } const char* Getenv(const string& var) { @@ -82,15 +103,25 @@ CloudEnv::CloudEnv() { } else if (!FLAGS_cprof_target.empty()) { service_ = FLAGS_cprof_target; } else { - const char* val = Getenv("GAE_SERVICE"); - service_ = val == nullptr ? "" : val; + for (const string& env_var : {"GAE_SERVICE", "K_SERVICE"}) { + const char* val = Getenv(env_var); + if (val != nullptr) { + service_ = val; + break; + } + } } if (!FLAGS_cprof_service_version.empty()) { service_version_ = FLAGS_cprof_service_version; } else { - const char* val = Getenv("GAE_VERSION"); - service_version_ = val == nullptr ? 
"" : val; + for (const string& env_var : {"GAE_VERSION", "K_REVISION"}) { + const char* val = Getenv(env_var); + if (val != nullptr) { + service_version_ = val; + break; + } + } } if (!FLAGS_cprof_project_id.empty()) { diff --git a/src/cloud_profiler_java_agent.lds b/src/cloud_profiler_java_agent.lds index 25fdfdaff..cb432ed37 100644 --- a/src/cloud_profiler_java_agent.lds +++ b/src/cloud_profiler_java_agent.lds @@ -2,11 +2,11 @@ VERS_1.0 { global: Agent_OnLoad; Agent_OnUnload; - Java_com_google_cloud_dataflow_worker_profiler_Profiler_disable; - Java_com_google_cloud_dataflow_worker_profiler_Profiler_enable; - Java_com_google_cloud_dataflow_worker_profiler_Profiler_getAttribute; - Java_com_google_cloud_dataflow_worker_profiler_Profiler_registerAttribute; - Java_com_google_cloud_dataflow_worker_profiler_Profiler_setAttribute; + Java_org_apache_beam_runners_dataflow_worker_profiler_Profiler_disable; + Java_org_apache_beam_runners_dataflow_worker_profiler_Profiler_enable; + Java_org_apache_beam_runners_dataflow_worker_profiler_Profiler_getAttribute; + Java_org_apache_beam_runners_dataflow_worker_profiler_Profiler_registerAttribute; + Java_org_apache_beam_runners_dataflow_worker_profiler_Profiler_setAttribute; local: *; }; diff --git a/src/entry.cc b/src/entry.cc index 4d5eb8aa6..501a14da4 100644 --- a/src/entry.cc +++ b/src/entry.cc @@ -18,7 +18,9 @@ #include "src/string.h" #include "src/worker.h" +#include "third_party/javaprofiler/accessors.h" #include "third_party/javaprofiler/globals.h" +#include "third_party/javaprofiler/heap_sampler.h" #include "third_party/javaprofiler/stacktraces.h" DEFINE_bool(cprof_cpu_use_per_thread_timers, false, @@ -28,6 +30,10 @@ DEFINE_bool(cprof_force_debug_non_safepoints, true, "when true, force DebugNonSafepoints flag by subscribing to the" "code generation events. 
This improves the accuracy of profiles," "but may incur a bit of overhead."); +DEFINE_bool(cprof_enable_heap_sampling, false, + "when unset, heap allocation sampling is disabled"); +DEFINE_int32(cprof_heap_sampling_interval, 512 * 1024, + "sampling interval for heap allocation sampling, 512k by default"); namespace cloud { namespace profiler { @@ -111,6 +117,12 @@ void JNICALL OnVMInit(jvmtiEnv *jvmti, JNIEnv *jni_env, jthread thread) { jclass klass = class_list[i]; CreateJMethodIDsForClass(jvmti, klass); } + + if (FLAGS_cprof_enable_heap_sampling) { + google::javaprofiler::HeapMonitor::Enable( + jvmti, jni_env, FLAGS_cprof_heap_sampling_interval); + } + worker->Start(jni_env); } @@ -132,9 +144,13 @@ void JNICALL OnVMDeath(jvmtiEnv *jvmti_env, JNIEnv *jni_env) { worker->Stop(); delete worker; worker = NULL; + + if (google::javaprofiler::HeapMonitor::Enabled()) { + google::javaprofiler::HeapMonitor::Disable(); + } } -static bool PrepareJvmti(jvmtiEnv *jvmti) { +static bool PrepareJvmti(JavaVM *vm, jvmtiEnv *jvmti) { LOG(INFO) << "Prepare JVMTI"; // Set the list of permissions to do the various internal VM things @@ -175,20 +191,23 @@ static bool PrepareJvmti(jvmtiEnv *jvmti) { return false; } } + return true; } static bool RegisterJvmti(jvmtiEnv *jvmti) { // Create the list of callbacks to be called on given events. 
- jvmtiEventCallbacks *callbacks = new jvmtiEventCallbacks(); - memset(callbacks, 0, sizeof(jvmtiEventCallbacks)); + jvmtiEventCallbacks callbacks; + memset(&callbacks, 0, sizeof(jvmtiEventCallbacks)); - callbacks->ThreadStart = &OnThreadStart; - callbacks->ThreadEnd = &OnThreadEnd; - callbacks->VMInit = &OnVMInit; - callbacks->VMDeath = &OnVMDeath; - callbacks->ClassLoad = &OnClassLoad; - callbacks->ClassPrepare = &OnClassPrepare; + callbacks.ThreadStart = &OnThreadStart; + callbacks.ThreadEnd = &OnThreadEnd; + callbacks.VMInit = &OnVMInit; + callbacks.VMDeath = &OnVMDeath; + callbacks.ClassLoad = &OnClassLoad; + callbacks.ClassPrepare = &OnClassPrepare; + + google::javaprofiler::HeapMonitor::AddCallback(&callbacks); std::vector events = { JVMTI_EVENT_CLASS_LOAD, JVMTI_EVENT_CLASS_PREPARE, @@ -197,12 +216,12 @@ static bool RegisterJvmti(jvmtiEnv *jvmti) { }; if (FLAGS_cprof_force_debug_non_safepoints) { - callbacks->CompiledMethodLoad = &OnCompiledMethodLoad; + callbacks.CompiledMethodLoad = &OnCompiledMethodLoad; events.push_back(JVMTI_EVENT_COMPILED_METHOD_LOAD); } JVMTI_ERROR_1( - (jvmti->SetEventCallbacks(callbacks, sizeof(jvmtiEventCallbacks))), + (jvmti->SetEventCallbacks(&callbacks, sizeof(jvmtiEventCallbacks))), false); // Enable the callbacks to be triggered when the events occur. @@ -248,16 +267,21 @@ jint JNICALL Agent_OnLoad(JavaVM *vm, char *options, void *reserved) { ParseArguments(options); // Initializes logger -- do not log before this call LOG(INFO) << "Profiler agent loaded"; - google::javaprofiler::Accessors::Init(); google::javaprofiler::AttributeTable::Init(); - if ((err = (vm->GetEnv(reinterpret_cast(&jvmti), JVMTI_VERSION))) != - JNI_OK) { + // Try to get the latest JVMTI_VERSION the agent was built with. + err = vm->GetEnv(reinterpret_cast(&jvmti), JVMTI_VERSION); + if (err == JNI_EVERSION) { + // The above call can fail if the VM is actually from an older VM, therefore + // try to get an older JVMTI (compatible with JDK8). 
+ err = (vm->GetEnv(reinterpret_cast(&jvmti), JVMTI_VERSION_1_2)); + } + if (err != JNI_OK) { LOG(ERROR) << "JNI Error " << err; return 1; } - if (!PrepareJvmti(jvmti)) { + if (!PrepareJvmti(vm, jvmti)) { LOG(ERROR) << "Failed to initialize JVMTI. Continuing..."; return 0; } @@ -285,7 +309,6 @@ jint JNICALL Agent_OnLoad(JavaVM *vm, char *options, void *reserved) { void JNICALL Agent_OnUnload(JavaVM *vm) { IMPLICITLY_USE(vm); - google::javaprofiler::Accessors::Destroy(); } } // namespace profiler diff --git a/src/globals.h b/src/globals.h index 515f1b738..764a61bc3 100644 --- a/src/globals.h +++ b/src/globals.h @@ -26,6 +26,7 @@ #include "third_party/javaprofiler/jvmti_error.h" #include "third_party/javaprofiler/stacktraces.h" + #include #include diff --git a/src/http.cc b/src/http.cc index 444559b0f..42be74038 100644 --- a/src/http.cc +++ b/src/http.cc @@ -58,6 +58,7 @@ void HTTPRequest::SetTimeout(int timeout_sec) { bool HTTPRequest::DoGet(const string& url, string *data) { data->clear(); + curl_easy_setopt(curl_, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); curl_easy_setopt(curl_, CURLOPT_WRITEDATA, data); curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, ResponseCallback); return DoRequest(url); diff --git a/src/config_dataflow_jni.cc b/src/jni.cc similarity index 68% rename from src/config_dataflow_jni.cc rename to src/jni.cc index c9fb1a923..a85bdece0 100644 --- a/src/config_dataflow_jni.cc +++ b/src/jni.cc @@ -15,24 +15,23 @@ #include #include "src/worker.h" +#include "third_party/javaprofiler/accessors.h" #include "third_party/javaprofiler/stacktraces.h" -extern "C" AGENTEXPORT -void JNICALL -Java_com_google_cloud_dataflow_worker_profiler_Profiler_enable( - JNIEnv *, jclass) { +extern "C" AGENTEXPORT void JNICALL +Java_org_apache_beam_runners_dataflow_worker_profiler_Profiler_enable(JNIEnv *, + jclass) { cloud::profiler::Worker::EnableProfiling(); } -extern "C" AGENTEXPORT -void JNICALL -Java_com_google_cloud_dataflow_worker_profiler_Profiler_disable( - JNIEnv 
*, jclass) { +extern "C" AGENTEXPORT void JNICALL +Java_org_apache_beam_runners_dataflow_worker_profiler_Profiler_disable(JNIEnv *, + jclass) { cloud::profiler::Worker::DisableProfiling(); } extern "C" AGENTEXPORT jint JNICALL -Java_com_google_cloud_dataflow_worker_profiler_Profiler_registerAttribute( +Java_org_apache_beam_runners_dataflow_worker_profiler_Profiler_registerAttribute( JNIEnv *env, jclass, jstring value) { const char *value_utf = env->GetStringUTFChars(value, nullptr); int ret = google::javaprofiler::AttributeTable::RegisterString(value_utf); @@ -41,7 +40,7 @@ Java_com_google_cloud_dataflow_worker_profiler_Profiler_registerAttribute( } extern "C" AGENTEXPORT jint JNICALL -Java_com_google_cloud_dataflow_worker_profiler_Profiler_setAttribute( +Java_org_apache_beam_runners_dataflow_worker_profiler_Profiler_setAttribute( JNIEnv *env, jclass, jint attr) { int64_t ret = google::javaprofiler::Accessors::GetAttribute(); google::javaprofiler::Accessors::SetAttribute(static_cast(attr)); @@ -49,7 +48,7 @@ Java_com_google_cloud_dataflow_worker_profiler_Profiler_setAttribute( } extern "C" AGENTEXPORT jint JNICALL -Java_com_google_cloud_dataflow_worker_profiler_Profiler_getAttribute( +Java_org_apache_beam_runners_dataflow_worker_profiler_Profiler_getAttribute( JNIEnv *env, jclass) { int64_t ret = google::javaprofiler::Accessors::GetAttribute(); return static_cast(ret); diff --git a/src/pem_roots.cc b/src/pem_roots.cc index 7a2cb8c11..2225a1c91 100644 --- a/src/pem_roots.cc +++ b/src/pem_roots.cc @@ -973,29 +973,6 @@ JJUEeKgDu+6B5dpffItKoZB0JaezPkvILFa9x8jvOOJckvB595yEunQtYQEgfn7R # Note: "GlobalSign Root CA - R7" not added on purpose. It is P-521. 
-# Operating CA: GlobalSign -# Issuer: C=BE, O=GlobalSign nv-sa, OU=Root CA, CN=GlobalSign Root CA - R8 -# Subject: C=BE, O=GlobalSign nv-sa, OU=Root CA, CN=GlobalSign Root CA - R8 -# Label: "GlobalSign Root CA - R8" -# Serial: 1462505469299036457243287072048861 -# MD5 Fingerprint: 26:15:db:de:38:b4:45:5e:19:3f:1b:57:af:53:2b:36 -# SHA1 Fingerprint: 62:01:ff:ce:4f:09:cd:c7:e0:2f:e1:10:f4:fd:67:f0:37:1a:2f:2a -# SHA256 Fingerprint: ae:48:51:ff:42:03:9b:ad:e0:58:27:91:51:d8:26:83:04:1d:25:98:e2:40:68:3c:c5:6d:76:fb:8c:f5:3d:42 ------BEGIN CERTIFICATE----- -MIICMzCCAbmgAwIBAgIOSBtqCfT5YHE6/oHMht0wCgYIKoZIzj0EAwMwXDELMAkG -A1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jv -b3QgQ0ExIDAeBgNVBAMTF0dsb2JhbFNpZ24gUm9vdCBDQSAtIFI4MB4XDTE2MDYx -NTAwMDAwMFoXDTM2MDYxNTAwMDAwMFowXDELMAkGA1UEBhMCQkUxGTAXBgNVBAoT -EEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jvb3QgQ0ExIDAeBgNVBAMTF0ds -b2JhbFNpZ24gUm9vdCBDQSAtIFI4MHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEuO58 -MIfYlB9Ua22Ynfx1+1uIq0K6jX05ft1EPTk84QWhSmRgrDemc7D5yUVLCwbQOuDx -bV/6XltaUrV240bb1R6MdHpCyUE1T8bU4ihgqzSKzrFAI0alrhkkUnyQVUTOo0Iw -QDAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQULzoS -JoDoisJQeG0GxDR+4kk5V3YwCgYIKoZIzj0EAwMDaAAwZQIxAMehPbKSkPrKXeAn -hII7Icz0jfiUVvIgXxHArLxfFaULyBZDp/jFf40goH9e/BYcJwIwHoz1Vr8425zm -pteEKebfDVMu6CsBt30JPLEyahqauArq6K0I8nQ51SsiNtzvRmbY ------END CERTIFICATE----- - # Operating CA: GoDaddy # Issuer: CN=Go Daddy Root Certificate Authority - G2 O=GoDaddy.com, Inc. # Subject: CN=Go Daddy Root Certificate Authority - G2 O=GoDaddy.com, Inc. 
diff --git a/src/profiler.cc b/src/profiler.cc index df91023e5..90dde14a6 100644 --- a/src/profiler.cc +++ b/src/profiler.cc @@ -25,6 +25,7 @@ #include "src/clock.h" #include "src/globals.h" #include "src/proto.h" +#include "third_party/javaprofiler/accessors.h" DEFINE_int32(cprof_wall_num_threads_cutoff, 4096, "Do not take wall profiles if more than this # of threads exist."); @@ -37,6 +38,8 @@ DEFINE_bool(cprof_record_native_stack, false, namespace cloud { namespace profiler { +using google::javaprofiler::AlmostThere; + google::javaprofiler::AsyncSafeTraceMultiset *Profiler::fixed_traces_ = nullptr; std::atomic Profiler::unknown_stack_count_; @@ -151,8 +154,7 @@ bool SignalHandler::SetSigprofInterval(int64_t period_usec) { timer.it_interval.tv_usec = period_usec; timer.it_value = timer.it_interval; if (setitimer(ITIMER_PROF, &timer, 0) == -1) { - fprintf(stderr, "Scheduling profiler interval failed with error %d\n", - errno); + LOG(ERROR) << "Scheduling profiler interval failed with error " << errno; return false; } return true; @@ -198,24 +200,12 @@ void Profiler::Reset() { } string Profiler::SerializeProfile( - const google::javaprofiler::NativeProcessInfo &native_info) { + JNIEnv *jni, const google::javaprofiler::NativeProcessInfo &native_info) { return SerializeAndClearJavaCpuTraces( - jvmti_, native_info, ProfileType(), duration_nanos_, period_nanos_, + jni, jvmti_, native_info, ProfileType(), duration_nanos_, period_nanos_, &aggregated_traces_, unknown_stack_count_); } -bool AlmostThere(const struct timespec &finish, const struct timespec &lap) { - // Determine if there is time for another lap before reaching the - // finish line. Have a margin of multiple laps to ensure we do not - // overrun the finish line. 
- int64_t margin_laps = 2; - - struct timespec now = DefaultClock()->Now(); - struct timespec laps = {lap.tv_sec * margin_laps, lap.tv_nsec * margin_laps}; - - return TimeLessThan(finish, TimeAdd(now, laps)); -} - bool CPUProfiler::Collect() { Reset(); @@ -231,7 +221,7 @@ bool CPUProfiler::Collect() { // Sleep until finish_line, but wakeup periodically to flush the // internal tables. - while (!AlmostThere(finish_line, flush_interval)) { + while (!AlmostThere(clock, finish_line, flush_interval)) { clock->SleepFor(flush_interval); Flush(); } diff --git a/src/profiler.h b/src/profiler.h index 78fdd1901..c76640094 100644 --- a/src/profiler.h +++ b/src/profiler.h @@ -57,7 +57,7 @@ class Profiler { // Serialize the collected traces into a compressed serialized profile.proto string SerializeProfile( - const google::javaprofiler::NativeProcessInfo &native_info); + JNIEnv *jni, const google::javaprofiler::NativeProcessInfo &native_info); // Signal handler, which records the current stack trace into the profile. 
static void Handle(int signum, siginfo_t *info, void *context); diff --git a/src/proto.cc b/src/proto.cc index 2b7d3d25b..3cd1bc1ff 100644 --- a/src/proto.cc +++ b/src/proto.cc @@ -41,7 +41,7 @@ class ProfileProtoBuilder { } // Populate the profile with a set of traces - void Populate(const char *profile_type, + void Populate(JNIEnv *jni, const char *profile_type, const google::javaprofiler::TraceMultiset &traces, int64_t duration_ns, int64_t period_ns); void AddArtificialSample(const string &name, int64_t count, int64_t weight); @@ -60,7 +60,8 @@ class ProfileProtoBuilder { private: void AddSample(const std::vector &locations, int64_t count, int64_t weight, int64_t attr); - uint64_t LocationID(const google::javaprofiler::JVMPI_CallFrame &frame); + uint64_t LocationID(JNIEnv *jni, + const google::javaprofiler::JVMPI_CallFrame &frame); uint64_t LocationID(uint64_t address); uint64_t LocationID(const string &name); uint64_t LocationID(const string &class_name, const string &method_name, @@ -130,7 +131,7 @@ int64_t ProfileProtoBuilder::TotalCount() const { return total_count_; } int64_t ProfileProtoBuilder::TotalWeight() const { return total_weight_; } uint64_t ProfileProtoBuilder::LocationID( - const google::javaprofiler::JVMPI_CallFrame &frame) { + JNIEnv *jni, const google::javaprofiler::JVMPI_CallFrame &frame) { if (frame.lineno == google::javaprofiler::kNativeFrameLineNum) { return LocationID(reinterpret_cast(frame.method_id)); } @@ -142,7 +143,7 @@ uint64_t ProfileProtoBuilder::LocationID( string method_name, class_name, file_name, signature; int line_number = 0; - google::javaprofiler::GetStackFrameElements(jvmti_, frame, &file_name, + google::javaprofiler::GetStackFrameElements(jni, jvmti_, frame, &file_name, &class_name, &method_name, &signature, &line_number); google::javaprofiler::FixMethodParameters(&signature); @@ -211,8 +212,9 @@ uint64_t ProfileProtoBuilder::LocationID(const string &class_name, } void ProfileProtoBuilder::Populate( - const char 
*profile_type, const google::javaprofiler::TraceMultiset &traces, - int64_t duration_ns, int64_t period_ns) { + JNIEnv *jni, const char *profile_type, + const google::javaprofiler::TraceMultiset &traces, int64_t duration_ns, + int64_t period_ns) { perftools::profiles::Profile *profile = builder_.mutable_profile(); profile->mutable_period_type()->set_type(builder_.StringId(profile_type)); @@ -233,7 +235,7 @@ void ProfileProtoBuilder::Populate( if (count != 0) { std::vector locations; for (const auto &frame : trace.first.frames) { - locations.push_back(LocationID(frame)); + locations.push_back(LocationID(jni, frame)); } AddSample(locations, count, count * period_ns, trace.first.attr); } @@ -271,11 +273,12 @@ void ProfileProtoBuilder::AddSample(const std::vector &locations, } string SerializeAndClearJavaCpuTraces( - jvmtiEnv *jvmti, const google::javaprofiler::NativeProcessInfo &native_info, + JNIEnv *env, jvmtiEnv *jvmti, + const google::javaprofiler::NativeProcessInfo &native_info, const char *profile_type, int64_t duration_ns, int64_t period_ns, google::javaprofiler::TraceMultiset *traces, int64_t unknown_count) { ProfileProtoBuilder b(jvmti, native_info); - b.Populate(profile_type, *traces, duration_ns, period_ns); + b.Populate(env, profile_type, *traces, duration_ns, period_ns); b.AddArtificialSample("[Unknown]", unknown_count, unknown_count * period_ns); LOG(INFO) << "Collected a profile: total count=" << b.TotalCount() << ", weight=" << b.TotalWeight(); diff --git a/src/proto.h b/src/proto.h index 1a0a0f589..f760de48b 100644 --- a/src/proto.h +++ b/src/proto.h @@ -27,7 +27,8 @@ namespace profiler { // from a collection of java stack traces, symbolized using the jvmti. // Data in traces will be cleared. 
string SerializeAndClearJavaCpuTraces( - jvmtiEnv *jvmti, const google::javaprofiler::NativeProcessInfo &native_info, + JNIEnv *jni, jvmtiEnv *jvmti, + const google::javaprofiler::NativeProcessInfo &native_info, const char *profile_type, int64_t duration_nanos, int64_t period_nanos, google::javaprofiler::TraceMultiset *traces, int64_t unknown_count); diff --git a/src/throttler.h b/src/throttler.h index 40efe2d1c..816991cdd 100644 --- a/src/throttler.h +++ b/src/throttler.h @@ -27,6 +27,7 @@ namespace profiler { // Supported profile types. constexpr char kTypeCPU[] = "cpu"; constexpr char kTypeWall[] = "wall"; +constexpr char kTypeHeap[] = "heap"; // Iterator-like abstraction used to guide a profiling loop comprising of // waiting for when the next profile may be collected and saving its data once @@ -60,6 +61,10 @@ class Throttler { // Upload the compressed profile proto bytes. Returns false on error. virtual bool Upload(string profile) = 0; + + // Closes the throttler by trying to cancel WaitNext() / Upload() in flight. + // Those calls may return cancellation error. This method is thread-safe. + virtual void Close() = 0; }; } // namespace profiler diff --git a/src/throttler_api.cc b/src/throttler_api.cc index fdd351ccb..538f40e3b 100644 --- a/src/throttler_api.cc +++ b/src/throttler_api.cc @@ -33,6 +33,8 @@ #include "grpcpp/security/credentials.h" #include "grpcpp/support/channel_arguments.h" +#include "third_party/javaprofiler/heap_sampler.h" + // API curated profiling configuration. 
DEFINE_string(cprof_api_address, "cloudprofiler.googleapis.com", "API server address"); @@ -165,7 +167,13 @@ bool InitializeDeployment(CloudEnv* env, const string& labels, string service = env->Service(); if (service.empty()) { - LOG(ERROR) << "Deployment service is not configured"; + LOG(ERROR) << "Deployment service name is not configured"; + return false; + } + if (!IsValidServiceName(service)) { + LOG(ERROR) + << "Deployment service name '" << service + << "' does not match pattern '^[a-z]([-a-z0-9_.]{0,253}[a-z0-9])?$'"; return false; } d->set_target(service); @@ -190,6 +198,11 @@ bool InitializeDeployment(CloudEnv* env, const string& labels, for (const auto& kv : label_kvs) { (*d->mutable_labels())[kv.first] = kv.second; } + + LOG(INFO) << "Initialized deployment: project_id=" << project_id + << ", service=" << service + << ", service_version=" << service_version + << ", zone_name=" << zone_name; return true; } @@ -208,6 +221,29 @@ bool AddProfileLabels(api::Profile* p, const string& labels) { } // namespace +// Returns true if the service name matches the regex +// "^[a-z]([-a-z0-9_.]{0,253}[a-z0-9])?$", and false otherwise. +bool IsValidServiceName(string s) { + if (s.length() < 1 || s.length() > 255) { + return false; + } + if (s[0] < 'a' || s[0] > 'z') { + return false; + } + if ((s.back() < 'a' || s.back() > 'z') && + (s.back() < '0' || s.back() > '9')) { + return false; + } + for (size_t i = 1; i < s.length() - 1; ++i) { + char c = s[i]; + if ((c < 'a' || c > 'z') && (c < '0' || c > '9') && c != '.' 
&& c != '-' && + c != '_') { + return false; + } + } + return true; +} + APIThrottler::APIThrottler() : APIThrottler(DefaultCloudEnv(), DefaultClock(), nullptr) {} @@ -220,7 +256,8 @@ APIThrottler::APIThrottler( clock_(clock), stub_(std::move(stub)), types_({api::CPU, api::WALL}), - creation_backoff_envelope_ns_(kBackoffNanos) { + creation_backoff_envelope_ns_(kBackoffNanos), + closed_(false) { grpc_init(); gpr_set_log_function(GRPCLog); @@ -233,6 +270,11 @@ APIThrottler::APIThrottler( << " to create and upload profiles"; stub_ = NewProfilerServiceStub(FLAGS_cprof_api_address); } + + if (google::javaprofiler::HeapMonitor::Enabled()) { + LOG(INFO) << "Heap allocation sampling supported for this JDK"; + types_.push_back(api::HEAP); + } } void APIThrottler::SetProfileTypes(const std::vector& types) { @@ -254,13 +296,14 @@ bool APIThrottler::WaitNext() { LOG(ERROR) << "Failed to initialize deployment, stop profiling"; return false; } + req.set_parent("projects/" + req.deployment().project_id()); while (true) { LOG(INFO) << "Creating a new profile via profiler service"; - grpc::ClientContext ctx; profile_.Clear(); - grpc::Status st = stub_->CreateProfile(&ctx, req, &profile_); + ResetClientContext(); + grpc::Status st = stub_->CreateProfile(ctx_.get(), req, &profile_); if (st.ok()) { LOG(INFO) << "Profile created: " << ProfileType() << " " << profile_.name(); @@ -268,7 +311,10 @@ bool APIThrottler::WaitNext() { creation_backoff_envelope_ns_ = kBackoffNanos; break; } - OnCreationError(ctx, st); + if (closed_) { + return false; + } + OnCreationError(st); } return true; @@ -281,6 +327,8 @@ string APIThrottler::ProfileType() { return kTypeCPU; case api::WALL: return kTypeWall; + case api::HEAP: + return kTypeHeap; default: const string& pt_name = api::ProfileType_Name(pt); LOG(ERROR) << "Unsupported profile type " << pt_name; @@ -297,8 +345,6 @@ bool APIThrottler::Upload(string profile) { LOG(INFO) << "Uploading " << profile.size() << " bytes of '" << ProfileType() << "' 
profile data"; - grpc::ClientContext ctx; - if (!AddProfileLabels(&profile_, FLAGS_cprof_profile_labels)) { LOG(ERROR) << "Failed to add profile labels, won't upload the profile"; return false; @@ -308,7 +354,8 @@ bool APIThrottler::Upload(string profile) { *req.mutable_profile() = profile_; req.mutable_profile()->set_profile_bytes(std::move(profile)); - grpc::Status st = stub_->UpdateProfile(&ctx, req, &profile_); + ResetClientContext(); + grpc::Status st = stub_->UpdateProfile(ctx_.get(), req, &profile_); if (!st.ok()) { // TODO: Recognize and retry transient errors. @@ -319,11 +366,10 @@ bool APIThrottler::Upload(string profile) { return true; } -void APIThrottler::OnCreationError(const grpc::ClientContext& ctx, - const grpc::Status& st) { +void APIThrottler::OnCreationError(const grpc::Status& st) { if (st.error_code() == grpc::StatusCode::ABORTED) { int64_t backoff_ns; - if (AbortedBackoffDuration(ctx, &backoff_ns)) { + if (AbortedBackoffDuration(*ctx_, &backoff_ns)) { if (backoff_ns > 0) { LOG(INFO) << "Got ABORTED, will retry after backing off for " << backoff_ns / kNanosPerMilli << "ms"; @@ -343,5 +389,19 @@ void APIThrottler::OnCreationError(const grpc::ClientContext& ctx, kMaxBackoffNanos); } +void APIThrottler::ResetClientContext() { + std::lock_guard lock(ctx_mutex_); + ctx_.reset(new grpc::ClientContext()); // NOLINT + if (closed_) { + ctx_->TryCancel(); + } +} + +void APIThrottler::Close() { + std::lock_guard lock(ctx_mutex_); + closed_ = true; + ctx_->TryCancel(); +} + } // namespace profiler } // namespace cloud diff --git a/src/throttler_api.h b/src/throttler_api.h index 2631734f7..868feb618 100644 --- a/src/throttler_api.h +++ b/src/throttler_api.h @@ -17,7 +17,9 @@ #ifndef CLOUD_PROFILER_AGENT_JAVA_THROTTLER_API_H_ #define CLOUD_PROFILER_AGENT_JAVA_THROTTLER_API_H_ +#include #include +#include // NOLINT #include #include @@ -25,7 +27,6 @@ #include "src/cloud_env.h" #include "src/throttler.h" #include 
"google/devtools/cloudprofiler/v2/profiler.grpc.pb.h" - #include "grpcpp/client_context.h" #include "grpcpp/support/status.h" @@ -52,12 +53,16 @@ class APIThrottler : public Throttler { string ProfileType() override; int64_t DurationNanos() override; bool Upload(string profile) override; + void Close() override; private: // Takes a backoff on profile creation error. The backoff duration // may be specified by the server. Otherwise it will be a randomized // exponentially increasing value, bounded by kMaxBackoffNanos. - void OnCreationError(const grpc::ClientContext& ctx, const grpc::Status& st); + void OnCreationError(const grpc::Status& st); + + // Resets the client gRPC context for the next call. + void ResetClientContext(); private: CloudEnv* env_; @@ -72,8 +77,18 @@ class APIThrottler : public Throttler { int64_t creation_backoff_envelope_ns_; std::default_random_engine gen_; std::uniform_int_distribution dist_; + + // The throttler is closing, cancel ongoing and future requests. + std::atomic closed_; + std::mutex ctx_mutex_; + std::unique_ptr ctx_; }; +// Returns true if the service name matches the regex +// "^[a-z]([-a-z0-9_.]{0,253}[a-z0-9])?$", and false otherwise. +// Public for testing. +bool IsValidServiceName(string service); + } // namespace profiler } // namespace cloud diff --git a/src/throttler_timed.cc b/src/throttler_timed.cc index f4ca1c59e..0dd646a07 100644 --- a/src/throttler_timed.cc +++ b/src/throttler_timed.cc @@ -30,14 +30,19 @@ namespace profiler { namespace { -const int64_t kRandomRange = 65536; +const int64_t kRandomRange = 100000; // Gets the sampling configuration from the flags. -int64_t GetConfiguration(int64_t *duration_cpu_ns, int64_t *duration_wall_ns) { +int64_t GetConfiguration(int64_t* duration_cpu_ns, int64_t* duration_wall_ns, + bool* enable_heap) { int64_t duration_ns = FLAGS_cprof_duration_sec * kNanosPerSecond; *duration_cpu_ns = 0; *duration_wall_ns = 0; + + // Currently heap is always disabled if not forced explictly. 
+ *enable_heap = false; + if (FLAGS_cprof_force == "") { *duration_cpu_ns = duration_ns; *duration_wall_ns = duration_ns; @@ -45,6 +50,8 @@ int64_t GetConfiguration(int64_t *duration_cpu_ns, int64_t *duration_wall_ns) { *duration_cpu_ns = duration_ns; } else if (FLAGS_cprof_force == kTypeWall) { *duration_wall_ns = duration_ns; + } else if (FLAGS_cprof_force == kTypeHeap) { + *enable_heap = true; } else { LOG(ERROR) << "Unrecognized option cprof_force=" << FLAGS_cprof_force << ", profiling disabled"; @@ -83,13 +90,19 @@ TimedThrottler::TimedThrottler(const string& path) : TimedThrottler(UploaderFromFlags(path), DefaultClock(), false) {} TimedThrottler::TimedThrottler(std::unique_ptr uploader, - Clock* clock, bool fixed_seed) - : clock_(clock), profile_count_(), uploader_(std::move(uploader)) { - interval_ns_ = GetConfiguration(&duration_cpu_ns_, &duration_wall_ns_); + Clock* clock, bool no_randomize) + : clock_(clock), + closed_(false), + profile_count_(), + uploader_(std::move(uploader)) { + interval_ns_ = + GetConfiguration(&duration_cpu_ns_, &duration_wall_ns_, &enable_heap_); + LOG(INFO) << "sampling duration: cpu=" << duration_cpu_ns_ / kNanosPerSecond - << "s, wall=" << duration_wall_ns_ / kNanosPerSecond << "s"; + << "s, wall=" << duration_wall_ns_ / kNanosPerSecond; LOG(INFO) << "sampling interval: " << interval_ns_ / kNanosPerSecond << "s"; LOG(INFO) << "sampling delay: " << FLAGS_cprof_delay_sec << "s"; + LOG(INFO) << "heap sampling enabled: " << enable_heap_; struct timespec now = clock_->Now(); @@ -100,19 +113,19 @@ TimedThrottler::TimedThrottler(std::unique_ptr uploader, next_interval_ = TimeAdd(next_interval_, delay_ts); } - // Create a random number generator, seeded on the microseconds from the - // current timer if not asked for fixed seed (which should only be used for - // determinism in tests). - gen_ = std::default_random_engine(fixed_seed ? 
10 : now.tv_nsec / 1000); - dist_ = std::uniform_int_distribution(0, kRandomRange); + // Create a random number generator, seeded on the current time. + gen_ = std::default_random_engine(now.tv_nsec / 1000); + dist_ = std::uniform_int_distribution( + no_randomize ? kRandomRange : 0, kRandomRange); // This will get popped on the first WaitNext() call. cur_.push_back({"", 0}); } bool TimedThrottler::WaitNext() { - if (!uploader_ || (duration_cpu_ns_ == 0 && duration_wall_ns_ == 0)) { - // Refuse profiling if both CPU and wall are disabled or no uploader. + if (!uploader_ || + (duration_cpu_ns_ == 0 && duration_wall_ns_ == 0 && !enable_heap_)) { + // Refuse profiling if CPU, wall, and heap are disabled or no uploader. LOG(WARNING) << "Profiling disabled"; return false; } @@ -139,6 +152,16 @@ bool TimedThrottler::WaitNext() { struct timespec profiling_start = TimeAdd(next_interval_, NanosToTimeSpec(wait_ns)); + + // Wait till the next profiling time polling for the cancellation. + const struct timespec poll_interval = {0, 500 * 1000 * 1000}; // 0.5s + while (!google::javaprofiler::AlmostThere(clock_, profiling_start, + poll_interval)) { + clock_->SleepFor(poll_interval); + if (closed_) { + return false; + } + } clock_->SleepUntil(profiling_start); next_interval_ = TimeAdd(next_interval_, NanosToTimeSpec(interval_ns_)); @@ -148,6 +171,9 @@ bool TimedThrottler::WaitNext() { if (duration_wall_ns_ > 0) { cur_.push_back({kTypeWall, duration_wall_ns_}); } + if (enable_heap_) { + cur_.push_back({kTypeHeap, 0}); + } // Randomize the profile type order. 
std::shuffle(cur_.begin(), cur_.end(), gen_); } @@ -170,5 +196,7 @@ bool TimedThrottler::Upload(string profile) { return uploader_->Upload(cur_.back().first, profile); } +void TimedThrottler::Close() { closed_ = true; } + } // namespace profiler } // namespace cloud diff --git a/src/throttler_timed.h b/src/throttler_timed.h index 7f6499ce1..5979b9e15 100644 --- a/src/throttler_timed.h +++ b/src/throttler_timed.h @@ -17,6 +17,7 @@ #ifndef CLOUD_PROFILER_AGENT_JAVA_THROTTLER_TIMED_H_ #define CLOUD_PROFILER_AGENT_JAVA_THROTTLER_TIMED_H_ +#include #include #include @@ -37,17 +38,22 @@ class TimedThrottler : public Throttler { // Testing-only constructor. TimedThrottler(std::unique_ptr uploader, Clock* clock, - bool fixed_seed); + bool no_randomize); bool WaitNext() override; string ProfileType() override; int64_t DurationNanos() override; bool Upload(string profile) override; + void Close() override; private: Clock* clock_; - int64_t duration_cpu_ns_, duration_wall_ns_; + int64_t duration_cpu_ns_; + int64_t duration_wall_ns_; + bool enable_heap_; int64_t interval_ns_; + // The throttler is closing, cancel ongoing and future requests. + std::atomic closed_; std::default_random_engine gen_; std::uniform_int_distribution dist_; diff --git a/src/worker.cc b/src/worker.cc index 01139b34b..ddd11d674 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -18,6 +18,7 @@ #include "src/profiler.h" #include "src/throttler_api.h" #include "src/throttler_timed.h" +#include "third_party/javaprofiler/heap_sampler.h" DEFINE_bool(cprof_enabled, true, "when unset, unconditionally disable the profiling"); @@ -43,6 +44,13 @@ void Worker::Start(JNIEnv *jni) { return; } + // Initialize the throttler here rather in the constructor, since the + // constructor is invoked too early, before the heap profiler is initialized. + throttler_ = FLAGS_cprof_profile_filename.empty() + ? 
std::unique_ptr(new APIThrottler()) + : std::unique_ptr( + new TimedThrottler(FLAGS_cprof_profile_filename)); + // Pass 'this' as the arg to access members from the worker thread. jvmtiError err = jvmti_->RunAgentThread(thread, ProfileThread, this, JVMTI_THREAD_MIN_PRIORITY); @@ -55,14 +63,16 @@ void Worker::Start(JNIEnv *jni) { } void Worker::Stop() { - // Signal the worker thread to exit and wait until it does. stopping_.store(true, std::memory_order_release); + // Close the throttler which will initiate cancellation of WaitNext / Upload. + throttler_->Close(); + // Wait till the worker thread is done. std::lock_guard lock(mutex_); } namespace { -string Collect(Profiler *p, +string Collect(Profiler *p, JNIEnv *env, google::javaprofiler::NativeProcessInfo *native_info) { const char *profile_type = p->ProfileType(); if (!p->Collect()) { @@ -70,31 +80,39 @@ string Collect(Profiler *p, return ""; } native_info->Refresh(); - return p->SerializeProfile(*native_info); + return p->SerializeProfile(env, *native_info); } +class JNILocalFrame { + public: + explicit JNILocalFrame(JNIEnv *jni_env) : jni_env_(jni_env) { + // Put 100, it doesn't really matter: new spec implementations don't care. + jni_env_->PushLocalFrame(100); + } + + ~JNILocalFrame() { jni_env_->PopLocalFrame(nullptr); } + + // Not copyable or movable. + JNILocalFrame(const JNILocalFrame &) = delete; + JNILocalFrame &operator=(const JNILocalFrame &) = delete; + + private: + JNIEnv *jni_env_; +}; + } // namespace -void Worker::EnableProfiling() { - enabled_ = true; -} +void Worker::EnableProfiling() { enabled_ = true; } -void Worker::DisableProfiling() { - enabled_ = false; -} +void Worker::DisableProfiling() { enabled_ = false; } void Worker::ProfileThread(jvmtiEnv *jvmti_env, JNIEnv *jni_env, void *arg) { Worker *w = static_cast(arg); - google::javaprofiler::NativeProcessInfo n("/proc/self/maps"); + std::lock_guard lock(w->mutex_); - std::unique_ptr t = - FLAGS_cprof_profile_filename.empty() - ? 
std::unique_ptr(new APIThrottler()) - : std::unique_ptr( - new TimedThrottler(FLAGS_cprof_profile_filename)); + google::javaprofiler::NativeProcessInfo n("/proc/self/maps"); - while (t->WaitNext()) { - std::lock_guard lock(w->mutex_); + while (w->throttler_->WaitNext()) { if (w->stopping_) { // The worker is exiting. break; @@ -103,18 +121,47 @@ void Worker::ProfileThread(jvmtiEnv *jvmti_env, JNIEnv *jni_env, void *arg) { // Skip the collection and upload steps when profiling is disabled. continue; } + + // There are a number of JVMTI functions the agent uses that return + // local references. Normally, local references are freed when a JNI + // call returns to Java. E.g. in the internal /profilez profiler + // those would get freed when the C++ code returns back to the request + // handler. But in case of the cloud agent the agent thread never exits + // and so the local references keep accumulating. Adding an explicit + // local frame around each profiling iteration fixes this. + // Note: normally the various JNIHandles are properly lifetime managed + // now (via b/133409114) and there should be no leaks; but leaving this in + // so that, if ever JNI handle leaks do happen again, this will release the + // handles automatically. + JNILocalFrame local_frame(jni_env); string profile; - string pt = t->ProfileType(); + string pt = w->throttler_->ProfileType(); if (pt == kTypeCPU) { - CPUProfiler p(w->jvmti_, w->threads_, t->DurationNanos(), + CPUProfiler p(w->jvmti_, w->threads_, w->throttler_->DurationNanos(), FLAGS_cprof_cpu_sampling_period_msec * kNanosPerMilli); - profile = Collect(&p, &n); + profile = Collect(&p, jni_env, &n); } else if (pt == kTypeWall) { // Note that the requested sampling period for the wall profiling may be // increased if the number of live threads is too large. 
- WallProfiler p(w->jvmti_, w->threads_, t->DurationNanos(), + WallProfiler p(w->jvmti_, w->threads_, w->throttler_->DurationNanos(), FLAGS_cprof_wall_sampling_period_msec * kNanosPerMilli); - profile = Collect(&p, &n); + profile = Collect(&p, jni_env, &n); + } else if (pt == kTypeHeap) { + if (!google::javaprofiler::HeapMonitor::Enabled()) { + LOG(WARNING) << "Asked for a heap sampler but it is disabled"; + continue; + } + + // Note: we do not force GC here, instead we rely on what was seen as + // still live at the last GC; this means that technically: + // - Some objects might be dead now. + // - Some other objects might be sampled but not show up yet. + // On the flip side, this allows the profile collection to not provoke a + // GC. + perftools::profiles::Builder::Marshal( + *google::javaprofiler::HeapMonitor::GetHeapProfiles( + jni_env, false /* force_gc */), + &profile); } else { LOG(ERROR) << "Unknown profile type '" << pt << "', skipping the upload"; continue; @@ -123,7 +170,7 @@ void Worker::ProfileThread(jvmtiEnv *jvmti_env, JNIEnv *jni_env, void *arg) { LOG(ERROR) << "No profile bytes collected, skipping the upload"; continue; } - if (!t->Upload(profile)) { + if (!w->throttler_->Upload(profile)) { LOG(ERROR) << "Error on profile upload, discarding the profile"; } } diff --git a/src/worker.h b/src/worker.h index e721b6bdc..82566ead9 100644 --- a/src/worker.h +++ b/src/worker.h @@ -43,6 +43,7 @@ class Worker { jvmtiEnv *jvmti_; ThreadTable *threads_; + std::unique_ptr throttler_; std::mutex mutex_; // Held by the worker thread while it's running. 
// Thread-local slots backing the static accessors declared in accessors.h.
// Zero-initialized per thread; tags_ stays nullptr until InitTags() runs.
__thread JNIEnv *Accessors::env_;
__thread int64 Accessors::attr_;
__thread Tags *Accessors::tags_;

// Allocates this thread's Tags storage. Must be called at most once per
// thread before any tag is set; asserts if the storage already exists.
void Accessors::InitTags() {
  assert(tags_ == nullptr);
  tags_ = new Tags();
}

// Frees this thread's Tags storage. After this call GetTags() falls back to
// the shared empty instance and GetMutableTags() returns nullptr.
void Accessors::DestroyTags() {
  const Tags *tags_tmp = tags_;
  tags_ = nullptr;
  // The thread-local tags_ is set to null before deallocation so a signal
  // handler interrupting this sequence never observes the destructed object.
  // The compiler barrier below makes sure that "delete tags_tmp" will not be
  // scheduled before "tags_ = nullptr".
  __asm__ __volatile__("" : : : "memory");
  delete tags_tmp;
}

// Returns a heap-allocated copy of this thread's tags, intended for hand-off
// to another thread, or nullptr when this thread has no tags storage.
Tags *Accessors::AllocateAndCopyTags() {
  return tags_ == nullptr ? nullptr : new Tags(*tags_);
}

// Moves *tags into this thread's storage and deletes the container.
// NOTE(review): when this thread has no tags storage (tags_ == nullptr) the
// payload is silently dropped; only the container is freed.
void Accessors::ApplyAndDeleteTags(Tags *tags) {
  if (tags_ != nullptr) {
    *tags_ = std::move(*tags);
  }
  delete tags;
}
If InitTags() is not called before, GetTags() returns a constant + // reference to an empty Tags instance and GetMutableTags() returns nullptr. + // For all Java threads, InitTags() is called inside the callback function of + // onThreadStart. However, non-Java threads do not trigger the callback, + // which leaves InitTags() not called. + static const Tags &GetTags() { + return tags_ == nullptr ? Tags::Empty() : *tags_; + } + static Tags *GetMutableTags() { return tags_; } + + // AllocateAndCopyTags() and ApplyAndDeleteTags() are used to propagate tags + // between threads. The usage should be like: + // + // Tags *tags_copy = AllocateAndCopyTags(); // Make a copy of tags storage of + // // current thread. + // ... // Pass tags_copy into another thread. + // ApplyAndDeleteTags(tags_copy); // Overwrite the tags storage of current + // //thread with the information stored in tags_copy. + // // If ApplyAndDeleteTags() is never called, users must explicitly reclaim + // // the memory reserved for tags_copy by "delete tags_copy". + + // Copies the thread-local tags. Returns the pointer of newly allocated Tags + // on success; otherwise, return nullptr. + static Tags *AllocateAndCopyTags(); + // Overrides the thread-local tags with the tags passed in and releases the + // memory pointed by tags. + static void ApplyAndDeleteTags(Tags *tags); + + template + static inline FunctionType GetJvmFunction(const char *function_name) { + // get handle to library + static void *handle = dlopen("libjvm.so", RTLD_LAZY); + if (handle == NULL) { + return NULL; + } + + // get address of function, return null if not found + return bit_cast(dlsym(handle, function_name)); + } + + private: + // This is dangerous. TLS accesses are by default not async safe, as + // they can call malloc for lazy initialization. The initial-exec + // TLS mode avoids this potential allocation, with the limitation + // that there is a fixed amount of space to hold all TLS variables + // referenced in the module. 
This should be OK for the cloud + // profiler agent, which is relatively small. We do provide a way + // to override the TLS model for compilation environments where the + // TLS access is async-safe. +#ifdef JAVAPROFILER_GLOBAL_DYNAMIC_TLS + static __thread JNIEnv *env_ __attribute__((tls_model("global-dynamic"))); + static __thread int64 attr_ __attribute__((tls_model("global-dynamic"))); + static __thread Tags *tags_ __attribute__((tls_model("global-dynamic"))); +#else + static __thread JNIEnv *env_ __attribute__((tls_model("initial-exec"))); + static __thread int64 attr_ __attribute__((tls_model("initial-exec"))); + static __thread Tags *tags_ __attribute__((tls_model("initial-exec"))); +#endif +}; + +} // namespace javaprofiler +} // namespace google +#endif // THIRD_PARTY_JAVAPROFILER_ACCESSORS_H_ diff --git a/third_party/javaprofiler/async_ref_counted_string.cc b/third_party/javaprofiler/async_ref_counted_string.cc new file mode 100644 index 000000000..9704f6a78 --- /dev/null +++ b/third_party/javaprofiler/async_ref_counted_string.cc @@ -0,0 +1,192 @@ +/* + * Copyright 2018 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "third_party/javaprofiler/async_ref_counted_string.h" + +#include // NOLINT +#include + +namespace google { +namespace javaprofiler { +namespace { + +using StringRefCountTable = std::unordered_map>; +using StringRefCount = StringRefCountTable::value_type; + +// Maps string to its reference count. 
+std::unordered_map> *string_table = nullptr; +// All accesses to string_table should be protected by string_table_mutex. +std::mutex string_table_mutex; + +// The following methods are used to transfer ownership of a string. +// To own a string, the first step is to resolve (or create if necessary) the +// corresponding StringRefCount and claim the ownership by incrementing the +// reference count with either of AcquireByXXX methods. Externally, the +// current AsyncRefCountedString still refers to the old StringRefCount. Next, +// switch ptr_ to the new StringRefCount atomically and release the resource of +// old StringRefCount by calling Release() (or calling AsyncSafeRelease(), but +// it may fail). If the reference count of old StringRefCount reaches 0, the +// string stored in the old StringRefCount is removed. The ownership +// transference is safe upon interrupts. In the signal handler, the current +// instance of AsyncRefCountedString refers to either the old StringRefCount or +// the new StringRefCount without ending up with corrupted data. + +// Resolves the StringRefCount of a given string, increments the reference count +// and returns the pointer of StringRefCount. It returns nullptr if the internal +// string table is not initialized. +StringRefCount *AcquireByString(const string &str) { + std::lock_guard lock(string_table_mutex); + if (string_table == nullptr) { + return nullptr; + } + // The following statement is equivalent to + // "const auto &it = string_table->emplace(str, 0).first". + // However, gcc-4.8 does not compile "emplace(str,0)" as it complains that the + // atomic copy constructor is deleted. 
Since gcc-4.8 is used in the docker + // container to generate the cloud Java agent, use the pair's piecewise + // constructor to make it compilable under gcc-4.8 + const auto &it = + string_table + ->emplace(std::piecewise_construct, std::forward_as_tuple(str), + std::forward_as_tuple(0)) + .first; + it->second++; + return &*it; +} + +// Increments the reference count of the given StringRefCount pointer if it is +// not null, and returns the StringRefCount pointer. It is used to copy the +// StringRefCount from another AsyncRefCountedString. +StringRefCount *AcquireByCopy(StringRefCount *str_ref_cnt) { + if (str_ref_cnt != nullptr) { + // As "other" already holds a valid StringRefCount, we can directly + // increment the reference count by 1. If "other" is owned by another + // thread, the user has to make sure that "other" is not under editing. + str_ref_cnt->second++; + } + return str_ref_cnt; +} + +// Renounces the ownership of a string represented by the given StringRefCount +// pointer. It is async-signal-safe. If the string is not referred by any other +// AsyncRefCountedString and needs removing from the string table, it fails by +// returning false. +bool AsyncSafeRelease(StringRefCount *str_ref_cnt) { + if (str_ref_cnt == nullptr) { + return true; + } + + int32_t ref_count = str_ref_cnt->second.load(); + while (ref_count > 1) { + if (str_ref_cnt->second.compare_exchange_weak(ref_count, ref_count - 1)) { + return true; + } + } + return false; +} + +// Renounces the ownership of a string represented by the given StringRefCount +// pointer. It firstly calls AsyncSafeRelease() to release the string. If +// AsyncSafeRelease(), it acquires the lock of the string table to make it +// succeed eventually. +void Release(StringRefCount *str_ref_cnt) { + if (!AsyncSafeRelease(str_ref_cnt)) { + std::lock_guard lock(string_table_mutex); + if (str_ref_cnt->second.fetch_sub(1) == 1) { + // The counter becomes zero and delete the entry. 
+ string_table->erase(str_ref_cnt->first); + } + } +} + +} // namespace + +AsyncRefCountedString::AsyncRefCountedString(const string &str) + : AsyncRefCountedString() { + Release(ptr_.exchange(AcquireByString(str))); +} + +AsyncRefCountedString::AsyncRefCountedString(const AsyncRefCountedString &other) + : AsyncRefCountedString() { + Release(ptr_.exchange(AcquireByCopy(other.ptr_.load()))); +} + +AsyncRefCountedString::~AsyncRefCountedString() { + Release(ptr_.exchange(nullptr)); +} + +AsyncRefCountedString &AsyncRefCountedString::operator=(const string &str) { + Release(ptr_.exchange(AcquireByString(str))); + return *this; +} + +AsyncRefCountedString &AsyncRefCountedString::operator=( + const AsyncRefCountedString &other) { + Release(ptr_.exchange(AcquireByCopy(other.ptr_.load()))); + return *this; +} + +AsyncRefCountedString &AsyncRefCountedString::operator=( + AsyncRefCountedString &&other) { + Reset(); + ptr_.store(other.ptr_); + other.ptr_ = nullptr; + return *this; +} + +AsyncRefCountedString &AsyncRefCountedString::AsyncSafeCopy( + const AsyncRefCountedString &other) { + assert(ptr_.load() == nullptr); + ptr_.exchange(AcquireByCopy(other.ptr_.load())); + return *this; +} + +void AsyncRefCountedString::Reset() { Release(ptr_.exchange(nullptr)); } + +void AsyncRefCountedString::AsyncSafeReset() { + assert(AsyncSafeRelease(ptr_.exchange(nullptr))); +} + +const string *AsyncRefCountedString::Get() const { + StringRefCount *str_ref_cnt = ptr_.load(); + if (str_ref_cnt == nullptr) { + return nullptr; + } else { + return &(str_ref_cnt->first); + } +} + +bool AsyncRefCountedString::Init() { + std::lock_guard lock(string_table_mutex); + if (string_table == nullptr) { + string_table = new std::unordered_map>(); + return true; + } + return false; +} + +bool AsyncRefCountedString::Destroy() { + std::lock_guard lock(string_table_mutex); + if (string_table == nullptr || !string_table->empty()) { + return false; + } + delete string_table; + string_table = nullptr; + return 
true; +} + +} // namespace javaprofiler +} // namespace google diff --git a/third_party/javaprofiler/async_ref_counted_string.h b/third_party/javaprofiler/async_ref_counted_string.h new file mode 100644 index 000000000..276ccb88a --- /dev/null +++ b/third_party/javaprofiler/async_ref_counted_string.h @@ -0,0 +1,112 @@ +/* + * Copyright 2018 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef THIRD_PARTY_JAVAPROFILER_ASYNC_REF_COUNTED_STRING_H_ +#define THIRD_PARTY_JAVAPROFILER_ASYNC_REF_COUNTED_STRING_H_ + +#include + +#include "third_party/javaprofiler/globals.h" + +namespace google { +namespace javaprofiler { + +// A string wrapper which uses string interning to store only one copy of a +// string. +// Init() must be called before any usage and the internal storage allocated by +// Init() will intentionally leak. +// The methods that are safe to call in a signal handler, are explicitly marked +// with "async-signal-safe". +// An underlying string table helps maintain only one copy per string to reduce +// memory overhead and speed up equality checking. Different +// AsyncRefCountedString instances owned by different threads do not need extra +// synchronization even if they refer to the same string. However, users have to +// provide their own synchronization to access the same AsyncRefCountedString +// instance from different threads. 
String interning cannot be achieved only by +// std::shared_ptr since an additional associative array is needed to +// look up a string's intern. Combining a hashtable, std::shared_ptr can +// only de-allocate itself if not needed and will not remove it from the +// hashtable. Furthermore, a sampling interrupt can happen when +// std::shared_ptr is being copied and destructed, and the interrupt +// routine may see corrupted data. In other words, std::shared_ptr is +// not async-signal-safe and thus not used. +class AsyncRefCountedString { + public: + AsyncRefCountedString() : ptr_(nullptr) {} + explicit AsyncRefCountedString(const string &str); + AsyncRefCountedString(const AsyncRefCountedString &other); + + ~AsyncRefCountedString(); + + AsyncRefCountedString &operator=(const string &str); + AsyncRefCountedString &operator=(const AsyncRefCountedString &other); + AsyncRefCountedString &operator=(AsyncRefCountedString &&other); + // Async-signal-safe version of operator=(const AsyncRefCountedString &other). + // It requires that the instance refers to nullptr (does not contain any + // string). Otherwise, it asserts an error. + AsyncRefCountedString &AsyncSafeCopy(const AsyncRefCountedString &other); + // Tests whether the referred string equals the one in another + // AsyncRefCountedString. As a specific string only has one internal copy, the + // stored string address is directly used for comparison without checking the + // string content. Async-signal-safe. + bool operator==(const AsyncRefCountedString &other) const { + return ptr_.load() == other.ptr_.load(); + } + // Async-signal-safe. + bool operator!=(const AsyncRefCountedString &other) const { + return !(*this == other); + } + + // Renounces the ownership of currently referred string (if it currently owns + // one) and refers to nullptr. + void Reset(); + // Async-signal-safe version of Reset(). 
It will succeed under the condition + // that there is at least one AsyncRefCountedString still refers to the string + // that is released by this call. Otherwise, it asserts an error. + void AsyncSafeReset(); + + // Returns the string pointer it refers. + // Returns nullptr when it does not refer to any string. + // The returned string pointer is valid as long as the AsyncRefCountedString + // instance stay unchanged and it is async-signal-safe. + const string *Get() const; + + // Returns the hash value. As a specific string only has one internal copy, + // the stored string address is directly used as the hash value. + uint64 Hash() const { return reinterpret_cast(Get()); } + + // Initializes the internal string storage. Must be called before using + // AsyncRefCountedString to store any string. Should only be called once, + // subsequent calls have no effect and return false. Destroy() should be + // called to free the storage. + static bool Init(); + + // Frees the internal string storage. Must be called after all outstanding + // AsyncRefCountedString objects are gone. No string can be stored after + // Destroy() is called. Returns false if the storage is not currently + // allocated or if there are known outstanding references to strings. + static bool Destroy(); + + private: + // The value of ptr_ requires to be changed atomically so that + // its value will not get corrupted upon interrupts. 
+ std::atomic> *> ptr_; +}; + +} // namespace javaprofiler +} // namespace google + +#endif // THIRD_PARTY_JAVAPROFILER_ASYNC_REF_COUNTED_STRING_H_ diff --git a/third_party/javaprofiler/clock.cc b/third_party/javaprofiler/clock.cc index b6025971a..d6fe5718a 100644 --- a/third_party/javaprofiler/clock.cc +++ b/third_party/javaprofiler/clock.cc @@ -21,6 +21,16 @@ namespace { Clock DefaultClockInstance; } +bool AlmostThere(Clock* clock, const struct timespec& finish, + const struct timespec& lap) { + const int64_t kMarginLaps = 2; + + struct timespec now = clock->Now(); + struct timespec laps = {lap.tv_sec * kMarginLaps, lap.tv_nsec * kMarginLaps}; + + return TimeLessThan(finish, TimeAdd(now, laps)); +} + Clock* DefaultClock() { return &DefaultClockInstance; } diff --git a/third_party/javaprofiler/clock.h b/third_party/javaprofiler/clock.h index 1ff3f7203..2a7362ed8 100644 --- a/third_party/javaprofiler/clock.h +++ b/third_party/javaprofiler/clock.h @@ -20,11 +20,13 @@ #include #include +#include "third_party/javaprofiler/globals.h" + namespace google { namespace javaprofiler { -static const int64_t kNanosPerSecond = 1000 * 1000 * 1000; -static const int64_t kNanosPerMilli = 1000 * 1000; +constexpr int64 kNanosPerSecond = 1000 * 1000 * 1000; +constexpr int64 kNanosPerMilli = 1000 * 1000; inline struct timespec TimeAdd(const struct timespec t1, const struct timespec t2) { @@ -41,13 +43,13 @@ inline bool TimeLessThan(const struct timespec &t1, const struct timespec &t2) { (t1.tv_sec == t2.tv_sec && t1.tv_nsec < t2.tv_nsec); } -inline struct timespec NanosToTimeSpec(int64_t nanos) { +inline struct timespec NanosToTimeSpec(int64 nanos) { time_t seconds = nanos / kNanosPerSecond; int32_t nano_seconds = nanos % kNanosPerSecond; return timespec{seconds, nano_seconds}; } -inline int64_t TimeSpecToNanos(const struct timespec &ts) { +inline int64 TimeSpecToNanos(const struct timespec &ts) { return kNanosPerSecond * ts.tv_sec + ts.tv_nsec; } @@ -77,6 +79,11 @@ class Clock { } }; 
+// Determines if there is time for another lap before reaching the finish line. +// Uses a margin of multiple laps to ensure to not overrun the finish line. +bool AlmostThere(Clock *clock, const struct timespec &finish, + const struct timespec &lap); + Clock *DefaultClock(); } // namespace javaprofiler diff --git a/third_party/javaprofiler/display.cc b/third_party/javaprofiler/display.cc index 3e336a433..f4d940ef5 100644 --- a/third_party/javaprofiler/display.cc +++ b/third_party/javaprofiler/display.cc @@ -15,8 +15,6 @@ #include "third_party/javaprofiler/display.h" #include -#include -#include #include #include "third_party/javaprofiler/stacktrace_fixer.h" @@ -67,8 +65,6 @@ void GetMethodName(jvmtiEnv *jvmti, jmethodID method_id, string *method_name, return; } - signature_ptr.AbandonBecauseOfError(); - name_ptr.AbandonBecauseOfError(); static int once = 0; if (!once) { once = 1; @@ -92,13 +88,12 @@ void GetMethodName(jvmtiEnv *jvmti, jmethodID method_id, string *method_name, *signature = kSignatureUnknown; } -static void GetClassAndFileName(jvmtiEnv *jvmti, jmethodID method_id, +void GetClassAndFileName(jvmtiEnv *jvmti, jmethodID method_id, jclass declaring_class, string *file_name, string *class_name) { JvmtiScopedPtr source_name_ptr(jvmti); if (JVMTI_ERROR_NONE != jvmti->GetSourceFileName(declaring_class, source_name_ptr.GetRef())) { - source_name_ptr.AbandonBecauseOfError(); *file_name = kFileUnknown; } else { *file_name = source_name_ptr.Get(); @@ -108,7 +103,6 @@ static void GetClassAndFileName(jvmtiEnv *jvmti, jmethodID method_id, if (JVMTI_ERROR_NONE != jvmti->GetClassSignature(declaring_class, signature_ptr.GetRef(), nullptr)) { - signature_ptr.AbandonBecauseOfError(); *class_name = kClassUnknown; } else { bool cleaned = CleanJavaSignature(signature_ptr.Get()); @@ -123,6 +117,32 @@ static void GetClassAndFileName(jvmtiEnv *jvmti, jmethodID method_id, } } +void FillFieldsWithUnknown(string *file_name, string *class_name, + string *method_name, string 
*signature, + int *line_number) { + *file_name = kFileUnknown; + *class_name = kClassUnknown; + *method_name = kMethodUnknown; + *signature = kSignatureUnknown; + if (line_number) { + *line_number = 0; + } +} + +void FillMethodSignatureAndLine(jvmtiEnv *jvmti, + const JVMPI_CallFrame &frame, + string *method_name, string *signature, + int *line_number) { + GetMethodName(jvmti, frame.method_id, method_name, signature); + + // frame.lineno is actually a bci if it is a Java method; Asgct is piggy + // backing on the structure field. For natives, this would be -1 and + // GetLineNumber handles it. + if (line_number) { + *line_number = GetLineNumber(jvmti, frame.method_id, frame.lineno); + } +} + } // end namespace jint GetLineNumber(jvmtiEnv *jvmti, jmethodID method, jlocation location) { @@ -148,8 +168,6 @@ jint GetLineNumber(jvmtiEnv *jvmti, jmethodID method, jlocation location) { no_debug_info = true; } } - - table_ptr_ctr.AbandonBecauseOfError(); return -1; } @@ -177,36 +195,10 @@ jint GetLineNumber(jvmtiEnv *jvmti, jmethodID method, jlocation location) { return -1; } -static void FillFieldsWithUnknown(string *file_name, string *class_name, - string *method_name, string *signature, - int *line_number) { - *file_name = kFileUnknown; - *class_name = kClassUnknown; - *method_name = kMethodUnknown; - *signature = kSignatureUnknown; - if (line_number) { - *line_number = 0; - } -} - -static void FillMethodSignatureAndLine(jvmtiEnv *jvmti, - const JVMPI_CallFrame &frame, - string *method_name, string *signature, - int *line_number) { - GetMethodName(jvmti, frame.method_id, method_name, signature); - - // frame.lineno is actually a bci if it is a Java method; Asgct is piggy - // backing on the structure field. For natives, this would be -1 and - // GetLineNumber handles it. 
- if (line_number) { - *line_number = GetLineNumber(jvmti, frame.method_id, frame.lineno); - } -} - -bool GetStackFrameElements(jvmtiEnv *jvmti, const JVMPI_CallFrame &frame, - string *file_name, string *class_name, - string *method_name, string *signature, - int *line_number) { +bool GetStackFrameElements(JNIEnv *jni, jvmtiEnv *jvmti, + const JVMPI_CallFrame &frame, string *file_name, + string *class_name, string *method_name, + string *signature, int *line_number) { if (!jvmti) { FillFieldsWithUnknown(file_name, class_name, method_name, signature, line_number); @@ -223,6 +215,7 @@ bool GetStackFrameElements(jvmtiEnv *jvmti, const JVMPI_CallFrame &frame, return true; } + ScopedLocalRef declaring_class_managed(jni, declaring_class); return GetStackFrameElements(jvmti, frame, declaring_class, file_name, class_name, method_name, signature, line_number); } diff --git a/third_party/javaprofiler/display.h b/third_party/javaprofiler/display.h index ba127539a..ee897759c 100644 --- a/third_party/javaprofiler/display.h +++ b/third_party/javaprofiler/display.h @@ -15,6 +15,7 @@ */ #include + #include #include "third_party/javaprofiler/globals.h" @@ -36,11 +37,10 @@ jint GetLineNumber(jvmtiEnv *jvmti, jmethodID method, jlocation location); // the information provided by the frame and using the JVMTI environment. // When unknown, it fills the parameters with: UnknownFile, UnknownClass, // UnknownMethod, "", and -1. -bool GetStackFrameElements(jvmtiEnv *jvmti, - const JVMPI_CallFrame &frame, - string *file_name, string *class_name, - string *method_name, string *signature, - int *line_number); +bool GetStackFrameElements(JNIEnv *jni, jvmtiEnv *jvmti, + const JVMPI_CallFrame &frame, string *file_name, + string *class_name, string *method_name, + string *signature, int *line_number); // Fill the file_name, class_name, method_name, and line_number parameters using // the information provided by the frame and using the JVMTI environment. 
diff --git a/third_party/javaprofiler/globals.h b/third_party/javaprofiler/globals.h index 1ea3fd27f..7ceeb4034 100644 --- a/third_party/javaprofiler/globals.h +++ b/third_party/javaprofiler/globals.h @@ -26,6 +26,7 @@ #include #include +using std::hash; using std::string; #define DISALLOW_COPY_AND_ASSIGN(TypeName) \ @@ -73,8 +74,6 @@ class JvmtiScopedPtr { T *Get() { return ref_; } - void AbandonBecauseOfError() { ref_ = NULL; } - private: jvmtiEnv *jvmti_; T *ref_; @@ -82,49 +81,24 @@ class JvmtiScopedPtr { DISALLOW_IMPLICIT_CONSTRUCTORS(JvmtiScopedPtr); }; -// Accessors for a JNIEnv for this thread. -class Accessors { +template +class ScopedLocalRef { public: - static void SetCurrentJniEnv(JNIEnv *env) { env_ = env; } - - static JNIEnv *CurrentJniEnv() { return env_; } - - static void Init() {} - - static void Destroy() {} - - static void SetAttribute(int64_t value) { attr_ = value; } + ScopedLocalRef(JNIEnv *jni, T ref) : jni_(jni), ref_(ref) {} - static int64_t GetAttribute() { return attr_; } - - template - static inline FunctionType GetJvmFunction(const char *function_name) { - // get handle to library - static void *handle = dlopen("libjvm.so", RTLD_LAZY); - if (handle == NULL) { - return NULL; + ~ScopedLocalRef() { + if (NULL != ref_) { + jni_->DeleteLocalRef(ref_); } - - // get address of function, return null if not found - return bit_cast(dlsym(handle, function_name)); } + T Get() { return ref_; } + private: - // This is dangerous. TLS accesses are by default not async safe, as - // they can call malloc for lazy initialization. The initial-exec - // TLS mode avoids this potential allocation, with the limitation - // that there is a fixed amount of space to hold all TLS variables - // referenced in the module. This should be OK for the cloud - // profiler agent, which is relatively small. We do provide a way - // to override the TLS model for compilation environments where the - // TLS access is async-safe. 
-#ifdef JAVAPROFILER_GLOBAL_DYNAMIC_TLS - static __thread JNIEnv *env_ __attribute__((tls_model("global-dynamic"))); - static __thread int64_t attr_ __attribute__((tls_model("global-dynamic"))); -#else - static __thread JNIEnv *env_ __attribute__((tls_model("initial-exec"))); - static __thread int64_t attr_ __attribute__((tls_model("initial-exec"))); -#endif + JNIEnv *jni_; + T ref_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(ScopedLocalRef); }; } // namespace javaprofiler diff --git a/third_party/javaprofiler/heap_sampler.cc b/third_party/javaprofiler/heap_sampler.cc new file mode 100644 index 000000000..e2a69138c --- /dev/null +++ b/third_party/javaprofiler/heap_sampler.cc @@ -0,0 +1,365 @@ +/* + * Copyright 2018 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "third_party/javaprofiler/heap_sampler.h" +#include "third_party/javaprofiler/profile_proto_builder.h" + +namespace { + +std::unique_ptr> +TransformFrames(jvmtiFrameInfo *stack_frames, int count) { + auto frames = + std::unique_ptr>( + new std::vector(count)); + + for (int i = 0; i < count; i++) { + // Note that technically this is not the line number; it is the location but + // our CPU profiler piggy-backs on JVMPI_CallFrame and uses lineno as a + // jlocation as well... 
+ (*frames)[i].lineno = stack_frames[i].location; + (*frames)[i].method_id = stack_frames[i].method; + } + + return frames; +} + +extern "C" JNIEXPORT void SampledObjectAlloc(jvmtiEnv *jvmti_env, + JNIEnv *jni_env, jthread thread, + jobject object, + jclass object_klass, jlong size) { + google::javaprofiler::HeapMonitor::AddSample(jni_env, thread, object, + object_klass, size); +} + +extern "C" JNIEXPORT void GarbageCollectionFinish(jvmtiEnv *jvmti_env) { + google::javaprofiler::HeapMonitor::NotifyGCWaitingThread(); +} + +} // namespace + +namespace google { +namespace javaprofiler { +std::atomic HeapMonitor::jvmti_; +std::atomic HeapMonitor::sampling_interval_; + +HeapEventStorage::HeapEventStorage(jvmtiEnv *jvmti, ProfileFrameCache *cache, + int max_garbage_size) + : max_garbage_size_(max_garbage_size), + cur_garbage_pos_(0), + jvmti_(jvmti), cache_(cache) { +} + +void HeapEventStorage::HeapObjectTrace::AddToProto( + const std::unique_ptr &builder) { + JVMPI_CallTrace call_trace{nullptr, static_cast(frames_->size()), + frames_->data()}; + ProfileStackTrace trace = {&call_trace, size_}; + builder->AddTraces(&trace, 1); +} + +void HeapEventStorage::Add(JNIEnv *jni, jthread thread, jobject object, + jclass klass, jlong size) { + const int kMaxFrames = 128; + jint count = 0; + jvmtiFrameInfo stack_frames[kMaxFrames]; + jvmtiError err = + jvmti_->GetStackTrace(thread, 0, kMaxFrames, stack_frames, &count); + + if (err == JVMTI_ERROR_NONE && count > 0) { + auto frames = TransformFrames(stack_frames, count); + + jweak weak_ref = jni->NewWeakGlobalRef(object); + if (jni->ExceptionCheck()) { + LOG(WARNING) << "Failed to create NewWeakGlobalRef, skipping heap sample"; + return; + } + + auto live_object = std::unique_ptr( + new HeapObjectTrace(weak_ref, size, std::move(frames))); + + // Only now lock and get things done quickly. 
+ std::lock_guard lock(storage_lock_); + newly_allocated_objects_.push_back(std::move(live_object)); + } +} + +void HeapEventStorage::AddToGarbage(std::unique_ptr obj) { + if (garbage_objects_.size() >= max_garbage_size_) { + garbage_objects_[cur_garbage_pos_] = std::move(obj); + cur_garbage_pos_ = (cur_garbage_pos_ + 1) % max_garbage_size_; + } else { + garbage_objects_.push_back(std::move(obj)); + } +} + +void HeapEventStorage::MoveLiveObjects( + JNIEnv *env, std::vector> *objects, + std::vector> *still_live_objects) { + for (auto &elem : *objects) { + if (elem->IsLive(env)) { + still_live_objects->push_back(std::move(elem)); + } else { + elem->DeleteWeakReference(env); + AddToGarbage(std::move(elem)); + } + } +} + +void HeapEventStorage::CompactSamples(JNIEnv *env) { + std::lock_guard lock(storage_lock_); + + std::vector> still_live; + + MoveLiveObjects(env, &newly_allocated_objects_, &still_live); + MoveLiveObjects(env, &live_objects_, &still_live); + + // Live objects are the objects still alive. + live_objects_ = std::move(still_live); + // Newly allocated objects is now reset, those still alive are now in + // live_objects. 
+ newly_allocated_objects_.clear(); +} + +std::unique_ptr HeapEventStorage::GetProfiles( + JNIEnv *env, int sampling_interval, bool force_gc, bool get_live) { + auto builder = + ProfileProtoBuilder::ForHeap(env, jvmti_, sampling_interval, cache_); + + if (force_gc) { + if (jvmti_->ForceGarbageCollection() != JVMTI_ERROR_NONE) { + LOG(WARNING) << "Failed to force GC, returning empty heap profile proto"; + return builder->CreateProto(); + } + + CompactSamples(env); + } + + { + std::lock_guard lock(storage_lock_); + + if (get_live) { + for (auto &elem : live_objects_) { + elem->AddToProto(builder); + } + } else { + for (auto &elem : garbage_objects_) { + elem->AddToProto(builder); + } + } + } + return builder->CreateProto(); +} + +bool HeapMonitor::CreateGCWaitingThread(jvmtiEnv* jvmti, JNIEnv* jni) { + jclass cls = jni->FindClass("java/lang/Thread"); + jmethodID constructor = jni->GetMethodID(cls, "", "()V"); + jobject thread = jni->NewGlobalRef(jni->NewObject(cls, constructor)); + if (thread == nullptr) { + LOG(WARNING) << "Failed to construct the GC waiting thread"; + return false; + } + + if (jvmti->RunAgentThread(thread, GCWaitingThread, nullptr, + JVMTI_THREAD_MIN_PRIORITY) != JVMTI_ERROR_NONE) { + LOG(WARNING) << "Failed to start the GC waiting thread"; + return false; + } + + return true; +} + +bool HeapMonitor::Supported(jvmtiEnv *jvmti) { +#ifdef ENABLE_HEAP_SAMPLING + jvmtiCapabilities caps; + memset(&caps, 0, sizeof(caps)); + if (jvmti->GetPotentialCapabilities(&caps) != JVMTI_ERROR_NONE) { + LOG(WARNING) << "Failed to get potential capabilities, disabling the heap " + << "sampling monitor"; + return false; + } + + // If ever this was run with a JDK before JDK11, it would not set this bit as + // it was added at the end of the structure. Therefore this is a cheap way to + // check for a runtime "are we running with JDK11+". 
+ return caps.can_generate_sampled_object_alloc_events + && caps.can_generate_garbage_collection_events; +#else + return false; +#endif +} + +void HeapMonitor::AddCallback(jvmtiEventCallbacks *callbacks) { +#ifdef ENABLE_HEAP_SAMPLING + callbacks->SampledObjectAlloc = &SampledObjectAlloc; + callbacks->GarbageCollectionFinish = &GarbageCollectionFinish; +#endif +} + +// Currently, we enable once and forget about it. +bool HeapMonitor::Enable(jvmtiEnv *jvmti, JNIEnv* jni, int sampling_interval) { +#ifdef ENABLE_HEAP_SAMPLING + if (!Supported(jvmti)) { + LOG(INFO) << "Heap sampling is not supported by the JVM, disabling the " + << " heap sampling monitor"; + return false; + } + + jvmtiCapabilities caps; + memset(&caps, 0, sizeof(caps)); + // Get line numbers, sample events, and filename for the tests. + caps.can_get_line_numbers = 1; + caps.can_get_source_file_name = 1; + caps.can_generate_sampled_object_alloc_events = 1; + caps.can_generate_garbage_collection_events = 1; + + if (jvmti->AddCapabilities(&caps) != JVMTI_ERROR_NONE) { + LOG(WARNING) << "Failed to add capabilities, disabling the heap " + << "sampling monitor"; + return false; + } + + if (jvmti->SetHeapSamplingInterval(sampling_interval) != JVMTI_ERROR_NONE) { + LOG(WARNING) << "Failed to set the heap sampling interval, disabling the " + << "heap sampling monitor"; + return false; + } + + jvmti_.store(jvmti); + sampling_interval_.store(sampling_interval); + + if (!GetInstance()->CreateGCWaitingThread(jvmti, jni)) { + return false; + } + + if (jvmti->SetEventNotificationMode(JVMTI_ENABLE, + JVMTI_EVENT_SAMPLED_OBJECT_ALLOC, + nullptr) != JVMTI_ERROR_NONE) { + LOG(WARNING) << "Failed to enable sampled object alloc event, disabling the" + << " heap sampling monitor"; + return false; + } + + if (jvmti->SetEventNotificationMode(JVMTI_ENABLE, + JVMTI_EVENT_GARBAGE_COLLECTION_FINISH, + nullptr) != JVMTI_ERROR_NONE) { + jvmti->SetEventNotificationMode(JVMTI_DISABLE, + JVMTI_EVENT_SAMPLED_OBJECT_ALLOC, + nullptr); 
+ LOG(WARNING) << "Failed to enable garbage collection finish event, " + << "disabling the heap sampling monitor"; + return false; + } + + return true; +#else + return false; +#endif +} + +void HeapMonitor::Disable() { +#ifdef ENABLE_HEAP_SAMPLING + jvmtiEnv *jvmti = jvmti_.load(); + if (!jvmti) { + return; + } + + jvmti->SetEventNotificationMode(JVMTI_DISABLE, + JVMTI_EVENT_SAMPLED_OBJECT_ALLOC, nullptr); + jvmti->SetEventNotificationMode(JVMTI_DISABLE, + JVMTI_EVENT_GARBAGE_COLLECTION_FINISH, + nullptr); + jvmti_.store(nullptr); + + // Notify the agent thread that we are done. + google::javaprofiler::HeapMonitor::GetInstance()->NotifyGCWaitingThread(); + +#else + // Do nothing: we never enabled ourselves. +#endif +} + +std::unique_ptr HeapMonitor::GetHeapProfiles( + JNIEnv* env, bool force_gc) { +#ifdef ENABLE_HEAP_SAMPLING + // Note: technically this means that you cannot disable the sampler and then + // get the profile afterwards; this could be changed if needed. + if (jvmti_) { + return GetInstance()->storage_.GetHeapProfiles(env, sampling_interval_, + force_gc); + } +#endif + return EmptyHeapProfile(env); +} + +std::unique_ptr +HeapMonitor::GetGarbageHeapProfiles(JNIEnv* env, bool force_gc) { +#ifdef ENABLE_HEAP_SAMPLING + // Note: technically this means that you cannot disable the sampler and then + // get the profile afterwards; this could be changed if needed. 
+ if (jvmti_) { + return GetInstance()->storage_.GetGarbageHeapProfiles( + env, sampling_interval_, force_gc); + } +#endif + return EmptyHeapProfile(env); +} + +std::unique_ptr HeapMonitor::EmptyHeapProfile( + JNIEnv *jni_env) { + return ProfileProtoBuilder::ForHeap(jni_env, jvmti_, sampling_interval_) + ->CreateProto(); +} + +void HeapMonitor::NotifyGCWaitingThreadInternal() { + std::unique_lock lock(gc_waiting_mutex_); + gc_notified_ = true; + gc_waiting_cv_.notify_all(); +} + +void HeapMonitor::WaitForGC() { + std::unique_lock lock(gc_waiting_mutex_); + gc_notified_ = false; + + // If we are woken up without having been notified, just go back to sleep. + gc_waiting_cv_.wait(lock, [this] { return gc_notified_; } ); +} + +void HeapMonitor::GCWaitingThread(jvmtiEnv* jvmti_env, JNIEnv* jni_env, + void* arg) { + GetInstance()->GCWaitingThreadRun(jni_env); +} + +void HeapMonitor::GCWaitingThreadRun(JNIEnv* jni_env) { + while (true) { + WaitForGC(); + + // Was the heap monitor disabled? + if (!Enabled()) { + break; + } + + CompactData(jni_env); + } + + LOG(INFO) << "Heap sampling GC waiting thread finished"; +} + +void HeapMonitor::CompactData(JNIEnv* jni_env) { + storage_.CompactSamples(jni_env); +} + +} // namespace javaprofiler +} // namespace google diff --git a/third_party/javaprofiler/heap_sampler.h b/third_party/javaprofiler/heap_sampler.h new file mode 100644 index 000000000..e99088f0e --- /dev/null +++ b/third_party/javaprofiler/heap_sampler.h @@ -0,0 +1,205 @@ +/* + * Copyright 2018 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef THIRD_PARTY_JAVAPROFILER_HEAP_SAMPLER_H_ +#define THIRD_PARTY_JAVAPROFILER_HEAP_SAMPLER_H_ + +#include +#include + +#include // NOLINT +#include +#include // NOLINT +#include + +#include "third_party/javaprofiler/globals.h" +#include "third_party/javaprofiler/profile_proto_builder.h" + +namespace google { +namespace javaprofiler { + +// Storage for the sampled heap objects recorded from the heap sampling JVMTI +// API callbacks. +class HeapEventStorage { + public: + HeapEventStorage(jvmtiEnv *jvmti, ProfileFrameCache *cache = nullptr, + int max_garbage_size = 200); + + // TODO: establish correct shutdown sequence: how do we ensure that + // things are not going to go awfully wrong at shutdown, is it this class' job + // or should it be the owner of this class' instance's job? + + // Adds an object to the storage system. + void Add(JNIEnv *jni, jthread thread, jobject object, jclass klass, + jlong size); + + // Returns a perftools::profiles::Profile with the objects stored via + // calls to the Add method. + // force_gc provides a means to force GC before returning the sampled heap + // profiles; + // setting force_gc to true has a performance impact and is discouraged. + std::unique_ptr GetHeapProfiles( + JNIEnv *env, int sampling_interval, bool force_gc = false) { + return GetProfiles(env, sampling_interval, force_gc, true); + } + + // Returns a perftools::profiles::Profile with the objects that have been + // GC'd. + // force_gc provides a means to force GC before returning the sampled heap + // profiles; + // setting force_gc to true has a performance impact and is discouraged. + std::unique_ptr GetGarbageHeapProfiles( + JNIEnv* env, int sampling_interval, bool force_gc = false) { + return GetProfiles(env, sampling_interval, force_gc, false); + } + + void CompactSamples(JNIEnv *env); + + // Not copyable or movable. 
+ HeapEventStorage(const HeapEventStorage&) = delete; + HeapEventStorage& operator=(const HeapEventStorage&) = delete; + + private: + // A sampled heap object, defined by the object, its size, and the stack + // frame. + class HeapObjectTrace { + public: + // This object owns the jweak object parameter. It is freed when the object + // is sent to the garbage list, and the object is set to nullptr. + HeapObjectTrace(jweak object, jlong size, + std::unique_ptr> frames) + : object_(object), size_(size), frames_(std::move(frames)) {} + + void AddToProto(const std::unique_ptr &builder); + + void DeleteWeakReference(JNIEnv* env) { + env->DeleteWeakGlobalRef(object_); + object_ = nullptr; + } + + + bool IsLive(JNIEnv *env) { + // When GC collects the object, the object represented by the weak + // reference will be considered as the same object as NULL. + return !env->IsSameObject(object_, NULL); + } + + // Not copyable or movable. + HeapObjectTrace(const HeapObjectTrace&) = delete; + HeapObjectTrace& operator=(const HeapObjectTrace&) = delete; + + private: + jweak object_; + int size_; + std::unique_ptr> frames_; + }; + + std::unique_ptr GetProfiles( + JNIEnv* env, int sampling_interval, bool force_gc, bool get_live); + + // Add object to the garbage list: it uses a queue with a max size of + // max_garbage_size, provided via the constructor. + void AddToGarbage(std::unique_ptr obj); + + // Moves live objects from objects to still_live_objects; live elements from + // the objects vector are replaced with nullptr via std::move. + void MoveLiveObjects( + JNIEnv *env, std::vector> *objects, + std::vector> *still_live_objects); + + std::vector> newly_allocated_objects_; + std::vector> live_objects_; + + // Though a queue really would be nice, we need a way to iterate when + // requested. 
+ int max_garbage_size_; + int cur_garbage_pos_; + std::vector> garbage_objects_; + + std::mutex storage_lock_; + jvmtiEnv *jvmti_; + ProfileFrameCache *cache_; +}; + +// Due to the JVMTI callback, everything here is static. +class HeapMonitor { + public: + static bool Enable(jvmtiEnv *jvmti, JNIEnv* jni, int sampling_interval); + static void Disable(); + + static bool Enabled() { return jvmti_ != nullptr; } + + // Returns a perftools::profiles::Profile with the objects provided by the + // HeapEventStorage. + static std::unique_ptr GetHeapProfiles( + JNIEnv* env, bool force_gc); + + // Returns a perftools::profiles::Profile with the GC'd objects provided by + // the HeapEventStorage. + static std::unique_ptr GetGarbageHeapProfiles( + JNIEnv* env, bool force_gc); + + static void AddSample(JNIEnv *jni_env, jthread thread, jobject object, + jclass object_klass, jlong size) { + GetInstance()->storage_.Add(jni_env, thread, object, object_klass, size); + } + + static void AddCallback(jvmtiEventCallbacks *callbacks); + + static void NotifyGCWaitingThread() { + GetInstance()->NotifyGCWaitingThreadInternal(); + } + + // Not copyable or movable. + HeapMonitor(const HeapMonitor &) = delete; + HeapMonitor &operator=(const HeapMonitor &) = delete; + + private: + HeapMonitor() : gc_notified_(false), storage_(jvmti_.load()) {} + + // We construct the heap_monitor at the first call to GetInstance, so ensure + // Enable was called at least once before to initialize jvmti_. 
+ static HeapMonitor *GetInstance() { + static HeapMonitor heap_monitor; + return &heap_monitor; + } + + static bool Supported(jvmtiEnv* jvmti); + + bool CreateGCWaitingThread(jvmtiEnv* jvmti, JNIEnv* jni); + static void GCWaitingThread(jvmtiEnv *jvmti_env, JNIEnv *jni_env, void *arg); + void GCWaitingThreadRun(JNIEnv* jni_env); + void WaitForGC(); + void NotifyGCWaitingThreadInternal(); + + void CompactData(JNIEnv* jni_env); + + static std::unique_ptr EmptyHeapProfile( + JNIEnv *jni_env); + + static std::atomic jvmti_; + static std::atomic sampling_interval_; + + bool gc_notified_; + std::condition_variable gc_waiting_cv_; + std::mutex gc_waiting_mutex_; + HeapEventStorage storage_; +}; + +} // namespace javaprofiler +} // namespace google + +#endif // THIRD_PARTY_JAVAPROFILER_HEAP_SAMPLER_H_ diff --git a/third_party/javaprofiler/native.h b/third_party/javaprofiler/native.h index c71883376..58acab8ce 100644 --- a/third_party/javaprofiler/native.h +++ b/third_party/javaprofiler/native.h @@ -34,7 +34,7 @@ class NativeProcessInfo { explicit NativeProcessInfo(const string &procmaps_filename); struct Mapping { - uint64_t start, limit; + uint64 start, limit; string name; }; diff --git a/third_party/javaprofiler/profile_proto_builder.cc b/third_party/javaprofiler/profile_proto_builder.cc index aaea2f86c..a44593165 100644 --- a/third_party/javaprofiler/profile_proto_builder.cc +++ b/third_party/javaprofiler/profile_proto_builder.cc @@ -24,13 +24,16 @@ namespace javaprofiler { const int kCount = 0; const int kMetric = 1; -ProfileProtoBuilder::ProfileProtoBuilder(jvmtiEnv *jvmti_env, +ProfileProtoBuilder::ProfileProtoBuilder(JNIEnv *jni_env, jvmtiEnv *jvmti_env, ProfileFrameCache *native_cache, int64 sampling_rate, const SampleType &count_type, const SampleType &metric_type) - : jvmti_env_(jvmti_env), native_cache_(native_cache), - location_builder_(&builder_), sampling_rate_(sampling_rate) { + : sampling_rate_(sampling_rate), + jni_env_(jni_env), + jvmti_env_(jvmti_env), + 
native_cache_(native_cache), + location_builder_(&builder_) { AddSampleType(count_type); AddSampleType(metric_type); SetPeriodType(metric_type); @@ -38,7 +41,9 @@ ProfileProtoBuilder::ProfileProtoBuilder(jvmtiEnv *jvmti_env, void ProfileProtoBuilder::AddTraces(const ProfileStackTrace *traces, int num_traces) { - native_cache_->ProcessTraces(traces, num_traces); + if (native_cache_) { + native_cache_->ProcessTraces(traces, num_traces); + } for (int i = 0; i < num_traces; ++i) { AddTrace(traces[i], 1); @@ -48,7 +53,9 @@ void ProfileProtoBuilder::AddTraces(const ProfileStackTrace *traces, void ProfileProtoBuilder::AddTraces(const ProfileStackTrace *traces, const int32 *counts, int num_traces) { - native_cache_->ProcessTraces(traces, num_traces); + if (native_cache_) { + native_cache_->ProcessTraces(traces, num_traces); + } for (int i = 0; i < num_traces; ++i) { AddTrace(traces[i], counts[i]); @@ -63,7 +70,8 @@ void ProfileProtoBuilder::AddArtificialTrace(const string& name, int count, auto profile = builder_.mutable_profile(); auto sample = profile->add_sample(); sample->add_location_id(location->id()); - InitSampleValues(sample, count, count * sampling_rate); + // Move count * sampling rate to 64-bit. 
+ InitSampleValues(sample, count, static_cast(count) * sampling_rate); } void ProfileProtoBuilder::UnsampleMetrics() { @@ -107,18 +115,18 @@ void ProfileProtoBuilder::SetPeriodType(const SampleType &metric_type) { } void ProfileProtoBuilder::UpdateSampleValues( - perftools::profiles::Sample *sample, jint count, jint size) { + perftools::profiles::Sample *sample, int64 count, int64 size) { sample->set_value(kCount, sample->value(kCount) + count); sample->set_value(kMetric, sample->value(kMetric) + size); } void ProfileProtoBuilder::InitSampleValues( - perftools::profiles::Sample *sample, jint metric) { + perftools::profiles::Sample *sample, int64 metric) { InitSampleValues(sample, 1, metric); } void ProfileProtoBuilder::InitSampleValues( - perftools::profiles::Sample *sample, jint count, jint metric) { + perftools::profiles::Sample *sample, int64 count, int64 metric) { sample->add_value(count); sample->add_value(metric); } @@ -155,7 +163,7 @@ void ProfileProtoBuilder::AddTrace(const ProfileStackTrace &trace, } void ProfileProtoBuilder::AddJavaInfo( - const google::javaprofiler::JVMPI_CallFrame &jvm_frame, + const JVMPI_CallFrame &jvm_frame, perftools::profiles::Profile *profile, perftools::profiles::Sample *sample, StackState *stack_state) { @@ -163,7 +171,7 @@ void ProfileProtoBuilder::AddJavaInfo( if (!jvm_frame.method_id) { perftools::profiles::Location *location = location_builder_.LocationFor( - "", "Unknown method", "", 0); + "", "[Unknown method]", "", 0); sample->add_location_id(location->id()); return; } @@ -173,12 +181,10 @@ void ProfileProtoBuilder::AddJavaInfo( string method_name; string signature; int line_number; - google::javaprofiler::GetStackFrameElements(jvmti_env_, jvm_frame, - &file_name, &class_name, - &method_name, &signature, - &line_number); + GetStackFrameElements(jni_env_, jvmti_env_, jvm_frame, &file_name, + &class_name, &method_name, &signature, &line_number); - ::google::javaprofiler::FixMethodParameters(&signature); + 
FixMethodParameters(&signature); string full_method_name = class_name + "." + method_name + signature; perftools::profiles::Location *location = location_builder_.LocationFor( @@ -187,11 +193,17 @@ void ProfileProtoBuilder::AddJavaInfo( sample->add_location_id(location->id()); } -void ProfileProtoBuilder::AddNativeInfo( - const google::javaprofiler::JVMPI_CallFrame &jvm_frame, - perftools::profiles::Profile *profile, - perftools::profiles::Sample *sample, - StackState *stack_state) { +void ProfileProtoBuilder::AddNativeInfo(const JVMPI_CallFrame &jvm_frame, + perftools::profiles::Profile *profile, + perftools::profiles::Sample *sample, + StackState *stack_state) { + if (!native_cache_) { + perftools::profiles::Location *location = location_builder_.LocationFor( + "", "[Unknown non-Java frame]", "", 0); + sample->add_location_id(location->id()); + return; + } + string function_name = native_cache_->GetFunctionName(jvm_frame); perftools::profiles::Location *location = native_cache_->GetLocation(jvm_frame, @@ -206,6 +218,21 @@ void ProfileProtoBuilder::AddNativeInfo( } } +void ContentionProfileProtoBuilder::MultiplyBySamplingRate() { + auto profile = builder_.mutable_profile(); + + for (int i = 0; i < profile->sample_size(); ++i) { + auto sample = profile->mutable_sample(i); + + auto count = sample->value(kCount); + auto metric_value = sample->value(kMetric); + + sample->set_value(kCount, static_cast(count) * sampling_rate_); + sample->set_value(kMetric, + static_cast(metric_value) * sampling_rate_); + } +} + size_t LocationBuilder::LocationInfoHash::operator()( const LocationInfo &info) const { @@ -251,9 +278,9 @@ perftools::profiles::Location *LocationBuilder::LocationFor( auto line = location->add_line(); - // TODO: Handle library name etc too... 
+ auto simplified_name = SimplifyFunctionName(function_name); auto function_id = builder_->FunctionId( - function_name.c_str(), "", file_name.c_str(), 0); + simplified_name.c_str(), function_name.c_str(), file_name.c_str(), 0); line->set_function_id(function_id); line->set_line(line_number); @@ -335,21 +362,38 @@ double CalculateSamplingRatio(int64 rate, int64 count, int64 metric_value) { } std::unique_ptr ProfileProtoBuilder::ForHeap( - jvmtiEnv *jvmti_env, int64 sampling_rate, ProfileFrameCache *cache) { - return std::unique_ptr(new HeapProfileProtoBuilder( - jvmti_env, sampling_rate, cache)); + JNIEnv *jni_env, jvmtiEnv *jvmti_env, int64 sampling_rate, + ProfileFrameCache *cache) { + // Cache can be nullptr because the heap sampler can be using a JVMTI + // Java-only stackframe gatherer. + return std::unique_ptr( + new HeapProfileProtoBuilder(jni_env, jvmti_env, sampling_rate, cache)); +} + +std::unique_ptr ProfileProtoBuilder::ForNativeHeap( + JNIEnv *jni_env, jvmtiEnv *jvmti_env, int64 sampling_rate, + ProfileFrameCache *cache) { + assert(cache != nullptr); + return std::unique_ptr(new NativeHeapProfileProtoBuilder( + jni_env, jvmti_env, sampling_rate, cache)); } std::unique_ptr ProfileProtoBuilder::ForCpu( - jvmtiEnv *jvmti_env, int64 sampling_rate, ProfileFrameCache *cache) { + JNIEnv *jni_env, jvmtiEnv *jvmti_env, int64 sampling_rate, + ProfileFrameCache *cache) { + CHECK (cache != nullptr) + << "CPU profiles may have native frames, cache must be provided"; return std::unique_ptr( - new CpuProfileProtoBuilder(jvmti_env, sampling_rate, cache)); + new CpuProfileProtoBuilder(jni_env, jvmti_env, sampling_rate, cache)); } std::unique_ptr ProfileProtoBuilder::ForContention( - jvmtiEnv *jvmti_env, int64 sampling_rate, ProfileFrameCache *cache) { - return std::unique_ptr( - new ContentionProfileProtoBuilder(jvmti_env, sampling_rate, cache)); + JNIEnv *jni_env, jvmtiEnv *jvmti_env, int64 sampling_rate, + ProfileFrameCache *cache) { + CHECK (cache != nullptr) + << 
"Contention profiles may have native frames, cache must be provided"; + return std::unique_ptr(new ContentionProfileProtoBuilder( + jni_env, jvmti_env, sampling_rate, cache)); } } // namespace javaprofiler diff --git a/third_party/javaprofiler/profile_proto_builder.h b/third_party/javaprofiler/profile_proto_builder.h index a94f2e5e2..f68ab2d1c 100644 --- a/third_party/javaprofiler/profile_proto_builder.h +++ b/third_party/javaprofiler/profile_proto_builder.h @@ -17,13 +17,16 @@ #ifndef THIRD_PARTY_JAVAPROFILER_PROFILE_PROTO_BUILDER_H__ #define THIRD_PARTY_JAVAPROFILER_PROFILE_PROTO_BUILDER_H__ +#include #include + +#include #include #include +#include #include #include "perftools/profiles/proto/builder.h" -#include "third_party/java/jdk/include/jvmti.h" #include "third_party/javaprofiler/stacktrace_decls.h" namespace google { @@ -50,9 +53,8 @@ class TraceSamples { const JVMPI_CallTrace &trace2) const; }; - __gnu_cxx::hash_map - traces_; + std::unordered_map traces_; }; // Store locations previously seen so that the profile is only @@ -88,9 +90,8 @@ class LocationBuilder { perftools::profiles::Builder *builder_; - __gnu_cxx::hash_map - locations_; + std::unordered_map locations_; }; // Remember traces and use the information to create locations with native @@ -113,11 +114,11 @@ class ProfileProtoBuilder { public: virtual ~ProfileProtoBuilder() {} - // Add traces to the proto. + // Add traces to the proto. The elements of the array are copied over. void AddTraces(const ProfileStackTrace *traces, int num_traces); // Add traces to the proto, where each trace has a defined count - // of occurrences. + // of occurrences. The elements of the arrays are copied over. void AddTraces(const ProfileStackTrace *traces, const int32 *counts, int num_traces); @@ -130,14 +131,29 @@ class ProfileProtoBuilder { // this has undefined behavior. virtual std::unique_ptr CreateProto() = 0; + // Create a heap profile. 
+ // jvmti_env can be null as well but then all calls to AddTraces will return + // unknown. + // ForHeap/ForNativeHeap is the only case where we accept a null cache since + // the heap profiles can be using JVMTI's GetStackTrace and remain in pure + // Java land frames. Other ForX methods will fail an assertion when attempting + // a nullptr cache. static std::unique_ptr ForHeap( - jvmtiEnv *jvmti_env, int64 sampling_rate, ProfileFrameCache *cache); + JNIEnv *jni_env, jvmtiEnv *jvmti_env, int64 sampling_rate, + ProfileFrameCache *cache = nullptr); + + static std::unique_ptr ForNativeHeap( + JNIEnv *jni_env, jvmtiEnv *jvmti_env, int64 sampling_rate, + ProfileFrameCache *cache = nullptr); - static std::unique_ptr ForCpu( - jvmtiEnv *jvmti_env, int64 sampling_rate, ProfileFrameCache *cache); + static std::unique_ptr ForCpu(JNIEnv *jni_env, + jvmtiEnv *jvmti_env, + int64 sampling_rate, + ProfileFrameCache *cache); static std::unique_ptr ForContention( - jvmtiEnv *jvmti_env, int64 sampling_rate, ProfileFrameCache *cache); + JNIEnv *jni_env, jvmtiEnv *jvmti_env, int64 sampling_rate, + ProfileFrameCache *cache); protected: struct SampleType { @@ -148,9 +164,11 @@ class ProfileProtoBuilder { string unit; }; - ProfileProtoBuilder(jvmtiEnv *jvmti_env, - ProfileFrameCache *native_cache, - int64 sampling_rate, + // Create the profile proto builder, if native_cache is nullptr, then no + // information about native frames can be provided. The proto buffer will then + // contain "Unknown native method" frames. 
+ ProfileProtoBuilder(JNIEnv *env, jvmtiEnv *jvmti_env, + ProfileFrameCache *native_cache, int64 sampling_rate, const SampleType &count_type, const SampleType &metric_type); @@ -166,6 +184,7 @@ class ProfileProtoBuilder { std::unique_ptr CreateSampledProto(); perftools::profiles::Builder builder_; + int64 sampling_rate_ = 0; private: // Track progress through a stack as we traverse it, in order to determine @@ -207,28 +226,28 @@ class ProfileProtoBuilder { void AddSampleType(const SampleType &sample_type); void SetPeriodType(const SampleType &metric_type); - void InitSampleValues(perftools::profiles::Sample *sample, jint metric); - void InitSampleValues(perftools::profiles::Sample *sample, jint count, - jint metric); - void UpdateSampleValues(perftools::profiles::Sample *sample, jint count, - jint size); + void InitSampleValues(perftools::profiles::Sample *sample, int64 metric); + void InitSampleValues(perftools::profiles::Sample *sample, int64 count, + int64 metric); + void UpdateSampleValues(perftools::profiles::Sample *sample, int64 count, + int64 size); void AddTrace(const ProfileStackTrace &trace, int32 count); - void AddJavaInfo(const google::javaprofiler::JVMPI_CallFrame &jvm_frame, + void AddJavaInfo(const JVMPI_CallFrame &jvm_frame, perftools::profiles::Profile *profile, perftools::profiles::Sample *sample, StackState *stack_state); - void AddNativeInfo(const google::javaprofiler::JVMPI_CallFrame &jvm_frame, + void AddNativeInfo(const JVMPI_CallFrame &jvm_frame, perftools::profiles::Profile *profile, perftools::profiles::Sample *sample, StackState *stack_state); void UnsampleMetrics(); + JNIEnv *jni_env_; jvmtiEnv *jvmti_env_; ProfileFrameCache *native_cache_; TraceSamples trace_samples_; LocationBuilder location_builder_; - int64 sampling_rate_ = 0; }; // Computes the ratio to use to scale heap data to unsample it. 
@@ -241,13 +260,12 @@ double CalculateSamplingRatio(int64 rate, int64 count, int64 metric_value); class CpuProfileProtoBuilder : public ProfileProtoBuilder { public: - CpuProfileProtoBuilder(jvmtiEnv *jvmti_env, - int64 sampling_rate, - ProfileFrameCache *cache) - : ProfileProtoBuilder(jvmti_env, cache, sampling_rate, - ProfileProtoBuilder::SampleType("samples", "count"), - ProfileProtoBuilder::SampleType("cpu", - "nanoseconds")) { + CpuProfileProtoBuilder(JNIEnv *jni_env, jvmtiEnv *jvmti_env, + int64 sampling_rate, ProfileFrameCache *cache) + : ProfileProtoBuilder( + jni_env, jvmti_env, cache, sampling_rate, + ProfileProtoBuilder::SampleType("samples", "count"), + ProfileProtoBuilder::SampleType("cpu", "nanoseconds")) { builder_.mutable_profile()->set_period(sampling_rate); } @@ -261,52 +279,83 @@ class CpuProfileProtoBuilder : public ProfileProtoBuilder { class HeapProfileProtoBuilder : public ProfileProtoBuilder { public: - HeapProfileProtoBuilder(jvmtiEnv *jvmti_env, - int64 sampling_rate, - ProfileFrameCache *cache) - : ProfileProtoBuilder(jvmti_env, cache, sampling_rate, - ProfileProtoBuilder::SampleType("inuse_objects", - "count"), - ProfileProtoBuilder::SampleType("inuse_space", - "bytes")) { - } + HeapProfileProtoBuilder(JNIEnv *jni_env, jvmtiEnv *jvmti_env, + int64 sampling_rate, ProfileFrameCache *cache) + : ProfileProtoBuilder( + jni_env, jvmti_env, cache, sampling_rate, + ProfileProtoBuilder::SampleType("inuse_objects", "count"), + ProfileProtoBuilder::SampleType("inuse_space", "bytes")) {} std::unique_ptr CreateProto() override { return CreateUnsampledProto(); } protected: + // Depending on the JDK or how we got the frames (e.g. internal JVM or + // JVMTI), we might have native frames on top of the Java frames + // (basically where the JVM internally allocated the object). + // Returns the first Java frame index or 0 if none were found. 
int SkipTopNativeFrames(const JVMPI_CallTrace &trace) override { - for (int i = 0; i < trace.num_frames; ++i) { - if (trace.frames[i].lineno != - google::javaprofiler::kNativeFrameLineNum) { + // Skip until we see the first Java frame. + for (int i = 0; i < trace.num_frames; i++) { + if (trace.frames[i].lineno != kNativeFrameLineNum) { return i; } } - return trace.num_frames; + // All are native is weird for Java heap samples but do nothing in this + // case. + return 0; + } +}; + +class NativeHeapProfileProtoBuilder : public HeapProfileProtoBuilder { + public: + NativeHeapProfileProtoBuilder(JNIEnv *jni_env, jvmtiEnv *jvmti_env, + int64 sampling_rate, ProfileFrameCache *cache) + : HeapProfileProtoBuilder(jni_env, jvmti_env, sampling_rate, cache) {} + + protected: + // In cases of long native frames, we really only want to skip the + // frames_to_skip first frames, which would be something akin to: + // absl::base_internal::MallocHook::InvokeNewHookSlow + // absl::base_internal::MallocHook::InvokeNewHook + // calloc + int SkipTopNativeFrames(const JVMPI_CallTrace &trace) override { + // If frames_to_skip changes, change the number of frames checked against + // kNativeFrameLineNum below to check the correct number of frames. + const int frames_to_skip = 3; + return (trace.num_frames >= frames_to_skip + && trace.frames[0].lineno == kNativeFrameLineNum + && trace.frames[1].lineno == kNativeFrameLineNum + && trace.frames[2].lineno == kNativeFrameLineNum) + ? 
frames_to_skip : 0; } }; class ContentionProfileProtoBuilder : public ProfileProtoBuilder { public: - ContentionProfileProtoBuilder(jvmtiEnv *jvmti_env, - int64 sampling_rate, - ProfileFrameCache *cache) - : ProfileProtoBuilder(jvmti_env, cache, sampling_rate, - ProfileProtoBuilder::SampleType("contentions", - "count"), - ProfileProtoBuilder::SampleType("delay", - "microseconds")) { + ContentionProfileProtoBuilder(JNIEnv *jni_env, jvmtiEnv *jvmti_env, + int64 sampling_rate, ProfileFrameCache *cache) + : ProfileProtoBuilder( + jni_env, jvmti_env, cache, sampling_rate, + ProfileProtoBuilder::SampleType("contentions", "count"), + ProfileProtoBuilder::SampleType("delay", "microseconds")) { builder_.mutable_profile()->set_period(sampling_rate); } std::unique_ptr CreateProto() { - return CreateSampledProto(); + MultiplyBySamplingRate(); + builder_.Finalize(); + return std::unique_ptr(builder_.Consume()); } protected: int SkipTopNativeFrames(const JVMPI_CallTrace &trace) override { return 0; } + + private: + // Multiply the (count, metric) values by the sampling_rate. 
+ void MultiplyBySamplingRate(); }; } // namespace javaprofiler diff --git a/third_party/javaprofiler/profile_test_lib.cc b/third_party/javaprofiler/profile_test_lib.cc index 2683f28cb..9d7d6de07 100644 --- a/third_party/javaprofiler/profile_test_lib.cc +++ b/third_party/javaprofiler/profile_test_lib.cc @@ -56,6 +56,10 @@ static jvmtiError GetMethodName(jvmtiEnv* jvmti, jmethodID method_id, CreateJvmtiString(jvmti, "thirdMethodName", name_str); CreateJvmtiString(jvmti, "()V", sig_str); break; + case 4: + CreateJvmtiString(jvmti, "fourthMethodName$$Lambda$42.42", name_str); + CreateJvmtiString(jvmti, "()V", sig_str); + break; default: ADD_FAILURE() << "Unknown method id in test."; } @@ -75,6 +79,9 @@ static jvmtiError GetClassSignature(jvmtiEnv* jvmti, jclass declaring_class, case 3: CreateJvmtiString(jvmti, "Lcom/google/ThirdClass;", sig_str); break; + case 4: + CreateJvmtiString(jvmti, "Lcom/google/FourthClass;", sig_str); + break; default: ADD_FAILURE() << "Unknown class id in test."; } @@ -101,6 +108,9 @@ static jvmtiError GetSourceFileName(jvmtiEnv *env, jclass klass, case 3: *source_name_ptr = strdup("ThirdClass.java"); break; + case 4: + *source_name_ptr = strdup("FourthClass.java"); + break; default: ADD_FAILURE() << "Unknown class id in test."; } @@ -130,6 +140,39 @@ static jvmtiError GetLineNumberTable(jvmtiEnv *env, jmethodID method, return JVMTI_ERROR_NONE; } +static jvmtiError GetStackTrace(jvmtiEnv* env, jthread thread, jint start_depth, + jint max_frame_count, + jvmtiFrameInfo* frame_buffer, jint* count_ptr) { + uint64 thread_num = reinterpret_cast(thread); + + if (thread_num < 0 || thread_num >= JvmProfileTestLib::GetMaxThreads()) { + *count_ptr = 0; + return JVMTI_ERROR_NONE; + } + + std::vector frames; + + switch (thread_num) { + case 0: + frames.push_back({reinterpret_cast(1), 30}); + frames.push_back({reinterpret_cast(2), 64}); + break; + case 1: + frames.push_back({reinterpret_cast(3), 128}); + break; + } + + jint count = 
std::min(max_frame_count, (jint) frames.size()); + memcpy(frame_buffer, &frames[0], sizeof(*frame_buffer) * count); + *count_ptr = count; + + return JVMTI_ERROR_NONE; +} + +static jvmtiError ForceGarbageCollection(jvmtiEnv* env) { + return JVMTI_ERROR_NONE; +} + struct jvmtiInterface_1_ JvmProfileTestLib::GetDispatchTable() { struct jvmtiInterface_1_ jvmti_dispatch_table; jvmti_dispatch_table.GetMethodName = &GetMethodName; @@ -139,8 +182,20 @@ struct jvmtiInterface_1_ JvmProfileTestLib::GetDispatchTable() { jvmti_dispatch_table.GetLineNumberTable = &GetLineNumberTable; jvmti_dispatch_table.Allocate = &Allocate; jvmti_dispatch_table.Deallocate = &Deallocate; + jvmti_dispatch_table.GetStackTrace = &GetStackTrace; + jvmti_dispatch_table.ForceGarbageCollection = &ForceGarbageCollection; return jvmti_dispatch_table; } +int JvmProfileTestLib::GetMaxThreads() { return 2; } + +jthread JvmProfileTestLib::GetThread(int thread_id) { + if (thread_id < 0 || thread_id >= GetMaxThreads()) { + return 0; + } + + return reinterpret_cast(thread_id); +} + } // namespace javaprofiler } // namespace google diff --git a/third_party/javaprofiler/profile_test_lib.h b/third_party/javaprofiler/profile_test_lib.h index 2f2e32881..f663ef31a 100644 --- a/third_party/javaprofiler/profile_test_lib.h +++ b/third_party/javaprofiler/profile_test_lib.h @@ -17,11 +17,30 @@ #ifndef THIRD_PARTY_JAVAPROFILER_PROFILE_TEST_H__ #define THIRD_PARTY_JAVAPROFILER_PROFILE_TEST_H__ -#include "third_party/java/jdk/include/jvmti.h" +#include + +#include "third_party/javaprofiler/profile_proto_builder.h" +#include "third_party/javaprofiler/stacktrace_decls.h" namespace google { namespace javaprofiler { +class TestProfileFrameCache : public ProfileFrameCache { + void ProcessTraces(const ProfileStackTrace *traces, int num_traces) override { + } + + perftools::profiles::Location *GetLocation( + const JVMPI_CallFrame &jvm_frame, + LocationBuilder *location_builder) override { + return &nop_; + } + + string 
GetFunctionName(const JVMPI_CallFrame &jvm_frame) { return ""; } + + private: + perftools::profiles::Location nop_; +}; + class JvmProfileTestLib { public: static jmethodID GetDroppedFrameMethodId() { @@ -29,6 +48,27 @@ class JvmProfileTestLib { } static struct jvmtiInterface_1_ GetDispatchTable(); + + /** + * Returns a jthread object that can be used by the various JvmProfileTestLib + * methods. It is not a real jthread object but is an identifier to a "fake" + * thread that is mocked to be at a given point in the code. + * ie. GetStackTrace will return something sane. + * + * thread_id should be an integer with 0 < thread_id < N; + * N being the value returned by GetMaxThreads. + * + * implementation dependent on the test framework. + * A return of 0 signifies the thread_id is not supported. + */ + static jthread GetThread(int thread_id); + + /** + * Returns the number of threads that are supported by GetThread and + * subsequent methods that would take a thread as argument to fake + * various threads in action. 
+ */ + static int GetMaxThreads(); }; } // namespace javaprofiler diff --git a/third_party/javaprofiler/stacktraces.cc b/third_party/javaprofiler/stacktraces.cc index d25f727a9..48d7cc450 100644 --- a/third_party/javaprofiler/stacktraces.cc +++ b/third_party/javaprofiler/stacktraces.cc @@ -17,8 +17,6 @@ namespace google { namespace javaprofiler { -__thread JNIEnv *Accessors::env_; -__thread int64_t Accessors::attr_; ASGCTType Asgct::asgct_; std::mutex *AttributeTable::mutex_; @@ -26,23 +24,23 @@ std::unordered_map *AttributeTable::string_map_; std::vector *AttributeTable::strings_; bool AsyncSafeTraceMultiset::Add(int attr, JVMPI_CallTrace *trace) { - uint64_t hash_val = CalculateHash(attr, trace->num_frames, &trace->frames[0]); + uint64 hash_val = CalculateHash(attr, trace->num_frames, &trace->frames[0]); - active_insertions_.fetch_add(1, std::memory_order_acquire); - for (int64_t i = 0; i < MaxEntries(); i++) { - int64_t idx = (i + hash_val) % MaxEntries(); + for (int64 i = 0; i < MaxEntries(); i++) { + int64 idx = (i + hash_val) % MaxEntries(); auto &entry = traces_[idx]; - int64_t count_zero = 0; - int64_t count = entry.count.load(std::memory_order_acquire); + int64 count_zero = 0; + entry.active_updates.fetch_add(1, std::memory_order_acquire); + int64 count = entry.count.load(std::memory_order_acquire); switch (count) { case 0: if (entry.count.compare_exchange_weak(count_zero, kTraceCountLocked, std::memory_order_relaxed)) { // This entry is reserved, there is no danger of interacting - // with Extract, so decrement active_insertions early. - active_insertions_.fetch_add(-1, std::memory_order_release); + // with Extract, so decrement active_updates early. 
+ entry.active_updates.fetch_sub(1, std::memory_order_release); // memcpy is not async safe - JVMPI_CallFrame *fb = frame_buffer_[idx]; + JVMPI_CallFrame *fb = entry.frame_buffer; int num_frames = trace->num_frames; for (int frame_num = 0; frame_num < num_frames; ++frame_num) { fb[frame_num].lineno = trace->frames[frame_num].lineno; @@ -51,7 +49,7 @@ bool AsyncSafeTraceMultiset::Add(int attr, JVMPI_CallTrace *trace) { entry.trace.frames = fb; entry.trace.num_frames = num_frames; entry.attr = attr; - entry.count.store(int64_t(1), std::memory_order_release); + entry.count.store(static_cast(1), std::memory_order_release); return true; } break; @@ -70,25 +68,25 @@ bool AsyncSafeTraceMultiset::Add(int attr, JVMPI_CallTrace *trace) { if (count != kTraceCountLocked && entry.count.compare_exchange_weak(count, count + 1, std::memory_order_relaxed)) { - active_insertions_.fetch_add(-1, std::memory_order_release); + entry.active_updates.fetch_sub(1, std::memory_order_release); return true; } } } + // Did nothing, but we still need storage ordering between this + // store and preceding loads. + entry.active_updates.fetch_sub(1, std::memory_order_release); } - // Did nothing, but we still need storage ordering between this - // store and preceding loads. - active_insertions_.fetch_add(-1, std::memory_order_release); return false; } -int AsyncSafeTraceMultiset::Extract(int location, int64_t *attr, int max_frames, - JVMPI_CallFrame *frames, int64_t *count) { +int AsyncSafeTraceMultiset::Extract(int location, int64 *attr, int max_frames, + JVMPI_CallFrame *frames, int64 *count) { if (location < 0 || location >= MaxEntries()) { return 0; } auto &entry = traces_[location]; - int64_t c = entry.count.load(std::memory_order_acquire); + int64 c = entry.count.load(std::memory_order_acquire); if (c <= 0) { // Unused or in process of being updated, skip for now. 
return 0; @@ -101,22 +99,15 @@ int AsyncSafeTraceMultiset::Extract(int location, int64_t *attr, int max_frames, c = entry.count.exchange(kTraceCountLocked, std::memory_order_acquire); *attr = entry.attr; - bool all_quiet = false; for (int i = 0; i < num_frames; ++i) { frames[i].lineno = entry.trace.frames[i].lineno; frames[i].method_id = entry.trace.frames[i].method_id; - if (all_quiet == false && - active_insertions_.load(std::memory_order_acquire) == 0) { - all_quiet = true; - } } - if (!all_quiet) { - while (active_insertions_.load(std::memory_order_acquire) != 0) { - // spin - // TODO: Introduce a limit to detect and break - // deadlock - } + while (entry.active_updates.load(std::memory_order_acquire) != 0) { + // spin + // TODO: Introduce a limit to detect and break + // deadlock } entry.count.store(0, std::memory_order_release); @@ -124,8 +115,8 @@ int AsyncSafeTraceMultiset::Extract(int location, int64_t *attr, int max_frames, return num_frames; } -void TraceMultiset::Add(int64_t attr, int num_frames, JVMPI_CallFrame *frames, - int64_t count) { +void TraceMultiset::Add(int64 attr, int num_frames, JVMPI_CallFrame *frames, + int64 count) { CallTrace t; t.attr = attr; t.frames = std::vector(frames, frames + num_frames); @@ -140,10 +131,10 @@ void TraceMultiset::Add(int64_t attr, int num_frames, JVMPI_CallFrame *frames, int HarvestSamples(AsyncSafeTraceMultiset *from, TraceMultiset *to) { int trace_count = 0; - int64_t num_traces = from->MaxEntries(); - for (int64_t i = 0; i < num_traces; i++) { + int64 num_traces = from->MaxEntries(); + for (int64 i = 0; i < num_traces; i++) { JVMPI_CallFrame frame[kMaxFramesToCapture]; - int64_t attr, count; + int64 attr, count; int num_frames = from->Extract(i, &attr, kMaxFramesToCapture, &frame[0], &count); @@ -155,10 +146,10 @@ int HarvestSamples(AsyncSafeTraceMultiset *from, TraceMultiset *to) { return trace_count; } -uint64_t CalculateHash(int64_t attr, int num_frames, +uint64 CalculateHash(int64 attr, int num_frames, 
const JVMPI_CallFrame *frame) { // Make hash-value - uint64_t h = attr; + uint64 h = attr; h += h << 10; h ^= h >> 6; for (int i = 0; i < num_frames; i++) { diff --git a/third_party/javaprofiler/stacktraces.h b/third_party/javaprofiler/stacktraces.h index 05427d142..9adbbec2e 100644 --- a/third_party/javaprofiler/stacktraces.h +++ b/third_party/javaprofiler/stacktraces.h @@ -33,7 +33,7 @@ namespace javaprofiler { // Maximum number of frames to store from the stack traces sampled. const int kMaxFramesToCapture = 128; -uint64_t CalculateHash(int64_t attr, int num_frames, +uint64 CalculateHash(int64 attr, int num_frames, const JVMPI_CallFrame *frame); bool Equal(int num_frames, const JVMPI_CallFrame *f1, const JVMPI_CallFrame *f2); @@ -121,8 +121,6 @@ class AsyncSafeTraceMultiset { void Reset() { memset(traces_, 0, sizeof(traces_)); - memset(frame_buffer_, 0, sizeof(frame_buffer_)); - active_insertions_ = 0; } // Add a trace to the set. If it is already present, increment its @@ -135,10 +133,10 @@ class AsyncSafeTraceMultiset { // there is no valid trace at this location. This operation is // thread safe with respect to Add() but only a single call to // Extract can be done at a time. - int Extract(int location, int64_t *attr, int max_frames, - JVMPI_CallFrame *frames, int64_t *count); + int Extract(int location, int64 *attr, int max_frames, + JVMPI_CallFrame *frames, int64 *count); - int64_t MaxEntries() const { return kMaxStackTraces; } + int64 MaxEntries() const { return kMaxStackTraces; } private: struct TraceData { @@ -146,12 +144,16 @@ class AsyncSafeTraceMultiset { // this will represent a sample label. int attr; // trace is a triple containing the JNIEnv and the individual call frames. - // The frames are stored in AsyncSafeTraceMultiset::frame_buffer_ + // The frames are stored in frame_buffer. JVMPI_CallTrace trace; + // frame_buffer is the storage for stack frames. 
+ JVMPI_CallFrame frame_buffer[kMaxFramesToCapture]; // Number of times a trace has been encountered. // 0 indicates that the trace is unused // <0 values are reserved, used for concurrency control. - std::atomic count; + std::atomic count; + // Number of active attempts to increase the counter on the trace. + std::atomic active_updates; }; // TODO: Re-evaluate MaxStackTraces, to minimize storage @@ -160,13 +162,9 @@ class AsyncSafeTraceMultiset { static const int kMaxStackTraces = 2048; // Sentinel to use as trace count while the frames are being updated. - static const int64_t kTraceCountLocked = -1; - - // Number of calls to Add() currently in progress. - std::atomic active_insertions_; + static const int64 kTraceCountLocked = -1; TraceData traces_[kMaxStackTraces]; - JVMPI_CallFrame frame_buffer_[kMaxStackTraces][kMaxFramesToCapture]; DISALLOW_COPY_AND_ASSIGN(AsyncSafeTraceMultiset); }; @@ -179,7 +177,7 @@ class TraceMultiset { private: typedef struct { std::vector frames; - int64_t attr; + int64 attr; } CallTrace; struct CallTraceHash { @@ -201,7 +199,7 @@ class TraceMultiset { } }; - typedef std::unordered_map + typedef std::unordered_map CountMap; public: @@ -209,8 +207,8 @@ class TraceMultiset { // Add a trace to the array. If it is already in the array, // increment its count. - void Add(int64_t attr, int num_frames, JVMPI_CallFrame *frames, - int64_t count); + void Add(int64 attr, int num_frames, JVMPI_CallFrame *frames, + int64 count); typedef CountMap::iterator iterator; typedef CountMap::const_iterator const_iterator; diff --git a/third_party/javaprofiler/tags.cc b/third_party/javaprofiler/tags.cc new file mode 100644 index 000000000..65d8aaff8 --- /dev/null +++ b/third_party/javaprofiler/tags.cc @@ -0,0 +1,195 @@ +/* + * Copyright 2018 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "third_party/javaprofiler/tags.h" + +#include +#include // NOLINT +#include + +#include "third_party/javaprofiler/globals.h" + +namespace google { +namespace javaprofiler { +namespace { + +// Protects the global variables keys and key_to_id. +std::mutex mutex; +const Tags *empty_tags = nullptr; +const AsyncRefCountedString *empty_async_string = nullptr; +// Stores all the keys. +std::vector *keys = nullptr; +// Maps key to its id. The index of a key stored in keys (called id or key_id) +// is used to retrieve a key or value quickly as it is very friendly to vectors. +std::unordered_map *key_to_id = nullptr; + +// Returns the id of the key if the key has been registered. Otherwise, it tries +// to add the key and returns its id upon success or returns -1 if there is no +// extra space. +int32_t RegisterKey(const string &key) { + std::lock_guard lock(mutex); + const auto &target = key_to_id->find(key); + if (target != key_to_id->end()) { + // Key is found and directly return its id. + return target->second; + } + if (keys->size() >= kMaxNumTags) { + // No space to register a new key. + return -1; + } + // Enough space to register a new key. + keys->emplace_back(key); + int32_t id = keys->size() - 1; + (*key_to_id)[key] = id; + return id; +} + +} // namespace + +// The key stored in tags storage aims to support GetAttribute() and +// SetAttribute(). +const char kAttrKey[] = "attr"; + +// Returns the id of the key if the key has been registered; otherwise, returns +// -1. 
+int32_t Tags::GetIdByKey(const string &key) { + std::lock_guard lock(mutex); + auto target = key_to_id->find(key); + if (target == key_to_id->end()) { + return -1; + } + return target->second; +} + +void Tags::AsyncSafeCopy(const Tags &from) { + for (int i = 0; i < kMaxNumTags; i++) { + values_[i].AsyncSafeCopy(from.values_[i]); + } +} + +bool Tags::Set(const string &key, const AsyncRefCountedString &value) { + // All values are stored in the vector "values_" and "key_id" (resolved by + // "key") is used to locate the right position. + int32_t key_id = RegisterKey(key); + if (key_id < 0) { + return false; + } + values_[key_id] = value; + return true; +} + +void Tags::ClearAll() { + for (auto &val : values_) { + val.Reset(); + } +} + +void Tags::AsyncSafeClearAll() { + for (auto &val : values_) { + val.AsyncSafeReset(); + } +} + +bool Tags::operator==(const Tags &other) const { + for (int i = 0; i < kMaxNumTags; i++) { + if (values_[i] != other.values_[i]) { + return false; + } + } + return true; +} + +uint64 Tags::Hash() const { + uint64 h = 0; + for (const auto &val : values_) { + h += val.Hash(); + h += h << 10; + h ^= h >> 6; + } + return h; +} + +const AsyncRefCountedString &Tags::Get(const string &key) const { + int32_t key_id = GetIdByKey(key); + if (key_id >= 0) { + return values_[key_id]; + } + return *empty_async_string; +} + +std::vector> Tags::GetAll() const { + std::lock_guard lock(mutex); + std::vector> all_pairs(keys->size()); + for (int i = 0; i < keys->size(); i++) { + all_pairs[i].first = (*keys)[i]; + all_pairs[i].second = values_[i]; + } + return all_pairs; +} + +void Tags::SetAttribute(int64 value) { + // TODO: Check whether the conversion between integer and string is a + // performance concern. 
+ Set(kAttrKey, AsyncRefCountedString(std::to_string(value))); +} + +int64 Tags::GetAttribute() const { + const string *value = Get(kAttrKey).Get(); + if (value == nullptr) { + return 0; + } + return static_cast(std::stol(*value)); +} + +const Tags &Tags::Empty() { return *empty_tags; } + +bool Tags::Init() { + { + std::lock_guard lock(mutex); + if (empty_tags != nullptr || empty_async_string != nullptr || + keys != nullptr || key_to_id != nullptr) { + return false; + } + empty_async_string = new AsyncRefCountedString(); + keys = new std::vector(); + key_to_id = new std::unordered_map(); + empty_tags = new Tags(); + } + // Register the key "Accessors::kAttrKey" to support Accessors::SetAttribute() + // and Accessors::GetAttribute(). + RegisterKey(kAttrKey); + return true; +} + +bool Tags::Destroy() { + std::lock_guard lock(mutex); + if (empty_tags == nullptr || empty_async_string == nullptr || + keys == nullptr || key_to_id == nullptr) { + return false; + } + delete empty_tags; + empty_tags = nullptr; + delete key_to_id; + key_to_id = nullptr; + delete keys; + keys = nullptr; + delete empty_async_string; + empty_async_string = nullptr; + return true; +} + +} // namespace javaprofiler +} // namespace google diff --git a/third_party/javaprofiler/tags.h b/third_party/javaprofiler/tags.h new file mode 100644 index 000000000..0523c83a1 --- /dev/null +++ b/third_party/javaprofiler/tags.h @@ -0,0 +1,92 @@ +/* + * Copyright 2018 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef THIRD_PARTY_JAVAPROFILER_TAGS_H_
+#define THIRD_PARTY_JAVAPROFILER_TAGS_H_
+#include <string>
+#include <vector>
+
+#include "third_party/javaprofiler/async_ref_counted_string.h"
+
+namespace google {
+namespace javaprofiler {
+
+constexpr int kMaxNumTags = 16;
+
+extern const char kAttrKey[];
+
+// Stores additional attributes as <key, value> pairs for profiles.
+class Tags {
+ public:
+  // Returns the value (in AsyncRefCountedString reference) indexed by "key". Be
+  // cautious that the referenced AsyncRefCountedString may change if modifiers
+  // (e.g., Set(), Clear()) are invoked later.
+  const AsyncRefCountedString &Get(const string &key) const;
+  // Sets the value indexed by "key" to "value". It returns true on success;
+  // otherwise returns false. The failure usually comes from that "key" does not
+  // exist in the key list while there is no space to accommodate this new
+  // "key".
+  bool Set(const string &key, const AsyncRefCountedString &value);
+  // Resets all values to nullptr.
+  void ClearAll();
+  // Returns all the key-value pairs stored in this Tags.
+  std::vector<std::pair<string, AsyncRefCountedString>> GetAll() const;
+
+  // Legacy interfaces. The attribute value is stored internally indexed by a
+  // special key (kAttrKey).
+  void SetAttribute(int64 value);
+  int64 GetAttribute() const;
+
+  // Async-signal-safe version of operator=(const Tags &other). It requires that
+  // the instance should be empty (does not refer to any string). Otherwise, it
+  // asserts an error.
+  void AsyncSafeCopy(const Tags &from);
+  // Async-signal-safe version of ClearAll(). It will succeed if no entry is
+  // removed from the AsyncRefCountedString internal string table. Otherwise, it
+  // asserts an error.
+  void AsyncSafeClearAll();
+  // Async-signal-safe.
+  bool operator==(const Tags &other) const;
+  // Async-signal-safe.
+  uint64 Hash() const;
+
+  // Async-signal-safe.
+  static const Tags &Empty();
+  // Initializes the internal storage for the key table and empty Tags and
+  // AsyncRefCountedString instances. Must be called after
+  // AsyncRefCountedString::Init(), and before using Tags to store any key-value
+  // pair or calling any other static method. Should only be called once,
+  // subsequent calls have no effect and return false. Destroy() should be
+  // called to free the storage.
+  static bool Init();
+  // Frees the internal storage allocated by Init(). Must be called before
+  // AsyncRefCountedString::Destroy(), and after all outstanding Tags objects
+  // are gone. No key-value pair can be stored after Destroy() is called.
+  // Returns false if the storage is not currently allocated.
+  static bool Destroy();
+
+ private:
+  // Returns the id of "key" if "key" is present; otherwise, returns -1.
+  static int32_t GetIdByKey(const string &key);
+
+  AsyncRefCountedString values_[kMaxNumTags];
+
+  friend class TagsTest;
+};
+
+}  // namespace javaprofiler
+}  // namespace google
+#endif  // THIRD_PARTY_JAVAPROFILER_TAGS_H_
diff --git a/third_party/perftools/profiles/proto/builder.cc b/third_party/perftools/profiles/proto/builder.cc
index ec0eac129..fbcfd28db 100644
--- a/third_party/perftools/profiles/proto/builder.cc
+++ b/third_party/perftools/profiles/proto/builder.cc
@@ -18,10 +18,11 @@
 #include <stdio.h>
 #include <string.h>
-#include <unordered_map>
-#include <unordered_set>
 #include <string>
 #include <vector>
+
+#include <unordered_map>
+#include <unordered_set>
 
 #include "glog/logging.h"
 #include "google/protobuf/io/gzip_stream.h"
 #include "google/protobuf/io/zero_copy_stream_impl.h"
@@ -31,6 +32,13 @@
 using google::protobuf::io::GzipOutputStream;
 using google::protobuf::io::FileOutputStream;
 using google::protobuf::RepeatedField;
+namespace perftools {
+namespace profiles {
+typedef std::unordered_map<uint64, int64> IndexMap;
+typedef std::unordered_set<int64> IndexSet;
+}  // namespace profiles
+}  // namespace perftools
+
 
 namespace perftools {
 namespace profiles {
@@ -60,7 +68,8 @@ uint64 Builder::FunctionId(const char *name, const char *system_name,
   int64 system_name_index = StringId(system_name);
   int64 file_index = StringId(file);
 
-  Function fn(name_index, system_name_index, file_index, start_line);
+  auto fn =
+      std::make_tuple(name_index, system_name_index, file_index, start_line);
 
   int64 index = profile_->function_size() + 1;
   const auto inserted = functions_.insert(std::make_pair(fn, index));
@@ -122,7 +131,8 @@ bool Builder::MarshalToFile(const Profile &profile, const char *filename) {
 
 // Returns a bool indicating if the profile is valid. It logs any
 // errors it encounters.
 bool Builder::CheckValid(const Profile &profile) {
-  std::unordered_set<int64> mapping_ids;
+  IndexSet mapping_ids;
+  mapping_ids.reserve(profile.mapping_size());
   for (const auto &mapping : profile.mapping()) {
     const int64 id = mapping.id();
     if (id != 0) {
@@ -134,7 +144,8 @@ bool Builder::CheckValid(const Profile &profile) {
     }
   }
 
-  std::unordered_set<int64> function_ids;
+  IndexSet function_ids;
+  function_ids.reserve(profile.function_size());
   for (const auto &function : profile.function()) {
     const int64 id = function.id();
     if (id != 0) {
@@ -146,7 +157,8 @@ bool Builder::CheckValid(const Profile &profile) {
     }
   }
 
-  std::unordered_set<int64> location_ids;
+  IndexSet location_ids;
+  location_ids.reserve(profile.location_size());
   for (const auto &location : profile.location()) {
     const int64 id = location.id();
     if (id != 0) {
@@ -211,7 +223,8 @@ bool Builder::CheckValid(const Profile &profile) {
 // - Associates locations to the corresponding mappings.
 bool Builder::Finalize() {
   if (profile_->location_size() == 0) {
-    std::unordered_map<uint64, int64> address_to_id;
+    IndexMap address_to_id;
+    address_to_id.reserve(profile_->sample_size());
     for (auto &sample : *profile_->mutable_sample()) {
       // Copy sample locations into a temp vector, and then clear and
       // repopulate it with the corresponding location IDs.
diff --git a/third_party/perftools/profiles/proto/builder.h b/third_party/perftools/profiles/proto/builder.h
index 810f6792f..e0f1915bb 100644
--- a/third_party/perftools/profiles/proto/builder.h
+++ b/third_party/perftools/profiles/proto/builder.h
@@ -20,7 +20,9 @@
 #include <memory>
 #include <string>
 #include <tuple>
+
 #include <unordered_map>
+
 
 namespace perftools {
 namespace profiles {
@@ -28,8 +30,26 @@
 typedef int64_t int64;
 typedef uint64_t uint64;
 typedef std::string string;
-}
-}
+typedef std::unordered_map<string, int64> StringIndexMap;
+
+class FunctionHasher {
+ public:
+  size_t operator()(const std::tuple<int64, int64, int64, int64> &f) const {
+    int64 hash = std::get<0>(f);
+    hash = hash + ((hash << 8) ^ std::get<1>(f));
+    hash = hash + ((hash << 8) ^ std::get<2>(f));
+    hash = hash + ((hash << 8) ^ std::get<3>(f));
+    return static_cast<size_t>(hash);
+  }
+};
+
+typedef std::unordered_map<std::tuple<int64, int64, int64, int64>, int64,
+                           FunctionHasher>
+    FunctionIndexMap;
+
+}  // namespace profiles
+}  // namespace perftools
+
 #include "perftools/profiles/proto/profile.pb.h"
 
 namespace perftools {
@@ -100,22 +120,9 @@
   Profile *mutable_profile() { return profile_.get(); }
 
  private:
-  // Holds the information about a function to facilitate deduplication.
-  typedef std::tuple<int64, int64, int64, int64> Function;
-  class FunctionHasher {
-   public:
-    size_t operator()(const Function &f) const {
-      int64 hash = std::get<0>(f);
-      hash = hash + ((hash << 8) ^ std::get<1>(f));
-      hash = hash + ((hash << 8) ^ std::get<2>(f));
-      hash = hash + ((hash << 8) ^ std::get<3>(f));
-      return static_cast<size_t>(hash);
-    }
-  };
-
-  // Hashes to deduplicate strings and functions.
-  std::unordered_map<string, int64> strings_;
-  std::unordered_map<Function, int64, FunctionHasher> functions_;
+  // Maps to deduplicate strings and functions.
+  StringIndexMap strings_;
+  FunctionIndexMap functions_;
 
   // Actual profile being updated.
std::unique_ptr<Profile> profile_; diff --git a/third_party/perftools/profiles/proto/profile.proto b/third_party/perftools/profiles/proto/profile.proto index ed6fb2b83..e59e4562b 100644 --- a/third_party/perftools/profiles/proto/profile.proto +++ b/third_party/perftools/profiles/proto/profile.proto @@ -69,7 +69,7 @@ message Profile { repeated string string_table = 6; // frames with Function.function_name fully matching the following // regexp will be dropped from the samples, along with their successors. - int64 drop_frames = 7; // Index into string table. + int64 drop_frames = 7; // Index into string table. // frames with Function.function_name fully matching the following // regexp will be kept, even if it matches drop_functions. int64 keep_frames = 8; // Index into string table. @@ -81,13 +81,13 @@ int64 time_nanos = 9; // Duration of the profile, if a duration makes sense. int64 duration_nanos = 10; - // The kind of events between sampled ocurrences. + // The kind of events between sampled occurrences. // e.g [ "cpu","cycles" ] or [ "heap","bytes" ] ValueType period_type = 11; // The number of events between sampled occurrences. int64 period = 12; // Freeform text associated to the profile. - repeated int64 comment = 13; // Indices into string table. + repeated int64 comment = 13; // Indices into string table. // Index into the string table of the type of the preferred sample // value. If unset, clients should default to the last sample value. int64 default_sample_type = 14; @@ -95,8 +95,8 @@ // ValueType describes the semantics and measurement units of a value. message ValueType { - int64 type = 1; // Index into string table. - int64 unit = 2; // Index into string table. + int64 type = 1; // Index into string table. + int64 unit = 2; // Index into string table. } // Each Sample records values encountered in some program message Sample { // entry in Profile.sample_type.
All samples must have the same // number of values, the same as the length of Profile.sample_type. // When aggregating multiple samples into a single sample, the - // result has a list of values that is the elemntwise sum of the + // result has a list of values that is the element-wise sum of the // lists of the originals. repeated int64 value = 2; // label includes additional context for this sample. It can include @@ -120,10 +120,10 @@ message Sample { } message Label { - int64 key = 1; // Index into string table + int64 key = 1; // Index into string table // At most one of the following must be present - int64 str = 2; // Index into string table + int64 str = 2; // Index into string table int64 num = 3; // Should only be present when num is present. @@ -203,12 +203,12 @@ message Function { // Unique nonzero id for the function. uint64 id = 1; // Name of the function, in human-readable form if available. - int64 name = 2; // Index into string table + int64 name = 2; // Index into string table // Name of the function, as identified by the system. // For instance, it can be a C++ mangled name. - int64 system_name = 3; // Index into string table + int64 system_name = 3; // Index into string table // Source file containing the function. - int64 filename = 4; // Index into string table + int64 filename = 4; // Index into string table // Line number in source file. int64 start_line = 5; }