From 817eaec91766cbfd01787333fe08ac12bb85de80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20No=C3=A9?= Date: Thu, 27 Apr 2017 15:33:24 -0400 Subject: [PATCH 1/7] Backtrace on fatal signals. This commit adds signal handlers into main.cc, or any smaller scope as desired. For any fatal signals a backtrace is logged and then the signal is re-raised to continue with the default behavior. The backtrace logging and script are modified slightly to support all objects found and not make assumptions about the first stack frame being in the binary we care about. This is needed for reasonable output in the case of abort() or any fatal signal like SEGV that crops up inside a library function we call. --- source/exe/BUILD | 10 +++++ source/exe/main.cc | 2 + source/exe/signal_action.cc | 74 +++++++++++++++++++++++++++++++++++++ source/exe/signal_action.h | 74 +++++++++++++++++++++++++++++++++++++ source/server/backtrace.h | 30 +++++++++++++-- test/exe/BUILD | 7 ++++ test/exe/signals_test.cc | 69 ++++++++++++++++++++++++++++++++++ tools/stack_decode.py | 48 +++++++++++++++++------- 8 files changed, 298 insertions(+), 16 deletions(-) create mode 100644 source/exe/signal_action.cc create mode 100644 source/exe/signal_action.h create mode 100644 test/exe/signals_test.cc diff --git a/source/exe/BUILD b/source/exe/BUILD index ff429669ca339..779d0f6b50e9e 100644 --- a/source/exe/BUILD +++ b/source/exe/BUILD @@ -52,6 +52,7 @@ envoy_cc_library( deps = [ ":envoy_common_lib", ":hot_restart_lib", + ":sigaction_lib", ], ) @@ -70,3 +71,12 @@ envoy_cc_library( "//source/common/stats:stats_lib", ], ) + +envoy_cc_library( + name = "sigaction_lib", + srcs = ["signal_action.cc"], + hdrs = ["signal_action.h"], + deps = [ + "//source/server:backtrace_lib", + ], +) diff --git a/source/exe/main.cc b/source/exe/main.cc index 1076c3255efad..48845168f1e55 100644 --- a/source/exe/main.cc +++ b/source/exe/main.cc @@ -7,6 +7,7 @@ #include "common/stats/thread_local_store.h" #include "exe/hot_restart.h" +#include "exe/signal_action.h" #include "server/drain_manager_impl.h" #include "server/options_impl.h" @@ -34,6 +35,7 @@ class ProdComponentFactory : public ComponentFactory { } // Server int main(int argc, char** argv) { + SignalAction handle_sigs; ares_library_init(ARES_LIB_INIT_ALL); Event::Libevent::Global::initialize(); OptionsImpl options(argc, argv, Server::SharedMemory::version(), spdlog::level::warn); diff --git a/source/exe/signal_action.cc b/source/exe/signal_action.cc new file mode 100644 index 0000000000000..88dddf092690f --- /dev/null +++ b/source/exe/signal_action.cc @@ -0,0 +1,74 @@ +#include "exe/signal_action.h" + +#include + +constexpr int SignalAction::FATAL_SIGS[]; + +void SignalAction::SigHandler(int sig, siginfo_t* info, void* context) { + void* error_pc = 0; + + const ucontext_t* ucontext = reinterpret_cast(context); + if (ucontext != nullptr) { +#ifdef REG_RIP + // x86_64 + error_pc = reinterpret_cast(ucontext->uc_mcontext.gregs[REG_RIP]); +#elif defined(REG_EIP) + // x86 Classic - not tested + error_pc = reinterpret_cast(ucontext->uc_mcontext.gregs[REG_EIP]); +#elif defined(__arm__) + // ARM - not tested + error_pc = reinterpret_cast(ucontext->uc_mcontext.arm_pc); +#elif defined(__ppc__) + // PPC - not tested + error_pc = reinterpret_cast(ucontext->uc_mcontext.regs->nip); +#else +#warning "Cannot determine PC location in machine context for your architecture" +#endif + } + + BackwardsTrace tracer(true); + tracer.LogFault(strsignal(sig), info->si_addr); + if (error_pc != 0) { + tracer.CaptureFrom(error_pc); + } else { + tracer.Capture(); + } + tracer.Log(); + + signal(sig, SIG_DFL); + raise(sig); +} + +void SignalAction::InstallSigHandlers() { + stack_t stack; + stack.ss_sp = altstack_.get(); + stack.ss_size = ALTSTACK_SIZE; + stack.ss_flags = 0; + + if (sigaltstack(&stack, nullptr) < 0) { + // Failed sigaltstack. Just print the error message. + // We can still try to backtrace, but it might be clobbered. + std::cerr << "Nonfatal error: Failed to set up alternate signal stack: " << strerror(errno) + << std::endl; + } + + for (const auto& sig : FATAL_SIGS) { + struct sigaction saction; + std::memset(&saction, 0, sizeof(saction)); + sigemptyset(&saction.sa_mask); + saction.sa_flags = (SA_SIGINFO | SA_ONSTACK | SA_RESETHAND | SA_NODEFER); + saction.sa_sigaction = SigHandler; + if (sigaction(sig, &saction, nullptr) < 0) { + // Failed sigaltstack. Just print the error message. + // Auto backtraces won't work, but we can still start up. + std::cerr << "Nonfatal error: Failed to set up signal action for signal " << sig << ": " + << strerror(errno) << std::endl; + } + } +} + +void SignalAction::RemoveSigHandlers() const { + for (const auto& sig : FATAL_SIGS) { + signal(sig, SIG_DFL); + } +} diff --git a/source/exe/signal_action.h b/source/exe/signal_action.h new file mode 100644 index 0000000000000..a99afeb1e57d1 --- /dev/null +++ b/source/exe/signal_action.h @@ -0,0 +1,74 @@ +#pragma once + +#include + +#include "server/backtrace.h" + +/** + * This class installs signal handlers for fatal signal types. + * + * These signals are handled: + * SIGABRT + * SIGBUS + * SIGFPE + * SIGILL + * SIGSEGV + * + * Upon intercepting the signal the following actions are taken: + * + * A Backtrace is printed from the address the signal was encountered at, if + * it is possible to retrieve. + * + * The signal handler is reset to the default handler (which is expected to + * terminate the process). + * + * The signal is raised again (which ultimately kills the process) + * + * The signal handler must run on an alternative stack so that we can do the + * stack unwind on the original stack. Memory is allocated for this purpose when + * this object is constructed. When this object goes out of scope the memory is + * alternate signal stack memory is destroyed and the default signal handler is + * restored. + * + * NOTE: Existing non-default signal handlers are overridden and will not be + * restored. If this behavior is ever required it can be implemented. + * + * It is recommended that this object be instantiated at the highest possible + * scope, eg, in main(). This enables fatal signal handling for almost all code + * executed. + */ +class SignalAction { +public: + SignalAction() : altstack_(new char[ALTSTACK_SIZE]) { InstallSigHandlers(); } + ~SignalAction() { RemoveSigHandlers(); } + +private: + /** + * Allocate this many bytes for the alternate signal handling stack. + */ + static const size_t ALTSTACK_SIZE = 1024 * 1024 * 2; + /** + * This constant array defines the signals we will insert handlers for. + * + * Essentially this is the list of signals that would cause a core dump. + */ + static constexpr int FATAL_SIGS[] = {SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV}; + /** + * The actual signal handler function with prototype matching signal.h + */ + static void SigHandler(int sig, siginfo_t* info, void* context); + /** + * Install all signal handlers and setup signal handling stack. + */ + void InstallSigHandlers(); + /** + * Remove all signal handlers. + * + * Must be executed before the alt stack memory goes away. + * + * Signal handlers will be reset to the default, NOT back to any signal + * handler existing before InstallSigHandlers(). + */ + void RemoveSigHandlers() const; + std::unique_ptr altstack_; +}; diff --git a/source/server/backtrace.h b/source/server/backtrace.h index e2fe2e1f5c024..5b307b75d2666 100644 --- a/source/server/backtrace.h +++ b/source/server/backtrace.h @@ -82,6 +82,23 @@ class BackwardsTrace : Logger::Loggable { stack_trace_.load_here(MAX_STACK_DEPTH); } + /** + * Capture a stack trace from a particular address. + * + * This can be used to capture a useful stack trace from a fatal signal + * handler. + * + * @param address The stack trace will begin from this address. + */ + void CaptureFrom(void* address) { +#ifdef NDEBUG + if (!log_in_prod_) { + return; + } +#endif + stack_trace_.load_from(address, MAX_STACK_DEPTH); + } + /** * Log the stack trace. */ @@ -100,11 +117,9 @@ class BackwardsTrace : Logger::Loggable { } const auto thread_id = stack_trace_.thread_id(); - // Trick to figure out our own object file name for script to use: backward::ResolvedTrace trace = resolver.resolve(stack_trace_[0]); auto obj_name = trace.object_filename; - if (obj_name.empty()) { - obj_name = "path/to/envoy-executable"; + for (unsigned int i = 0; i < stack_trace_.size(); ++i) { } LogAtLevel("Backtrace obj<{}> thr<{}> (use tools/stack_decode.py):", obj_name, thread_id); @@ -112,11 +127,20 @@ class BackwardsTrace : Logger::Loggable { // Why start at 2? To hide the function call to backward that began the // trace. for (unsigned int i = 0; i < stack_trace_.size(); ++i) { + backward::ResolvedTrace trace = resolver.resolve(stack_trace_[i]); + if (trace.object_filename != obj_name) { + obj_name = trace.object_filename; + LogAtLevel("thr<{}> obj<{}>", thread_id, obj_name); + } LogAtLevel("thr<{}> #{} {}", thread_id, stack_trace_[i].idx, stack_trace_[i].addr); } LogAtLevel("end backtrace thread {}", stack_trace_.thread_id()); } + void LogFault(const char* signame, const void* addr) { + LogAtLevel("Caught {}, suspect faulting address {}", signame, addr); + } + private: template void LogAtLevel(T format_str, Args... args) { if (log_in_prod_) { diff --git a/test/exe/BUILD b/test/exe/BUILD index 9b2d36eaac2c5..73c9b0f554fb5 100644 --- a/test/exe/BUILD +++ b/test/exe/BUILD @@ -1,5 +1,6 @@ load( "//bazel:envoy_build_system.bzl", + "envoy_cc_test", "envoy_package", ) @@ -13,3 +14,9 @@ sh_test( # test when doing ASAN. tags = ["no_asan"], ) + +envoy_cc_test( + name = "signals_test", + srcs = ["signals_test.cc"], + deps = ["//source/exe:sigaction_lib"], +) diff --git a/test/exe/signals_test.cc b/test/exe/signals_test.cc new file mode 100644 index 0000000000000..d887ad9e1b3ab --- /dev/null +++ b/test/exe/signals_test.cc @@ -0,0 +1,69 @@ +#include +#include + +#include "exe/signal_action.h" + +#include "gtest/gtest.h" + +TEST(Signals, InvalidAddressDeathTest) { + SignalAction actions; + EXPECT_DEATH([]() -> void { + // Oooooops! + volatile int* nasty_ptr = reinterpret_cast(0x0); + *(nasty_ptr) = 0; + }(), "Segmentation fault"); +} + +#if defined(__x86_64__) || defined(__i386__) +// Unfortunately we don't have a reliable way to do this on other platforms +TEST(Signals, IllegalInstructionDeathTest) { + SignalAction actions; + EXPECT_DEATH([]() -> void { + // Intel defines the "ud2" opcode to be an invalid instruction: + __asm__("ud2"); + }(), "Illegal"); +} +#endif + +TEST(Signals, AbortDeathTest) { + SignalAction actions; + EXPECT_DEATH([]() -> void { abort(); }(), "Aborted"); +} + +TEST(Signals, BadMathDeathTest) { + SignalAction actions; + EXPECT_DEATH([]() -> void { + // It turns out to be really hard to not have the optimizer get rid of a + // division by zero. Just raise the signal for this test. + raise(SIGFPE); + }(), "Floating point"); +} + +TEST(Signals, BusDeathTest) { + SignalAction actions; + EXPECT_DEATH([]() -> void { + // Bus error is tricky. There's one way that can work on POSIX systems + // described below but it depends on mmaping a file. Just make it easy and + // raise a bus. + // + // FILE *f = tmpfile(); + // int *p = mmap(0, 4, PROT_WRITE, MAP_PRIVATE, fileno(f), 0); + // *p = 0; + raise(SIGBUS); + }(), "Bus"); +} + +TEST(Signals, LegalTest) { + // Don't do anything wrong. + { SignalAction actions; } + // Nothing should happen... +} + +TEST(Signals, RaiseNonFatalTest) { + { + SignalAction actions; + // I urgently request that you do nothing please! + raise(SIGURG); + } + // Nothing should happen... +} diff --git a/tools/stack_decode.py b/tools/stack_decode.py index b7976e1373efd..545ab8f991166 100755 --- a/tools/stack_decode.py +++ b/tools/stack_decode.py @@ -17,8 +17,8 @@ import subprocess import sys -Backtrace = collections.namedtuple("Backtrace", - "log_prefix obj_file address_list") +Backtrace = collections.namedtuple("Backtrace", "log_prefix obj_list") +AddressList = collections.namedtuple("AddressList", "obj_file addresses") # Process the log output looking for stacktrace snippets, print them out once @@ -28,6 +28,7 @@ def decode_stacktrace_log(input_source): trace_begin_re = re.compile( "^(.+)\[backtrace\] Backtrace obj<(.+)> thr<(\d+)") stackaddr_re = re.compile("\[backtrace\] thr<(\d+)> #\d+ (0x[0-9a-fA-F]+)$") + new_object_re = re.compile("\[backtrace\] thr<(\d+)> obj<(.+)>$") trace_end_re = re.compile("\[backtrace\] end backtrace thread (\d+)") # build a dictionary indexed by thread_id, value is a Backtrace namedtuple @@ -39,13 +40,20 @@ def decode_stacktrace_log(input_source): begin_trace_match = trace_begin_re.search(line) if begin_trace_match: log_prefix, objfile, thread_id = begin_trace_match.groups() - traces[thread_id] = Backtrace( - log_prefix=log_prefix, obj_file=objfile, address_list=[]) + traces[thread_id] = Backtrace(log_prefix=log_prefix, obj_list=[]) + traces[thread_id].obj_list.append( + AddressList(obj_file=objfile, addresses=[])) continue stackaddr_match = stackaddr_re.search(line) if stackaddr_match: thread_id, address = stackaddr_match.groups() - traces[thread_id].address_list.append(address) + traces[thread_id].obj_list[-1].addresses.append(address) + continue + new_object_match = new_object_re.search(line) + if new_object_match: + thread_id, newobj = new_object_match.groups() + traces[thread_id].obj_list.append( + AddressList(obj_file=newobj, addresses=[])) continue trace_end_match = trace_end_re.search(line) if trace_end_match: @@ -58,18 +66,32 @@ def decode_stacktrace_log(input_source): return -# Output one stacktrace after passing it through addr2line with appropriate -# options -def output_stacktrace(thread_id, traceinfo): - piped_input = "" - for stack_addr in traceinfo.address_list: - piped_input += (stack_addr + "\n") +# Execute addr2line with a particular object file and input string of addresses +# to resolve, one per line. +# +# Returns list of result lines +def run_addr2line(obj_file, piped_input): addr2line = subprocess.Popen( - ["addr2line", "-Cpisfe", traceinfo.obj_file], + ["addr2line", "-Cpisfe", obj_file], stdin=subprocess.PIPE, stdout=subprocess.PIPE) output_stdout, _ = addr2line.communicate(piped_input) - output_lines = output_stdout.split("\n") + return output_stdout.split("\n") + + +# Output one stacktrace after passing it through addr2line with appropriate +# options +def output_stacktrace(thread_id, traceinfo): + output_lines = [] + for address_list in traceinfo.obj_list: + piped_input = "" + obj_name = address_list.obj_file + # end of stack sentinel + if obj_name == "[0xffffffffffffffff]": + break + for stack_addr in address_list.addresses: + piped_input += (stack_addr + "\n") + output_lines += run_addr2line(obj_name, piped_input) resolved_stack_frames = enumerate(output_lines, start=1) sys.stdout.write("%s Backtrace (most recent call first) from thread %s:\n" % From 2c81ad2b92f4761bc0b1fd247b099fd03391ff93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20No=C3=A9?= Date: Fri, 28 Apr 2017 14:10:36 -0400 Subject: [PATCH 2/7] Review comments and improvements. Switched to using mmap with mprotect()ed guard pages for the alternate signal stack. Added test code for guarded stack. Fail harder if we fail normal POSIX commands sigaction, mmap, etc. Default signal capturing behavior can be inhibited with "--define=signal_trace=disabled" build option Inhibited some tests under ASAN where the ASAN signal grabbing behavior render the test pointless and failing. tools/stack_decode.py now properly passes the return code back so test don't magically pass when --run_under Docs++ --- bazel/BUILD | 5 +++ bazel/README.md | 17 ++++++--- bazel/envoy_build_system.bzl | 3 ++ source/exe/BUILD | 1 + source/exe/main.cc | 3 ++ source/exe/signal_action.cc | 67 ++++++++++++++++++++++++++++++------ source/exe/signal_action.h | 47 ++++++++++++++++++++----- source/server/backtrace.h | 18 +++++----- test/exe/signals_test.cc | 67 +++++++++++++++++++++++++----------- tools/stack_decode.py | 2 ++ 10 files changed, 177 insertions(+), 53 deletions(-) diff --git a/bazel/BUILD b/bazel/BUILD index 64474a35ddb95..fe7127c5e855d 100644 --- a/bazel/BUILD +++ b/bazel/BUILD @@ -24,3 +24,8 @@ config_setting( name = "disable_tcmalloc", values = {"define": "tcmalloc=disabled"}, ) + +config_setting( + name = "disable_signal_trace", + values = {"define": "signal_trace=disabled"}, +) diff --git a/bazel/README.md b/bazel/README.md index 1be74a6c38424..517aa4e1f4016 100644 --- a/bazel/README.md +++ b/bazel/README.md @@ -93,11 +93,11 @@ bazel test //test/common/http:async_client_impl_test --strategy=TestRunner=stand ``` # Stack trace symbol resolution -Envoy can produce backtraces on demand or from assertions and other activity. -The stack traces written in the log or to stderr contain addresses rather than -resolved symbols. The `tools/stack_decode.py` script exists to process the output -and do symbol resolution to make the stack traces useful. Any log lines not -relevant to the backtrace capability are passed through the script unchanged +Envoy can produce backtraces on demand and from assertions and other fatal +actions like segfaults. The stack traces written in the log or to stderr contain +addresses rather than resolved symbols. The `tools/stack_decode.py` script exists +to process the output and do symbol resolution to make the stack traces useful. Any +log lines not relevant to the backtrace capability are passed through the script unchanged (it acts like a filter). The script runs in one of two modes. If passed no arguments it anticipates @@ -116,6 +116,13 @@ bazel test -c dbg //test/server:backtrace_test You will need to use either a `dbg` build type or the `--define debug_symbols=yes` option to get symbol information in the binaries. +By default main.cc will install signal handlers to print backtraces at the +location where a fatal signal occurred. The signal handler will re-raise the +fatal signal with the default handler so a core file will still be dumped after +the stack trace is logged. To inhibit this behavior use +`--define=signal_trace=disabled` on the Bazel command line. No signal handlers will +be installed. + # Running a single Bazel test under GDB ``` diff --git a/bazel/envoy_build_system.bzl b/bazel/envoy_build_system.bzl index 15c0331501259..b1d7bcbf2f167 100644 --- a/bazel/envoy_build_system.bzl +++ b/bazel/envoy_build_system.bzl @@ -21,6 +21,9 @@ def envoy_copts(repository, test = False): }) + select({ repository + "//bazel:disable_tcmalloc": [], "//conditions:default": ["-DTCMALLOC"], + }) + select({ + repository + "//bazel:disable_signal_trace": [], + "//conditions:default": ["-DENVOY_HANDLE_SIGNALS"], }) # References to Envoy external dependencies should be wrapped with this function. diff --git a/source/exe/BUILD b/source/exe/BUILD index 779d0f6b50e9e..ffa5f0431e29b 100644 --- a/source/exe/BUILD +++ b/source/exe/BUILD @@ -78,5 +78,6 @@ envoy_cc_library( hdrs = ["signal_action.h"], deps = [ "//source/server:backtrace_lib", + "//source/common/common:assert_lib", ], ) diff --git a/source/exe/main.cc b/source/exe/main.cc index 48845168f1e55..f96941a6c85ad 100644 --- a/source/exe/main.cc +++ b/source/exe/main.cc @@ -35,7 +35,10 @@ class ProdComponentFactory : public ComponentFactory { } // Server int main(int argc, char** argv) { +#ifdef ENVOY_HANDLE_SIGNALS + // Enabled by default. Control with "bazel --define=signal_trace=disabled" SignalAction handle_sigs; +#endif ares_library_init(ARES_LIB_INIT_ALL); Event::Libevent::Global::initialize(); OptionsImpl options(argc, argv, Server::SharedMemory::version(), spdlog::level::warn); diff --git a/source/exe/signal_action.cc b/source/exe/signal_action.cc index 88dddf092690f..307f16452c746 100644 --- a/source/exe/signal_action.cc +++ b/source/exe/signal_action.cc @@ -1,6 +1,8 @@ +#include "common/common/assert.h" #include "exe/signal_action.h" #include +#include constexpr int SignalAction::FATAL_SIGS[]; @@ -14,13 +16,16 @@ void SignalAction::SigHandler(int sig, siginfo_t* info, void* context) { error_pc = reinterpret_cast(ucontext->uc_mcontext.gregs[REG_RIP]); #elif defined(REG_EIP) // x86 Classic - not tested - error_pc = reinterpret_cast(ucontext->uc_mcontext.gregs[REG_EIP]); + // error_pc = reinterpret_cast(ucontext->uc_mcontext.gregs[REG_EIP]); +#warning "Please enable and test x86_32 pc retrieval code in signal_action.cc" #elif defined(__arm__) // ARM - not tested - error_pc = reinterpret_cast(ucontext->uc_mcontext.arm_pc); + // error_pc = reinterpret_cast(ucontext->uc_mcontext.arm_pc); +#warning "Please enable and test ARM pc retrieval code in signal_action.cc" #elif defined(__ppc__) // PPC - not tested - error_pc = reinterpret_cast(ucontext->uc_mcontext.regs->nip); + // error_pc = reinterpret_cast(ucontext->uc_mcontext.regs->nip); +#warning "Please enable and test PPC pc retrieval code in signal_action.cc" #else #warning "Cannot determine PC location in machine context for your architecture" #endif @@ -41,15 +46,14 @@ void SignalAction::SigHandler(int sig, siginfo_t* info, void* context) { void SignalAction::InstallSigHandlers() { stack_t stack; - stack.ss_sp = altstack_.get(); - stack.ss_size = ALTSTACK_SIZE; + stack.ss_sp = altstack_ + GUARD_SIZE; // Guard page at one end ... + stack.ss_size = ALTSTACK_SIZE; // ... guard page at the other stack.ss_flags = 0; if (sigaltstack(&stack, nullptr) < 0) { - // Failed sigaltstack. Just print the error message. - // We can still try to backtrace, but it might be clobbered. - std::cerr << "Nonfatal error: Failed to set up alternate signal stack: " << strerror(errno) + std::cerr << "Failed to set up alternate signal stack: " << strerror(errno) << std::endl; + RELEASE_ASSERT(false); } for (const auto& sig : FATAL_SIGS) { @@ -59,10 +63,9 @@ void SignalAction::InstallSigHandlers() { saction.sa_flags = (SA_SIGINFO | SA_ONSTACK | SA_RESETHAND | SA_NODEFER); saction.sa_sigaction = SigHandler; if (sigaction(sig, &saction, nullptr) < 0) { - // Failed sigaltstack. Just print the error message. - // Auto backtraces won't work, but we can still start up. - std::cerr << "Nonfatal error: Failed to set up signal action for signal " << sig << ": " + std::cerr << "Failed to set up signal action for signal " << sig << ": " << strerror(errno) << std::endl; + RELEASE_ASSERT(false); } } } @@ -72,3 +75,45 @@ void SignalAction::RemoveSigHandlers() const { signal(sig, SIG_DFL); } } + +void SignalAction::MapAndProtectStackMemory() { + // Per docs MAP_STACK doesn't actually do anything today but provides a + // library hint that might be used in the future. + altstack_ = static_cast(mmap(nullptr, MapSizeWithGuards(), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0)); + RELEASE_ASSERT(altstack_); + if (mprotect(altstack_, GUARD_SIZE, PROT_NONE) < 0) { + std::cerr << "Failed to protect signal stack memory: " << strerror(errno) << std::endl; + RELEASE_ASSERT(false); + } + if (mprotect(altstack_ + GUARD_SIZE + ALTSTACK_SIZE, GUARD_SIZE, PROT_NONE) < 0) { + std::cerr << "Failed to protect signal stack memory: " << strerror(errno) << std::endl; + RELEASE_ASSERT(false); + } + +} + +void SignalAction::UnmapStackMemory() { + if (altstack_ != nullptr) { + munmap(altstack_, MapSizeWithGuards()); + } +} + +void SignalAction::DoGoodAccessForTest() { + for (size_t i = 0; i < ALTSTACK_SIZE; ++i) { + *(altstack_ + GUARD_SIZE + i) = 42; + } + for (size_t i = 0; i < ALTSTACK_SIZE; ++i) { + ASSERT(*(altstack_ + GUARD_SIZE + i) == 42); + } +} + +void SignalAction::TryEvilAccessForTest(bool end) { + if (end) { + // One byte past the valid region + // http://oeis.org/A001969 + *(altstack_ + GUARD_SIZE + ALTSTACK_SIZE) = 43; + } else { + // One byte before the valid region + *(altstack_ + GUARD_SIZE - 1) = 43; + } +} diff --git a/source/exe/signal_action.h b/source/exe/signal_action.h index a99afeb1e57d1..abe352e9fd403 100644 --- a/source/exe/signal_action.h +++ b/source/exe/signal_action.h @@ -26,9 +26,9 @@ * * The signal handler must run on an alternative stack so that we can do the * stack unwind on the original stack. Memory is allocated for this purpose when - * this object is constructed. When this object goes out of scope the memory is - * alternate signal stack memory is destroyed and the default signal handler is - * restored. + * this object is constructed. When this object goes out of scope the memory + * used for the alternate signal stack is destroyed and the default signal handler + * is restored. * * NOTE: Existing non-default signal handlers are overridden and will not be * restored. If this behavior is ever required it can be implemented. @@ -39,20 +39,38 @@ */ class SignalAction { public: - SignalAction() : altstack_(new char[ALTSTACK_SIZE]) { InstallSigHandlers(); } - ~SignalAction() { RemoveSigHandlers(); } - + SignalAction() : altstack_(nullptr) { MapAndProtectStackMemory(); InstallSigHandlers(); } + SignalAction(const SignalAction&) = delete; + ~SignalAction() { RemoveSigHandlers(); UnmapStackMemory(); } + /** + * Helpers for testing guarded stack memory + */ + void DoGoodAccessForTest(); + void TryEvilAccessForTest(bool end); private: /** - * Allocate this many bytes for the alternate signal handling stack. + * Use this many bytes for the alternate signal handling stack. + * + * This should be a multiple of 4096. + * Additionally, two guard pages will be allocated to bookend the usable area. + */ + static const size_t ALTSTACK_SIZE = 4096*4; + /** + * Allocate this many bytes on each side of the area used for alt stack. + * + * The memory will be protected from read and write. */ - static const size_t ALTSTACK_SIZE = 1024 * 1024 * 2; + static const size_t GUARD_SIZE = 4096; /** * This constant array defines the signals we will insert handlers for. * * Essentially this is the list of signals that would cause a core dump. */ static constexpr int FATAL_SIGS[] = {SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV}; + /** + * Return the memory size we actually map including two guard pages. + */ + static constexpr size_t MapSizeWithGuards() { return ALTSTACK_SIZE+GUARD_SIZE*2; } /** * The actual signal handler function with prototype matching signal.h */ @@ -70,5 +88,16 @@ class SignalAction { * handler existing before InstallSigHandlers(). */ void RemoveSigHandlers() const; - std::unique_ptr altstack_; + /** + * Use mmap to map anonymous memory for the alternative stack. + * + * GUARD_SIZE on either end of the memory will be marked PROT_NONE, protected + * from all access. + */ + void MapAndProtectStackMemory(); + /** + * Unmap alternative stack memory. + */ + void UnmapStackMemory(); + char* altstack_; }; diff --git a/source/server/backtrace.h b/source/server/backtrace.h index 5b307b75d2666..127837fc52331 100644 --- a/source/server/backtrace.h +++ b/source/server/backtrace.h @@ -117,20 +117,22 @@ class BackwardsTrace : Logger::Loggable { } const auto thread_id = stack_trace_.thread_id(); - backward::ResolvedTrace trace = resolver.resolve(stack_trace_[0]); - auto obj_name = trace.object_filename; - for (unsigned int i = 0; i < stack_trace_.size(); ++i) { - } + backward::ResolvedTrace first_frame_trace = resolver.resolve(stack_trace_[0]); + auto obj_name = first_frame_trace.object_filename; LogAtLevel("Backtrace obj<{}> thr<{}> (use tools/stack_decode.py):", obj_name, thread_id); // Why start at 2? To hide the function call to backward that began the // trace. for (unsigned int i = 0; i < stack_trace_.size(); ++i) { - backward::ResolvedTrace trace = resolver.resolve(stack_trace_[i]); - if (trace.object_filename != obj_name) { - obj_name = trace.object_filename; - LogAtLevel("thr<{}> obj<{}>", thread_id, obj_name); + // Backtrace gets tagged by ASAN when we try the object name resolution for the last + // frame on stack, so skip the last one. It has no useful info anyway. + if (i + 1 != stack_trace_.size()) { + backward::ResolvedTrace trace = resolver.resolve(stack_trace_[i]); + if (trace.object_filename != obj_name) { + obj_name = trace.object_filename; + LogAtLevel("thr<{}> obj<{}>", thread_id, obj_name); + } } LogAtLevel("thr<{}> #{} {}", thread_id, stack_trace_[i].idx, stack_trace_[i].addr); } diff --git a/test/exe/signals_test.cc b/test/exe/signals_test.cc index d887ad9e1b3ab..f0a78406e3fb3 100644 --- a/test/exe/signals_test.cc +++ b/test/exe/signals_test.cc @@ -5,6 +5,21 @@ #include "gtest/gtest.h" +#if defined(__has_feature) +#if __has_feature(address_sanitizer) +#define ASANITIZED /* Sanitized by Clang */ +#endif +#endif + +#if defined(__SANITIZE_ADDRESS__) +#define ASANITIZED /* Sanitized by GCC */ +#endif + +// Memory violation signal tests are disabled under address sanitizer. The +// sanitizer does its own special signal handling and prints messages that are +// not ours instead of what this test expects. The signals special handled by ASAN +// include SIGSEGV, SIGBUS, and SIGFPE. +#ifndef ASANITIZED TEST(Signals, InvalidAddressDeathTest) { SignalAction actions; EXPECT_DEATH([]() -> void { @@ -14,6 +29,30 @@ TEST(Signals, InvalidAddressDeathTest) { }(), "Segmentation fault"); } +TEST(Signals, BusDeathTest) { + SignalAction actions; + EXPECT_DEATH([]() -> void { + // Bus error is tricky. There's one way that can work on POSIX systems + // described below but it depends on mmaping a file. Just make it easy and + // raise a bus. + // + // FILE *f = tmpfile(); + // int *p = mmap(0, 4, PROT_WRITE, MAP_PRIVATE, fileno(f), 0); + // *p = 0; + raise(SIGBUS); + }(), "Bus"); +} + +TEST(Signals, BadMathDeathTest) { + SignalAction actions; + EXPECT_DEATH([]() -> void { + // It turns out to be really hard to not have the optimizer get rid of a + // division by zero. Just raise the signal for this test. + raise(SIGFPE); + }(), "Floating point"); +} +#endif + #if defined(__x86_64__) || defined(__i386__) // Unfortunately we don't have a reliable way to do this on other platforms TEST(Signals, IllegalInstructionDeathTest) { @@ -30,27 +69,10 @@ TEST(Signals, AbortDeathTest) { EXPECT_DEATH([]() -> void { abort(); }(), "Aborted"); } -TEST(Signals, BadMathDeathTest) { - SignalAction actions; - EXPECT_DEATH([]() -> void { - // It turns out to be really hard to not have the optimizer get rid of a - // division by zero. Just raise the signal for this test. - raise(SIGFPE); - }(), "Floating point"); -} - -TEST(Signals, BusDeathTest) { +TEST(Signals, IllegalStackAccessDeathTest) { SignalAction actions; - EXPECT_DEATH([]() -> void { - // Bus error is tricky. There's one way that can work on POSIX systems - // described below but it depends on mmaping a file. Just make it easy and - // raise a bus. - // - // FILE *f = tmpfile(); - // int *p = mmap(0, 4, PROT_WRITE, MAP_PRIVATE, fileno(f), 0); - // *p = 0; - raise(SIGBUS); - }(), "Bus"); + EXPECT_DEATH(actions.TryEvilAccessForTest(false), ""); + EXPECT_DEATH(actions.TryEvilAccessForTest(true), ""); } TEST(Signals, LegalTest) { @@ -67,3 +89,8 @@ TEST(Signals, RaiseNonFatalTest) { } // Nothing should happen... } + +TEST(Signals, LegalStackAccessTest) { + SignalAction actions; + actions.DoGoodAccessForTest(); +} diff --git a/tools/stack_decode.py b/tools/stack_decode.py index 545ab8f991166..1c08b54616f99 100755 --- a/tools/stack_decode.py +++ b/tools/stack_decode.py @@ -105,6 +105,8 @@ def output_stacktrace(thread_id, traceinfo): rununder = subprocess.Popen( sys.argv[1:], stdout=subprocess.PIPE, stderr=subprocess.PIPE) decode_stacktrace_log(rununder.stderr) + rununder.wait() + sys.exit(rununder.returncode) # Pass back test pass/fail result else: decode_stacktrace_log(sys.stdin) sys.exit(0) From 942c63ae39f0b977e47b83a2317edea634b68d0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20No=C3=A9?= Date: Fri, 28 Apr 2017 14:30:54 -0400 Subject: [PATCH 3/7] Fix_format Now with fix_format actually run... --- source/exe/BUILD | 2 +- source/exe/signal_action.cc | 40 ++++++++++++++++++------------------- source/exe/signal_action.h | 15 ++++++++++---- 3 files changed, 32 insertions(+), 25 deletions(-) diff --git a/source/exe/BUILD b/source/exe/BUILD index ffa5f0431e29b..f5cb877f0102d 100644 --- a/source/exe/BUILD +++ b/source/exe/BUILD @@ -77,7 +77,7 @@ envoy_cc_library( srcs = ["signal_action.cc"], hdrs = ["signal_action.h"], deps = [ - "//source/server:backtrace_lib", "//source/common/common:assert_lib", + "//source/server:backtrace_lib", ], ) diff --git a/source/exe/signal_action.cc b/source/exe/signal_action.cc index 307f16452c746..fc17bc6434e79 100644 --- a/source/exe/signal_action.cc +++ b/source/exe/signal_action.cc @@ -1,9 +1,10 @@ -#include "common/common/assert.h" #include "exe/signal_action.h" #include #include +#include "common/common/assert.h" + constexpr int SignalAction::FATAL_SIGS[]; void SignalAction::SigHandler(int sig, siginfo_t* info, void* context) { @@ -15,16 +16,16 @@ void SignalAction::SigHandler(int sig, siginfo_t* info, void* context) { // x86_64 error_pc = reinterpret_cast(ucontext->uc_mcontext.gregs[REG_RIP]); #elif defined(REG_EIP) - // x86 Classic - not tested - // error_pc = reinterpret_cast(ucontext->uc_mcontext.gregs[REG_EIP]); +// x86 Classic - not tested +// error_pc = reinterpret_cast(ucontext->uc_mcontext.gregs[REG_EIP]); #warning "Please enable and test x86_32 pc retrieval code in signal_action.cc" #elif defined(__arm__) - // ARM - not tested - // error_pc = reinterpret_cast(ucontext->uc_mcontext.arm_pc); +// ARM - not tested +// error_pc = reinterpret_cast(ucontext->uc_mcontext.arm_pc); #warning "Please enable and test ARM pc retrieval code in signal_action.cc" #elif defined(__ppc__) - // PPC - not tested - // error_pc = reinterpret_cast(ucontext->uc_mcontext.regs->nip); +// PPC - not tested +// error_pc = reinterpret_cast(ucontext->uc_mcontext.regs->nip); #warning "Please enable and test PPC pc retrieval code in signal_action.cc" #else #warning "Cannot determine PC location in machine context for your architecture" @@ -47,12 +48,11 @@ void SignalAction::SigHandler(int sig, siginfo_t* info, void* context) { void SignalAction::InstallSigHandlers() { stack_t stack; stack.ss_sp = altstack_ + GUARD_SIZE; // Guard page at one end ... - stack.ss_size = ALTSTACK_SIZE; // ... guard page at the other + stack.ss_size = ALTSTACK_SIZE; // ... guard page at the other stack.ss_flags = 0; if (sigaltstack(&stack, nullptr) < 0) { - std::cerr << "Failed to set up alternate signal stack: " << strerror(errno) - << std::endl; + std::cerr << "Failed to set up alternate signal stack: " << strerror(errno) << std::endl; RELEASE_ASSERT(false); } @@ -63,8 +63,8 @@ void SignalAction::InstallSigHandlers() { saction.sa_flags = (SA_SIGINFO | SA_ONSTACK | SA_RESETHAND | SA_NODEFER); saction.sa_sigaction = SigHandler; if (sigaction(sig, &saction, nullptr) < 0) { - std::cerr << "Failed to set up signal action for signal " << sig << ": " - << strerror(errno) << std::endl; + std::cerr << "Failed to set up signal action for signal " << sig << ": " << strerror(errno) + << std::endl; RELEASE_ASSERT(false); } } @@ -79,17 +79,17 @@ void SignalAction::RemoveSigHandlers() const { void SignalAction::MapAndProtectStackMemory() { // Per docs MAP_STACK doesn't actually do anything today but provides a // library hint that might be used in the future. - altstack_ = static_cast(mmap(nullptr, MapSizeWithGuards(), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0)); + altstack_ = static_cast(mmap(nullptr, MapSizeWithGuards(), PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0)); RELEASE_ASSERT(altstack_); if (mprotect(altstack_, GUARD_SIZE, PROT_NONE) < 0) { - std::cerr << "Failed to protect signal stack memory: " << strerror(errno) << std::endl; - RELEASE_ASSERT(false); + std::cerr << "Failed to protect signal stack memory: " << strerror(errno) << std::endl; + RELEASE_ASSERT(false); } - if (mprotect(altstack_ + GUARD_SIZE + ALTSTACK_SIZE, GUARD_SIZE, PROT_NONE) < 0) { - std::cerr << "Failed to protect signal stack memory: " << strerror(errno) << std::endl; - RELEASE_ASSERT(false); + if (mprotect(altstack_ + GUARD_SIZE + ALTSTACK_SIZE, GUARD_SIZE, PROT_NONE) < 0) { + std::cerr << "Failed to protect signal stack memory: " << strerror(errno) << std::endl; + RELEASE_ASSERT(false); } - } void SignalAction::UnmapStackMemory() { @@ -111,7 +111,7 @@ void SignalAction::TryEvilAccessForTest(bool end) { if (end) { // One byte past the valid region // http://oeis.org/A001969 - *(altstack_ + GUARD_SIZE + ALTSTACK_SIZE) = 43; + *(altstack_ + GUARD_SIZE + ALTSTACK_SIZE) = 43; } else { // One byte before the valid region *(altstack_ + GUARD_SIZE - 1) = 43; diff --git a/source/exe/signal_action.h b/source/exe/signal_action.h index abe352e9fd403..3e550119879ed 100644 --- a/source/exe/signal_action.h +++ b/source/exe/signal_action.h @@ -39,14 +39,21 @@ */ class SignalAction { public: - SignalAction() : altstack_(nullptr) { MapAndProtectStackMemory(); InstallSigHandlers(); } + SignalAction() : altstack_(nullptr) { + MapAndProtectStackMemory(); + InstallSigHandlers(); + } SignalAction(const SignalAction&) = delete; - ~SignalAction() { RemoveSigHandlers(); UnmapStackMemory(); } + ~SignalAction() { + RemoveSigHandlers(); + UnmapStackMemory(); + } /** * Helpers for testing guarded stack memory */ void DoGoodAccessForTest(); void TryEvilAccessForTest(bool end); + private: /** * Use this many bytes for the alternate signal handling stack. @@ -54,7 +61,7 @@ class SignalAction { * This should be a multiple of 4096. * Additionally, two guard pages will be allocated to bookend the usable area. */ - static const size_t ALTSTACK_SIZE = 4096*4; + static const size_t ALTSTACK_SIZE = 4096 * 4; /** * Allocate this many bytes on each side of the area used for alt stack. * @@ -70,7 +77,7 @@ class SignalAction { /** * Return the memory size we actually map including two guard pages. */ - static constexpr size_t MapSizeWithGuards() { return ALTSTACK_SIZE+GUARD_SIZE*2; } + static constexpr size_t MapSizeWithGuards() { return ALTSTACK_SIZE + GUARD_SIZE * 2; } /** * The actual signal handler function with prototype matching signal.h */ From 967238ee267e7977675012cc2fa4dfa6037b88d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20No=C3=A9?= Date: Fri, 28 Apr 2017 16:52:39 -0400 Subject: [PATCH 4/7] Style fixes and error handling changes. --- source/exe/BUILD | 1 + source/exe/signal_action.cc | 66 +++++++++++++------------------------ source/exe/signal_action.h | 27 +++++++-------- source/server/backtrace.h | 18 +++++----- 4 files changed, 46 insertions(+), 66 deletions(-) diff --git a/source/exe/BUILD b/source/exe/BUILD index f5cb877f0102d..3f950fe559a07 100644 --- a/source/exe/BUILD +++ b/source/exe/BUILD @@ -78,6 +78,7 @@ envoy_cc_library( hdrs = ["signal_action.h"], deps = [ "//source/common/common:assert_lib", + "//source/common/common:non_copyable", "//source/server:backtrace_lib", ], ) diff --git a/source/exe/signal_action.cc b/source/exe/signal_action.cc index fc17bc6434e79..57266aa8244f1 100644 --- a/source/exe/signal_action.cc +++ b/source/exe/signal_action.cc @@ -7,7 +7,7 @@ constexpr int SignalAction::FATAL_SIGS[]; -void SignalAction::SigHandler(int sig, siginfo_t* info, void* context) { +void SignalAction::sigHandler(int sig, siginfo_t* info, void* context) { void* error_pc = 0; const ucontext_t* ucontext = reinterpret_cast(context); @@ -15,90 +15,68 @@ void SignalAction::SigHandler(int sig, siginfo_t* info, void* context) { #ifdef REG_RIP // x86_64 error_pc = reinterpret_cast(ucontext->uc_mcontext.gregs[REG_RIP]); -#elif defined(REG_EIP) -// x86 Classic - not tested -// error_pc = reinterpret_cast(ucontext->uc_mcontext.gregs[REG_EIP]); -#warning "Please enable and test x86_32 pc retrieval code in signal_action.cc" -#elif defined(__arm__) -// ARM - not tested -// error_pc = reinterpret_cast(ucontext->uc_mcontext.arm_pc); -#warning "Please enable and test ARM pc retrieval code in signal_action.cc" -#elif defined(__ppc__) -// PPC - not tested -// error_pc = reinterpret_cast(ucontext->uc_mcontext.regs->nip); -#warning "Please enable and test PPC pc retrieval code in signal_action.cc" #else -#warning "Cannot determine PC location in machine context for your architecture" +#warning "Please enable and test PC retrieval code for your arch in signal_action.cc" +// x86 Classic: reinterpret_cast(ucontext->uc_mcontext.gregs[REG_EIP]); +// ARM: reinterpret_cast(ucontext->uc_mcontext.arm_pc); +// PPC: reinterpret_cast(ucontext->uc_mcontext.regs->nip); #endif } BackwardsTrace tracer(true); - tracer.LogFault(strsignal(sig), info->si_addr); + tracer.logFault(strsignal(sig), info->si_addr); if (error_pc != 0) { - tracer.CaptureFrom(error_pc); + tracer.captureFrom(error_pc); } else { - tracer.Capture(); + tracer.capture(); } - tracer.Log(); + tracer.log(); signal(sig, SIG_DFL); raise(sig); } -void SignalAction::InstallSigHandlers() { +void SignalAction::installSigHandlers() { stack_t stack; stack.ss_sp = altstack_ + GUARD_SIZE; // Guard page at one end ... stack.ss_size = ALTSTACK_SIZE; // ... guard page at the other stack.ss_flags = 0; - if (sigaltstack(&stack, nullptr) < 0) { - std::cerr << "Failed to set up alternate signal stack: " << strerror(errno) << std::endl; - RELEASE_ASSERT(false); - } + RELEASE_ASSERT(sigaltstack(&stack, nullptr) == 0); for (const auto& sig : FATAL_SIGS) { struct sigaction saction; std::memset(&saction, 0, sizeof(saction)); sigemptyset(&saction.sa_mask); saction.sa_flags = (SA_SIGINFO | SA_ONSTACK | SA_RESETHAND | SA_NODEFER); - saction.sa_sigaction = SigHandler; - if (sigaction(sig, &saction, nullptr) < 0) { - std::cerr << "Failed to set up signal action for signal " << sig << ": " << strerror(errno) - << std::endl; - RELEASE_ASSERT(false); - } + saction.sa_sigaction = sigHandler; + RELEASE_ASSERT(sigaction(sig, &saction, nullptr) == 0); } } -void SignalAction::RemoveSigHandlers() const { +void SignalAction::removeSigHandlers() const { for (const auto& sig : FATAL_SIGS) { signal(sig, SIG_DFL); } } -void SignalAction::MapAndProtectStackMemory() { +void SignalAction::mapAndProtectStackMemory() { // Per docs MAP_STACK doesn't actually do anything today but provides a // library hint that might be used in the future. - altstack_ = static_cast(mmap(nullptr, MapSizeWithGuards(), PROT_READ | PROT_WRITE, + altstack_ = static_cast(mmap(nullptr, mapSizeWithGuards(), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0)); RELEASE_ASSERT(altstack_); - if (mprotect(altstack_, GUARD_SIZE, PROT_NONE) < 0) { - std::cerr << "Failed to protect signal stack memory: " << strerror(errno) << std::endl; - RELEASE_ASSERT(false); - } - if (mprotect(altstack_ + GUARD_SIZE + ALTSTACK_SIZE, GUARD_SIZE, PROT_NONE) < 0) { - std::cerr << "Failed to protect signal stack memory: " << strerror(errno) << std::endl; - RELEASE_ASSERT(false); - } + RELEASE_ASSERT(mprotect(altstack_, GUARD_SIZE, PROT_NONE) == 0); + RELEASE_ASSERT(mprotect(altstack_ + GUARD_SIZE + ALTSTACK_SIZE, GUARD_SIZE, PROT_NONE) == 0); } -void SignalAction::UnmapStackMemory() { +void SignalAction::unmapStackMemory() { if (altstack_ != nullptr) { - munmap(altstack_, MapSizeWithGuards()); + munmap(altstack_, mapSizeWithGuards()); } } -void SignalAction::DoGoodAccessForTest() { +void SignalAction::doGoodAccessForTest() { for (size_t i = 0; i < ALTSTACK_SIZE; ++i) { *(altstack_ + GUARD_SIZE + i) = 42; } @@ -107,7 +85,7 @@ void SignalAction::DoGoodAccessForTest() { } } -void SignalAction::TryEvilAccessForTest(bool end) { +void SignalAction::tryEvilAccessForTest(bool end) { if (end) { // One byte past the valid region // http://oeis.org/A001969 diff --git a/source/exe/signal_action.h b/source/exe/signal_action.h index 3e550119879ed..662d63c7272e2 100644 --- a/source/exe/signal_action.h +++ b/source/exe/signal_action.h @@ -2,6 +2,7 @@ #include +#include "common/common/non_copyable.h" #include "server/backtrace.h" /** @@ -37,22 +38,22 @@ * scope, eg, in main(). This enables fatal signal handling for almost all code * executed. */ -class SignalAction { +class SignalAction : NonCopyable { public: SignalAction() : altstack_(nullptr) { - MapAndProtectStackMemory(); - InstallSigHandlers(); + mapAndProtectStackMemory(); + installSigHandlers(); } SignalAction(const SignalAction&) = delete; ~SignalAction() { - RemoveSigHandlers(); - UnmapStackMemory(); + removeSigHandlers(); + unmapStackMemory(); } /** * Helpers for testing guarded stack memory */ - void DoGoodAccessForTest(); - void TryEvilAccessForTest(bool end); + void doGoodAccessForTest(); + void tryEvilAccessForTest(bool end); private: /** @@ -77,15 +78,15 @@ class SignalAction { /** * Return the memory size we actually map including two guard pages. */ - static constexpr size_t MapSizeWithGuards() { return ALTSTACK_SIZE + GUARD_SIZE * 2; } + static constexpr size_t mapSizeWithGuards() { return ALTSTACK_SIZE + GUARD_SIZE * 2; } /** * The actual signal handler function with prototype matching signal.h */ - static void SigHandler(int sig, siginfo_t* info, void* context); + static void sigHandler(int sig, siginfo_t* info, void* context); /** * Install all signal handlers and setup signal handling stack. */ - void InstallSigHandlers(); + void installSigHandlers(); /** * Remove all signal handlers. * @@ -94,17 +95,17 @@ class SignalAction { * Signal handlers will be reset to the default, NOT back to any signal * handler existing before InstallSigHandlers(). */ - void RemoveSigHandlers() const; + void removeSigHandlers() const; /** * Use mmap to map anonymous memory for the alternative stack. * * GUARD_SIZE on either end of the memory will be marked PROT_NONE, protected * from all access. */ - void MapAndProtectStackMemory(); + void mapAndProtectStackMemory(); /** * Unmap alternative stack memory. */ - void UnmapStackMemory(); + void unmapStackMemory(); char* altstack_; }; diff --git a/source/server/backtrace.h b/source/server/backtrace.h index 127837fc52331..552c02d5e0b0e 100644 --- a/source/server/backtrace.h +++ b/source/server/backtrace.h @@ -7,15 +7,15 @@ #define BACKTRACE_LOG() \ do { \ BackwardsTrace t; \ - t.Capture(); \ - t.Log(); \ + t.capture(); \ + t.log(); \ } while (0) #define BACKTRACE_PROD_LOG() \ do { \ BackwardsTrace t(true); \ - t.Capture(); \ - t.Log(); \ + t.capture(); \ + t.log(); \ } while (0) /** @@ -71,9 +71,9 @@ class BackwardsTrace : Logger::Loggable { /** * Capture a stack trace. * - * The trace will begin with the call to Capture(). + * The trace will begin with the call to capture(). */ - void Capture() { + void capture() { #ifdef NDEBUG if (!log_in_prod_) { return; @@ -90,7 +90,7 @@ class BackwardsTrace : Logger::Loggable { * * @param address The stack trace will begin from this address. */ - void CaptureFrom(void* address) { + void captureFrom(void* address) { #ifdef NDEBUG if (!log_in_prod_) { return; @@ -102,7 +102,7 @@ class BackwardsTrace : Logger::Loggable { /** * Log the stack trace. */ - void Log() { + void log() { #ifdef NDEBUG if (!log_in_prod_) { return; @@ -139,7 +139,7 @@ class BackwardsTrace : Logger::Loggable { LogAtLevel("end backtrace thread {}", stack_trace_.thread_id()); } - void LogFault(const char* signame, const void* addr) { + void logFault(const char* signame, const void* addr) { LogAtLevel("Caught {}, suspect faulting address {}", signame, addr); } From f7e5ab588f2745acbcaa06f7ab2049efa9f1233d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20No=C3=A9?= Date: Fri, 28 Apr 2017 19:28:00 -0400 Subject: [PATCH 5/7] Code review comments Used sysconf to get the runtime system page size for guard page sizes and made altstack a multiple of that size. Backtrace changed to make all logging critical, skip sentinel frame completely. Error handling changes --- source/exe/signal_action.cc | 30 ++++++------ source/exe/signal_action.h | 25 ++++++---- source/server/backtrace.h | 90 ++++++++--------------------------- test/exe/signals_test.cc | 6 +-- test/server/backtrace_test.cc | 10 ++-- tools/stack_decode.py | 3 -- 6 files changed, 60 insertions(+), 104 deletions(-) diff --git a/source/exe/signal_action.cc b/source/exe/signal_action.cc index 57266aa8244f1..371fc9cd85554 100644 --- a/source/exe/signal_action.cc +++ b/source/exe/signal_action.cc @@ -23,14 +23,14 @@ void SignalAction::sigHandler(int sig, siginfo_t* info, void* context) { #endif } - BackwardsTrace tracer(true); + BackwardsTrace tracer; tracer.logFault(strsignal(sig), info->si_addr); if (error_pc != 0) { tracer.captureFrom(error_pc); } else { tracer.capture(); } - tracer.log(); + tracer.logTrace(); signal(sig, SIG_DFL); raise(sig); @@ -38,8 +38,8 @@ void SignalAction::sigHandler(int sig, siginfo_t* info, void* context) { void SignalAction::installSigHandlers() { stack_t stack; - stack.ss_sp = altstack_ + GUARD_SIZE; // Guard page at one end ... - stack.ss_size = ALTSTACK_SIZE; // ... guard page at the other + stack.ss_sp = altstack_ + guard_size_; // Guard page at one end ... + stack.ss_size = altstack_size_; // ... guard page at the other stack.ss_flags = 0; RELEASE_ASSERT(sigaltstack(&stack, nullptr) == 0); @@ -66,32 +66,32 @@ void SignalAction::mapAndProtectStackMemory() { altstack_ = static_cast(mmap(nullptr, mapSizeWithGuards(), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0)); RELEASE_ASSERT(altstack_); - RELEASE_ASSERT(mprotect(altstack_, GUARD_SIZE, PROT_NONE) == 0); - RELEASE_ASSERT(mprotect(altstack_ + GUARD_SIZE + ALTSTACK_SIZE, GUARD_SIZE, PROT_NONE) == 0); + RELEASE_ASSERT(mprotect(altstack_, guard_size_, PROT_NONE) == 0); + RELEASE_ASSERT(mprotect(altstack_ + guard_size_ + altstack_size_, guard_size_, PROT_NONE) == 0); } void SignalAction::unmapStackMemory() { - if (altstack_ != nullptr) { - munmap(altstack_, mapSizeWithGuards()); - } + munmap(altstack_, mapSizeWithGuards()); } void SignalAction::doGoodAccessForTest() { - for (size_t i = 0; i < ALTSTACK_SIZE; ++i) { - *(altstack_ + GUARD_SIZE + i) = 42; + volatile char* altaltstack = altstack_; + for (size_t i = 0; i < altstack_size_; ++i) { + *(altaltstack + guard_size_ + i) = 42; } - for (size_t i = 0; i < ALTSTACK_SIZE; ++i) { - ASSERT(*(altstack_ + GUARD_SIZE + i) == 42); + for (size_t i = 0; i < altstack_size_; ++i) { + ASSERT(*(altaltstack + guard_size_ + i) == 42); } } void SignalAction::tryEvilAccessForTest(bool end) { + volatile char* altaltstack = altstack_; if (end) { // One byte past the valid region // http://oeis.org/A001969 - *(altstack_ + GUARD_SIZE + ALTSTACK_SIZE) = 43; + *(altaltstack + guard_size_ + altstack_size_) = 43; } else { // One byte before the valid region - *(altstack_ + GUARD_SIZE - 1) = 43; + *(altaltstack + guard_size_ - 1) = 43; } } diff --git a/source/exe/signal_action.h b/source/exe/signal_action.h index 662d63c7272e2..639b7ae2de5dc 100644 --- a/source/exe/signal_action.h +++ b/source/exe/signal_action.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "common/common/non_copyable.h" #include "server/backtrace.h" @@ -40,7 +41,9 @@ */ class SignalAction : NonCopyable { public: - SignalAction() : altstack_(nullptr) { + SignalAction() : guard_size_(sysconf(_SC_PAGE_SIZE)), + altstack_size_(guard_size_*4), + altstack_(nullptr) { mapAndProtectStackMemory(); installSigHandlers(); } @@ -57,18 +60,22 @@ class SignalAction : NonCopyable { private: /** - * Use this many bytes for the alternate signal handling stack. + * Allocate this many bytes on each side of the area used for alt stack. * - * This should be a multiple of 4096. - * Additionally, two guard pages will be allocated to bookend the usable area. + * Set to system page size. + * + * The memory will be protected from read and write. */ - static const size_t ALTSTACK_SIZE = 4096 * 4; + const size_t guard_size_; /** - * Allocate this many bytes on each side of the area used for alt stack. + * Use this many bytes for the alternate signal handling stack. * - * The memory will be protected from read and write. + * Initialized as a multiple of page size (although sigaltstack will + * do alignment as needed). + * + * Additionally, two guard pages will be allocated to bookend the usable area. */ - static const size_t GUARD_SIZE = 4096; + const size_t altstack_size_; /** * This constant array defines the signals we will insert handlers for. * @@ -78,7 +85,7 @@ class SignalAction : NonCopyable { /** * Return the memory size we actually map including two guard pages. */ - static constexpr size_t mapSizeWithGuards() { return ALTSTACK_SIZE + GUARD_SIZE * 2; } + size_t mapSizeWithGuards() const { return altstack_size_ + guard_size_ * 2; } /** * The actual signal handler function with prototype matching signal.h */ diff --git a/source/server/backtrace.h b/source/server/backtrace.h index 552c02d5e0b0e..99b9c44cb6860 100644 --- a/source/server/backtrace.h +++ b/source/server/backtrace.h @@ -8,14 +8,7 @@ do { \ BackwardsTrace t; \ t.capture(); \ - t.log(); \ - } while (0) - -#define BACKTRACE_PROD_LOG() \ - do { \ - BackwardsTrace t(true); \ - t.capture(); \ - t.log(); \ + t.logTrace(); \ } while (0) /** @@ -23,16 +16,15 @@ * stack traces on demand. To use this just do: * * BackwardsTrace tracer; - * tracer.Capture(); // Trace is captured as of here. - * tracer.Log(); // Output the captured trace to the log. + * tracer.capture(); // Trace is captured as of here. + * tracer.logTrace(); // Output the captured trace to the log. * * The capture and log steps are separated to enable debugging in the case where * you want to capture a stack trace from inside some logic but don't know whether * you want to bother logging it until later. * * For convenience a macro is provided BACKTRACE_LOG() which performs the - * construction, capture, and log in one shot. Use BACKTRACE_PROD_LOG() if you - * want the logs to appear in production (critical level, with NDEBUG) + * construction, capture, and log in one shot. * * To resolve the addresses in the backtrace output and de-interleave * multithreaded output use the tools/stack_decode.py command and pass the @@ -51,22 +43,7 @@ */ class BackwardsTrace : Logger::Loggable { public: - /** - * Construct a BackwardsTrace with optional production enablement. - * - * If the optional argument log_in_prod is true then this BackwardsTrace will - * capture even when NDEBUG is defined and will log at critical level. This - * allows defining backtrace captures associated with PANIC() and other - * serious errors encountered in real production while also defining backtrace - * captures used only in debug builds. - * - * By default log_in_prod is false and if NDEBUG is defined these methods are - * all no-ops. Backtraces will be logged at debug level when NDEBUG is not defined. - * - * @param log_in_prod Log at a critical level so we see output even in - * optimized builds. - */ - BackwardsTrace(const bool log_in_prod = false) : log_in_prod_(log_in_prod) {} + BackwardsTrace() {} /** * Capture a stack trace. @@ -74,11 +51,6 @@ class BackwardsTrace : Logger::Loggable { * The trace will begin with the call to capture(). */ void capture() { -#ifdef NDEBUG - if (!log_in_prod_) { - return; - } -#endif stack_trace_.load_here(MAX_STACK_DEPTH); } @@ -91,28 +63,20 @@ class BackwardsTrace : Logger::Loggable { * @param address The stack trace will begin from this address. */ void captureFrom(void* address) { -#ifdef NDEBUG - if (!log_in_prod_) { - return; - } -#endif stack_trace_.load_from(address, MAX_STACK_DEPTH); } /** * Log the stack trace. */ - void log() { -#ifdef NDEBUG - if (!log_in_prod_) { - return; - } -#endif + void logTrace() { backward::TraceResolver resolver; resolver.load_stacktrace(stack_trace_); // If there's nothing in the captured trace we cannot do anything. - if (stack_trace_.size() < 1) { - LogAtLevel("Back trace attempt failed"); + // The size must be at least two for useful info - there is a sentinel frame + // at the end that we ignore. + if (stack_trace_.size() < 2) { + log().critical("Back trace attempt failed"); return; } @@ -120,38 +84,26 @@ class BackwardsTrace : Logger::Loggable { backward::ResolvedTrace first_frame_trace = resolver.resolve(stack_trace_[0]); auto obj_name = first_frame_trace.object_filename; - LogAtLevel("Backtrace obj<{}> thr<{}> (use tools/stack_decode.py):", obj_name, thread_id); + log().critical("Backtrace obj<{}> thr<{}> (use tools/stack_decode.py):", obj_name, thread_id); - // Why start at 2? To hide the function call to backward that began the - // trace. - for (unsigned int i = 0; i < stack_trace_.size(); ++i) { - // Backtrace gets tagged by ASAN when we try the object name resolution for the last - // frame on stack, so skip the last one. It has no useful info anyway. - if (i + 1 != stack_trace_.size()) { - backward::ResolvedTrace trace = resolver.resolve(stack_trace_[i]); - if (trace.object_filename != obj_name) { - obj_name = trace.object_filename; - LogAtLevel("thr<{}> obj<{}>", thread_id, obj_name); - } + // Backtrace gets tagged by ASAN when we try the object name resolution for the last + // frame on stack, so skip the last one. It has no useful info anyway. + for (unsigned int i = 0; i < stack_trace_.size() - 1; ++i) { + backward::ResolvedTrace trace = resolver.resolve(stack_trace_[i]); + if (trace.object_filename != obj_name) { + obj_name = trace.object_filename; + log().critical("thr<{}> obj<{}>", thread_id, obj_name); } - LogAtLevel("thr<{}> #{} {}", thread_id, stack_trace_[i].idx, stack_trace_[i].addr); + log().critical("thr<{}> #{} {}", thread_id, stack_trace_[i].idx, stack_trace_[i].addr); } - LogAtLevel("end backtrace thread {}", stack_trace_.thread_id()); + log().critical("end backtrace thread {}", stack_trace_.thread_id()); } void logFault(const char* signame, const void* addr) { - LogAtLevel("Caught {}, suspect faulting address {}", signame, addr); + log().critical("Caught {}, suspect faulting address {}", signame, addr); } private: - template void LogAtLevel(T format_str, Args... args) { - if (log_in_prod_) { - log().critical(format_str, args...); - } else { - log().debug(format_str, args...); - } - } static const int MAX_STACK_DEPTH = 64; backward::StackTrace stack_trace_; - const bool log_in_prod_; }; diff --git a/test/exe/signals_test.cc b/test/exe/signals_test.cc index f0a78406e3fb3..d1abc187e6af3 100644 --- a/test/exe/signals_test.cc +++ b/test/exe/signals_test.cc @@ -71,8 +71,8 @@ TEST(Signals, AbortDeathTest) { TEST(Signals, IllegalStackAccessDeathTest) { SignalAction actions; - EXPECT_DEATH(actions.TryEvilAccessForTest(false), ""); - EXPECT_DEATH(actions.TryEvilAccessForTest(true), ""); + EXPECT_DEATH(actions.tryEvilAccessForTest(false), ""); + EXPECT_DEATH(actions.tryEvilAccessForTest(true), ""); } TEST(Signals, LegalTest) { @@ -92,5 +92,5 @@ TEST(Signals, RaiseNonFatalTest) { TEST(Signals, LegalStackAccessTest) { SignalAction actions; - actions.DoGoodAccessForTest(); + actions.doGoodAccessForTest(); } diff --git a/test/server/backtrace_test.cc b/test/server/backtrace_test.cc index 6c6da20deb133..08df4c3c3ccf5 100644 --- a/test/server/backtrace_test.cc +++ b/test/server/backtrace_test.cc @@ -6,13 +6,13 @@ TEST(Backward, Basic) { // There isn't much to test here and this feature is really just useful for // debugging. This test simply verifies that we do not cause a crash when // logging a backtrace, and covers the added lines. - BackwardsTrace tracer(true); // Log at a level that means we cover lines even in opt - tracer.Capture(); - tracer.Log(); + BackwardsTrace tracer; + tracer.capture(); + tracer.logTrace(); } TEST(Backward, InvalidUsageTest) { // Ensure we do not crash if logging is attempted when there was no trace captured - BackwardsTrace tracer(true); - tracer.Log(); + BackwardsTrace tracer; + tracer.logTrace(); } diff --git a/tools/stack_decode.py b/tools/stack_decode.py index 1c08b54616f99..e95af62c14192 100755 --- a/tools/stack_decode.py +++ b/tools/stack_decode.py @@ -86,9 +86,6 @@ def output_stacktrace(thread_id, traceinfo): for address_list in traceinfo.obj_list: piped_input = "" obj_name = address_list.obj_file - # end of stack sentinel - if obj_name == "[0xffffffffffffffff]": - break for stack_addr in address_list.addresses: piped_input += (stack_addr + "\n") output_lines += run_addr2line(obj_name, piped_input) From b58da42d9baa687e90a9126038fc55e422d5bc0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20No=C3=A9?= Date: Fri, 28 Apr 2017 19:30:54 -0400 Subject: [PATCH 6/7] Fix formatting --- source/exe/signal_action.cc | 4 +--- source/exe/signal_action.h | 6 +++--- source/server/backtrace.h | 8 ++------ 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/source/exe/signal_action.cc b/source/exe/signal_action.cc index 371fc9cd85554..faa85013db358 100644 --- a/source/exe/signal_action.cc +++ b/source/exe/signal_action.cc @@ -70,9 +70,7 @@ void SignalAction::mapAndProtectStackMemory() { RELEASE_ASSERT(mprotect(altstack_ + guard_size_ + altstack_size_, guard_size_, PROT_NONE) == 0); } -void SignalAction::unmapStackMemory() { - munmap(altstack_, mapSizeWithGuards()); -} +void SignalAction::unmapStackMemory() { munmap(altstack_, mapSizeWithGuards()); } void SignalAction::doGoodAccessForTest() { volatile char* altaltstack = altstack_; diff --git a/source/exe/signal_action.h b/source/exe/signal_action.h index 639b7ae2de5dc..fe318e8dcd037 100644 --- a/source/exe/signal_action.h +++ b/source/exe/signal_action.h @@ -4,6 +4,7 @@ #include #include "common/common/non_copyable.h" + #include "server/backtrace.h" /** @@ -41,9 +42,8 @@ */ class SignalAction : NonCopyable { public: - SignalAction() : guard_size_(sysconf(_SC_PAGE_SIZE)), - altstack_size_(guard_size_*4), - altstack_(nullptr) { + SignalAction() + : guard_size_(sysconf(_SC_PAGE_SIZE)), altstack_size_(guard_size_ * 4), altstack_(nullptr) { mapAndProtectStackMemory(); installSigHandlers(); } diff --git a/source/server/backtrace.h b/source/server/backtrace.h index 99b9c44cb6860..4711c7bdb3961 100644 --- a/source/server/backtrace.h +++ b/source/server/backtrace.h @@ -50,9 +50,7 @@ class BackwardsTrace : Logger::Loggable { * * The trace will begin with the call to capture(). */ - void capture() { - stack_trace_.load_here(MAX_STACK_DEPTH); - } + void capture() { stack_trace_.load_here(MAX_STACK_DEPTH); } /** * Capture a stack trace from a particular address. @@ -62,9 +60,7 @@ class BackwardsTrace : Logger::Loggable { * * @param address The stack trace will begin from this address. */ - void captureFrom(void* address) { - stack_trace_.load_from(address, MAX_STACK_DEPTH); - } + void captureFrom(void* address) { stack_trace_.load_from(address, MAX_STACK_DEPTH); } /** * Log the stack trace. From 0ab65ec65da3a3ef20bca9d1065a7d969537db1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20No=C3=A9?= Date: Mon, 1 May 2017 10:40:19 -0400 Subject: [PATCH 7/7] Final review changes Make inclusion of sigaction_lib dependency conditional on command line arguments NonCopyable means no need to delete default cctor. Consider signal handler removal logically non-const. --- source/exe/BUILD | 6 ++++-- source/exe/main.cc | 3 +++ source/exe/signal_action.cc | 2 +- source/exe/signal_action.h | 3 +-- tools/bazel.rc | 1 + 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/source/exe/BUILD b/source/exe/BUILD index 3f950fe559a07..ec42b253d3f73 100644 --- a/source/exe/BUILD +++ b/source/exe/BUILD @@ -52,8 +52,10 @@ envoy_cc_library( deps = [ ":envoy_common_lib", ":hot_restart_lib", - ":sigaction_lib", - ], + ] + select({ + "//bazel:disable_signal_trace": [], + "//conditions:default": [":sigaction_lib"], + }), ) envoy_cc_library( diff --git a/source/exe/main.cc b/source/exe/main.cc index f96941a6c85ad..5ddf2c0cc2ea4 100644 --- a/source/exe/main.cc +++ b/source/exe/main.cc @@ -7,7 +7,10 @@ #include "common/stats/thread_local_store.h" #include "exe/hot_restart.h" + +#ifdef ENVOY_HANDLE_SIGNALS #include "exe/signal_action.h" +#endif #include "server/drain_manager_impl.h" #include "server/options_impl.h" diff --git a/source/exe/signal_action.cc b/source/exe/signal_action.cc index faa85013db358..d0588611c48b6 100644 --- a/source/exe/signal_action.cc +++ b/source/exe/signal_action.cc @@ -54,7 +54,7 @@ void SignalAction::installSigHandlers() { } } -void SignalAction::removeSigHandlers() const { +void SignalAction::removeSigHandlers() { for (const auto& sig : FATAL_SIGS) { signal(sig, SIG_DFL); } diff --git a/source/exe/signal_action.h b/source/exe/signal_action.h index fe318e8dcd037..3936d5c118d35 100644 --- a/source/exe/signal_action.h +++ b/source/exe/signal_action.h @@ -47,7 +47,6 @@ class SignalAction : NonCopyable { mapAndProtectStackMemory(); installSigHandlers(); } - SignalAction(const SignalAction&) = delete; ~SignalAction() { removeSigHandlers(); unmapStackMemory(); @@ -102,7 +101,7 @@ class SignalAction : NonCopyable { * Signal handlers will be reset to the default, NOT back to any signal * handler existing before InstallSigHandlers(). */ - void removeSigHandlers() const; + void removeSigHandlers(); /** * Use mmap to map anonymous memory for the alternative stack. * diff --git a/tools/bazel.rc b/tools/bazel.rc index 0983941902e29..81edaa7abbf53 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -8,3 +8,4 @@ build:asan --linkopt -ldl build:asan --define tcmalloc=disabled build:asan --build_tag_filters=-no_asan build:asan --test_tag_filters=-no_asan +build:asan --define signal_trace=disabled