From 01a6af752605d4ac4f5f03e18ccd7db0562f39f6 Mon Sep 17 00:00:00 2001 From: John Millikin Date: Fri, 6 Apr 2018 15:12:32 -0700 Subject: [PATCH 1/5] Mark BUILD_SCM_REVISION as stable so Bazel will always relink with it. Fixes #2551 uses genrule workaround for missing STABLE_ keys in cc linkstamp, more details at https://github.com/bazelbuild/bazel/issues/1992 Signed-off-by: John Millikin --- bazel/get_workspace_status | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/bazel/get_workspace_status b/bazel/get_workspace_status index 1704aa6fcb056..8f83d55cbf782 100755 --- a/bazel/get_workspace_status +++ b/bazel/get_workspace_status @@ -12,12 +12,18 @@ # If the script exits with non-zero code, it's considered as a failure # and the output will be discarded. +# For Envoy in particular, we want to force binaries to relink when the Git +# SHA changes (https://github.com/envoyproxy/envoy/issues/2551). This can be +# done by prefixing keys with "STABLE_". To avoid breaking compatibility with +# other status scripts, this one still echos the non-stable ("volatile") names. + # If this SOURCE_VERSION file exists then it must have been placed here by a # distribution doing a non-git, source build. # Distributions would be expected to echo the commit/tag as BUILD_SCM_REVISION if [ -f SOURCE_VERSION ] then echo "BUILD_SCM_REVISION $(cat SOURCE_VERSION)" + echo "STABLE_BUILD_SCM_REVISION $(cat SOURCE_VERSION)" echo "BUILD_SCM_STATUS Distribution" exit 0 fi @@ -29,6 +35,7 @@ then exit 1 fi echo "BUILD_SCM_REVISION ${git_rev}" +echo "STABLE_BUILD_SCM_REVISION ${git_rev}" # Check whether there are any uncommited changes git diff-index --quiet HEAD -- @@ -39,3 +46,4 @@ else tree_status="Modified" fi echo "BUILD_SCM_STATUS ${tree_status}" +echo "STABLE_BUILD_SCM_STATUS ${tree_status}" From ebf681ffd9203b1783f1fb962dd7f73bc5ea34fb Mon Sep 17 00:00:00 2001 From: John Millikin Date: Fri, 6 Apr 2018 15:43:24 -0700 Subject: [PATCH 2/5] Replace `git_sha_rewriter.py` with a generated linker option. Fixes #2625 Signed-off-by: John Millikin --- bazel/BUILD | 24 +++++++ bazel/envoy_build_system.bzl | 55 +++++++--------- ci/do_ci.sh | 10 +-- tools/BUILD | 1 - tools/git_sha_rewriter.py | 117 ----------------------------------- 5 files changed, 51 insertions(+), 156 deletions(-) delete mode 100755 tools/git_sha_rewriter.py diff --git a/bazel/BUILD b/bazel/BUILD index 0149762c3d403..c40a87c27fc17 100644 --- a/bazel/BUILD +++ b/bazel/BUILD @@ -7,6 +7,30 @@ exports_files([ "sh_test_wrapper.sh", ]) +genrule( + name = "gnu_build_id", + outs = ["gnu_build_id.ldscript"], + cmd = """ + echo --build-id=0x$$( + grep BUILD_SCM_REVISION bazel-out/volatile-status.txt \\ + | sed 's/^BUILD_SCM_REVISION //') \\ + > $@ + """, + stamp = 1, +) + +# For MacOS, which doesn't have GNU ld's `--build-id` flag. +genrule( + name = "raw_build_id", + outs = ["raw_build_id.ldscript"], + cmd = """ + grep BUILD_SCM_REVISION bazel-out/volatile-status.txt \\ + | sed 's/^BUILD_SCM_REVISION //' \\ + > $@ + """, + stamp = 1, +) + config_setting( name = "opt_build", values = {"compilation_mode": "opt"}, diff --git a/bazel/envoy_build_system.bzl b/bazel/envoy_build_system.bzl index 9b24616d3be9d..62fbc5e0c10bb 100644 --- a/bazel/envoy_build_system.bzl +++ b/bazel/envoy_build_system.bzl @@ -37,9 +37,6 @@ def envoy_linkopts(): return select({ # OSX provides system and stdc++ libraries dynamically, so they can't be linked statically. # Further, the system library transitively links common libraries (e.g., pthread). - # TODO(zuercher): build id could be supported via "-sectcreate __TEXT __build_id " - # The file could should contain the current git SHA (or enough placeholder data to allow - # it to be rewritten by tools/git_sha_rewriter.py). "@bazel_tools//tools/osx:darwin": [ # See note here: http://luajit.org/install.html "-pagezero_size 10000", "-image_base 100000000", @@ -48,15 +45,6 @@ def envoy_linkopts(): "-pthread", "-lrt", "-ldl", - # Force MD5 hash in build. This is part of the workaround for - # https://github.com/bazelbuild/bazel/issues/2805. Bazel actually - # does this by itself prior to - # https://github.com/bazelbuild/bazel/commit/724706ba4836c3366fc85b40ed50ccf92f4c3882. - # Ironically, forcing it here so that in future releases we will - # have the same behavior. When everyone is using an updated version - # of Bazel, we can use linkopts to set the git SHA1 directly in the - # --build-id and avoid doing the following. - '-Wl,--build-id=md5', '-Wl,--hash-style=gnu', "-static-libstdc++", "-static-libgcc", @@ -65,6 +53,26 @@ def envoy_linkopts(): ["-static-libstdc++", "-static-libgcc"]) \ + envoy_select_exported_symbols(["-Wl,-E"]) +def _envoy_stamped_linkopts(): + return select({ + "@bazel_tools//tools/osx:darwin": [ + "-sectcreate __TEXT __build_id", "$(location //bazel:raw_build_id.ldscript)" + ], + "//conditions:default": [ + "-Wl,@$(location //bazel:gnu_build_id.ldscript)", + ], + }) + +def _envoy_stamped_deps(): + return select({ + "@bazel_tools//tools/osx:darwin": [ + "//bazel:raw_build_id.ldscript" + ], + "//conditions:default": [ + "//bazel:gnu_build_id.ldscript", + ], + }) + # Compute the test linkopts based on various options. def envoy_test_linkopts(): return select({ @@ -149,21 +157,6 @@ def envoy_cc_library(name, strip_include_prefix = strip_include_prefix, ) -def _git_stamped_genrule(repository, name): - # To workaround https://github.com/bazelbuild/bazel/issues/2805, we - # do binary rewriting to replace the linker produced MD5 hash with the - # version_generated.cc git SHA1 hash (truncated). - rewriter = repository + "//tools:git_sha_rewriter.py" - native.genrule( - name = name + "_stamped", - srcs = [name], - outs = [name + ".stamped"], - cmd = "cp $(location " + name + ") $@ && " + - "chmod u+w $@ && " + - "$(location " + rewriter + ") $@", - tools = [rewriter], - ) - # Envoy C++ binary targets should be specified with this function. def envoy_cc_binary(name, srcs = [], @@ -178,11 +171,9 @@ def envoy_cc_binary(name, if not linkopts: linkopts = envoy_linkopts() - - # Implicit .stamped targets to obtain builds with the (truncated) git SHA1. if stamped: - _git_stamped_genrule(repository, name) - _git_stamped_genrule(repository, name + ".stripped") + linkopts = linkopts + _envoy_stamped_linkopts() + deps = deps + _envoy_stamped_deps() deps = deps + [envoy_external_dep_path(dep) for dep in external_deps] native.cc_binary( name = name, @@ -194,8 +185,6 @@ def envoy_cc_binary(name, linkstatic = 1, visibility = visibility, malloc = tcmalloc_external_dep(repository), - # See above comment on MD5 hash, this is another "force MD5 stamps" to make sure our - # rewriting is robust. stamp = 1, deps = deps, ) diff --git a/ci/do_ci.sh b/ci/do_ci.sh index fe6a2428fc2ee..fa9b61aa4c32c 100755 --- a/ci/do_ci.sh +++ b/ci/do_ci.sh @@ -10,11 +10,11 @@ echo "building using ${NUM_CPUS} CPUs" function bazel_release_binary_build() { echo "Building..." cd "${ENVOY_CI_DIR}" - bazel --batch build ${BAZEL_BUILD_OPTIONS} -c opt //source/exe:envoy-static.stamped + bazel --batch build ${BAZEL_BUILD_OPTIONS} -c opt //source/exe:envoy-static # Copy the envoy-static binary somewhere that we can access outside of the # container. cp -f \ - "${ENVOY_CI_DIR}"/bazel-genfiles/source/exe/envoy-static.stamped \ + "${ENVOY_CI_DIR}"/bazel-bin/source/exe/envoy-static \ "${ENVOY_DELIVERY_DIR}"/envoy # TODO(mattklein123): Replace this with caching and a different job which creates images. @@ -28,11 +28,11 @@ function bazel_release_binary_build() { function bazel_debug_binary_build() { echo "Building..." cd "${ENVOY_CI_DIR}" - bazel --batch build ${BAZEL_BUILD_OPTIONS} -c dbg //source/exe:envoy-static.stamped + bazel --batch build ${BAZEL_BUILD_OPTIONS} -c dbg //source/exe:envoy-static # Copy the envoy-static binary somewhere that we can access outside of the # container. cp -f \ - "${ENVOY_CI_DIR}"/bazel-genfiles/source/exe/envoy-static.stamped \ + "${ENVOY_CI_DIR}"/bazel-bin/source/exe/envoy-static \ "${ENVOY_DELIVERY_DIR}"/envoy-debug } @@ -146,7 +146,7 @@ elif [[ "$1" == "bazel.coverity" ]]; then echo "Building..." cd "${ENVOY_CI_DIR}" /build/cov-analysis/bin/cov-build --dir "${ENVOY_BUILD_DIR}"/cov-int bazel --batch build --action_env=LD_PRELOAD ${BAZEL_BUILD_OPTIONS} \ - -c opt //source/exe:envoy-static.stamped + -c opt //source/exe:envoy-static # tar up the coverity results tar czvf "${ENVOY_BUILD_DIR}"/envoy-coverity-output.tgz -C "${ENVOY_BUILD_DIR}" cov-int # Copy the Coverity results somewhere that we can access outside of the container. diff --git a/tools/BUILD b/tools/BUILD index e5dd2b3b56203..0f007ab342bb3 100644 --- a/tools/BUILD +++ b/tools/BUILD @@ -10,7 +10,6 @@ envoy_package() exports_files([ "gen_git_sha.sh", - "git_sha_rewriter.py", "stack_decode.py", "check_format.py", "header_order.py", diff --git a/tools/git_sha_rewriter.py b/tools/git_sha_rewriter.py deleted file mode 100755 index 08d78edfc4c23..0000000000000 --- a/tools/git_sha_rewriter.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python - -# This tool takes an ELF binary that has been built with -Wl,--build-id=md5' -# '-Wl,--hash-style=gnu (as done by Bazel prior to -# https://github.com/bazelbuild/bazel/commit/724706ba4836c3366fc85b40ed50ccf92f4c3882, -# versions prior to 0.5), and replaces the MD5 compiler hash with a truncated -# git SHA1 hash found in Envoy's version_generated.cc. -# -# This is useful to folks who want the build commit in the .note.gnu.build-id -# section rather than the compiler hash of inputs. Please note that the hash is -# a 16 byte truncated git SHA1, rather than a complete 20 byte git SHA1. -# This is a workaround to https://github.com/bazelbuild/bazel/issues/2805. - -import binascii -import platform -import re -import subprocess as sp -import sys - -# This is what the part of .note.gnu.build-id prior to the MD5 hash looks like. -EXPECTED_BUILD_ID_NOTE_PREFIX = [ - # The "name" of the note is 4 bytes long. - 0x04, - 0x00, - 0x00, - 0x00, - # The "description" of the note is 16 bytes. - 0x10, - 0x00, - 0x00, - 0x00, - # The "type" of the note. - 0x03, - 0x00, - 0x00, - 0x00, - # 'G', 'N', 'U', '\0' (name) - 0x47, - 0x4e, - 0x55, - 0x00, -] -# We're expecting an MD5 hash, 16 bytes. -MD5_HASH_LEN = 16 -SHA1_HASH_LEN = 20 -EXPECTED_BUILD_ID_NOTE_LENGTH = len(EXPECTED_BUILD_ID_NOTE_PREFIX) + MD5_HASH_LEN - - -class RewriterException(Exception): - pass - - -# Extract git SHA1 hash from "envoy --version" stdout. -def ExtractGitSha(path): - version_output = sp.check_output([path, '--version']).strip() - sr = re.search('version: (\w+)/', version_output) - if not sr: - raise RewriterException('Bad envoy --version: %s' % version_output) - sha1_hash = sr.group(1) - if len(sha1_hash) != 2 * SHA1_HASH_LEN: - raise RewriterException('Bad SHA1 hash in %s: %s' % (path, sha1_hash)) - return sha1_hash - - -# Scrape the offset of .note.gnu.build-id via readelf from the binary. Also -# verify the note section is what we expect. -def ExtractBuildIdNoteOffset(path): - try: - readelf_output = sp.check_output('readelf -SW %s' % path, shell=True) - # Sanity check the ordering of fields from readelf. - if not re.search('Name\s+Type\s+Address\s+Off\s+Size\s', readelf_output): - raise RewriterException('Invalid readelf output: %s' % readelf_output) - sr = re.search('.note.gnu.build-id\s+NOTE\s+\w+\s+(\w+)\s(\w+)\s', - readelf_output) - if not sr: - raise RewriterException( - 'Unable to parse .note.gnu.build-id note: %s' % readelf_output) - raw_note_offset, raw_note_size = sr.groups() - if long(raw_note_size, 16) != EXPECTED_BUILD_ID_NOTE_LENGTH: - raise RewriterException( - 'Incorrect .note.gnu.build-id note size: %s' % readelf_output) - note_offset = long(raw_note_offset, 16) - with open(path, 'rb') as f: - f.seek(note_offset) - note_prefix = [ord(b) for b in f.read(len(EXPECTED_BUILD_ID_NOTE_PREFIX))] - if note_prefix != EXPECTED_BUILD_ID_NOTE_PREFIX: - raise RewriterException( - 'Unexpected .note.gnu.build-id prefix in %s: %s' % (path, - note_prefix)) - return note_offset - except sp.CalledProcessError as e: - raise RewriterException('%s %s' % (e, readelf_output.output)) - - -# Inplace binary rewriting of the 16 byte .note.gnu.build-id description with -# the truncated hash. -def RewriteBinary(path, offset, git5_sha1): - truncated_hash = git5_sha1[:2 * MD5_HASH_LEN] - print 'Writing %s truncated to %s at offset 0x%x in %s' % (git5_sha1, - truncated_hash, - offset, path) - with open(path, 'r+b') as f: - f.seek(offset + len(EXPECTED_BUILD_ID_NOTE_PREFIX)) - f.write(binascii.unhexlify(truncated_hash)) - - -if __name__ == '__main__': - if len(sys.argv) != 2: - print 'Usage: %s ' % sys.argv[0] - sys.exit(1) - if platform.system() == 'Darwin': - print 'Stamping not supported for Mach-O binaries' - sys.exit(0) - envoy_bin_path = sys.argv[1] - version_generated = ExtractGitSha(envoy_bin_path) - build_id_note_offset = ExtractBuildIdNoteOffset(envoy_bin_path) - RewriteBinary(envoy_bin_path, build_id_note_offset, version_generated) From 786acc75909c95a5ff4dcf4832def39acc4a01f2 Mon Sep 17 00:00:00 2001 From: John Millikin Date: Mon, 23 Apr 2018 09:14:40 -0700 Subject: [PATCH 3/5] Disable build ID handling in coverage mode due to gold internal error. Signed-off-by: John Millikin --- bazel/BUILD | 5 +++++ bazel/envoy_build_system.bzl | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/bazel/BUILD b/bazel/BUILD index c40a87c27fc17..2f4419162b9a9 100644 --- a/bazel/BUILD +++ b/bazel/BUILD @@ -46,6 +46,11 @@ config_setting( values = {"compilation_mode": "dbg"}, ) +config_setting( + name = "coverage_build", + values = {"define": "ENVOY_CONFIG_COVERAGE=1"}, +) + config_setting( name = "disable_tcmalloc", values = {"define": "tcmalloc=disabled"}, diff --git a/bazel/envoy_build_system.bzl b/bazel/envoy_build_system.bzl index 62fbc5e0c10bb..41c588e24cffd 100644 --- a/bazel/envoy_build_system.bzl +++ b/bazel/envoy_build_system.bzl @@ -55,9 +55,18 @@ def envoy_linkopts(): def _envoy_stamped_linkopts(): return select({ + # Coverage builds in CI are failing to link when setting a build ID. + # + # /usr/bin/ld.gold: internal error in write_build_id, at ../../gold/layout.cc:5419 + "@envoy//bazel:coverage_build": [], + + # MacOS doesn't have an official equivalent to the `.note.gnu.build-id` + # ELF section, so just stuff the raw ID into a new text section. "@bazel_tools//tools/osx:darwin": [ "-sectcreate __TEXT __build_id", "$(location //bazel:raw_build_id.ldscript)" ], + + # Note: assumes GNU GCC (or compatible) handling of `--build-id` flag. "//conditions:default": [ "-Wl,@$(location //bazel:gnu_build_id.ldscript)", ], From ae6cf3343bdfa760b315eecbdae55f8a31d1f558 Mon Sep 17 00:00:00 2001 From: John Millikin Date: Mon, 23 Apr 2018 11:30:49 -0700 Subject: [PATCH 4/5] Add comments about undocumented genrule.stamp attr. Signed-off-by: John Millikin --- bazel/BUILD | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bazel/BUILD b/bazel/BUILD index 2f4419162b9a9..6a5258d5440b0 100644 --- a/bazel/BUILD +++ b/bazel/BUILD @@ -16,6 +16,8 @@ genrule( | sed 's/^BUILD_SCM_REVISION //') \\ > $@ """, + # Undocumented attr to depend on workspace status files. + # https://github.com/bazelbuild/bazel/issues/4942 stamp = 1, ) @@ -28,6 +30,8 @@ genrule( | sed 's/^BUILD_SCM_REVISION //' \\ > $@ """, + # Undocumented attr to depend on workspace status files. + # https://github.com/bazelbuild/bazel/issues/4942 stamp = 1, ) From 37361de05f8c9d6bc80af1865152c91830c519cb Mon Sep 17 00:00:00 2001 From: John Millikin Date: Mon, 23 Apr 2018 14:08:56 -0700 Subject: [PATCH 5/5] empty commit to poke DCO bot Signed-off-by: John Millikin