From fdbbc9c7137d04caa99cce795da6bfb9000affda Mon Sep 17 00:00:00 2001 From: Marc Herbert Date: Mon, 28 Feb 2022 21:52:41 -0800 Subject: [PATCH 1/2] sof-kernel-log-check: stop silently ignoring bad timestamp input The purpose of test code is to find problems, not hide them. Signed-off-by: Marc Herbert --- tools/sof-kernel-log-check.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/sof-kernel-log-check.sh b/tools/sof-kernel-log-check.sh index ffa1c0d1..131d807e 100755 --- a/tools/sof-kernel-log-check.sh +++ b/tools/sof-kernel-log-check.sh @@ -126,7 +126,11 @@ # Append some garbage to an ignore pattern to turn it off. Much easier # than deleting it. -begin_timestamp=${1:-0} +# Don't look at journalctl logs before this time in seconds since +# 1970. Defaults to zero which is a no-op because we always use -k or +# -b. +begin_timestamp=${1:-0000000000} + declare ignore_str # pwd resolves relative paths @@ -376,7 +380,7 @@ ignore_str="$ignore_str"'|iwlwifi [[:digit:].:]+: ' if [[ $begin_timestamp =~ ^[0-9]{10} ]]; then cmd="journalctl_cmd --since=@$begin_timestamp" else - cmd="journalctl_cmd" + die "Invalid begin_timestamp $1 argument: $begin_timestamp" fi declare -p cmd @@ -388,6 +392,7 @@ else fi [[ -z "$err" ]] || { + type journalctl_cmd echo "$(date -u '+%Y-%m-%d %T %Z')" "[ERROR]" "Caught kernel log error" echo "===========================>>" echo "$err" From 5b385d572e52b210b01ac21e82167b06490fa79c Mon Sep 17 00:00:00 2001 From: Marc Herbert Date: Mon, 28 Feb 2022 22:34:51 -0800 Subject: [PATCH 2/2] checkpoints: do not use SOF_TEST_INTERVAL in test _iterations_ The purpose of SOF_TEST_INTERVAL is to avoid log gaps between tests, _not_ between test iterations of the same test. Fixes issue where the (recent) check-kmod-load-unload.sh was catching _multiple_ firmware boots, including boots from previous test iterations, example: sof-test/test-case/check-mod-load-unload.sh -l 50 Polling 'sof_firmware_boot_complete --since=@1646100584' every 1s for 10s [ 2968.155103] kernel: sof-audio-pci-intel-tgl 0000:00:1f.3: firmware boot complete [ 2970.706560] kernel: sof-audio-pci-intel-tgl 0000:00:1f.3: firmware boot complete [ 2974.522111] kernel: sof-audio-pci-intel-tgl 0000:00:1f.3: firmware boot complete Completed 'sof_firmware_boot_complete --since=@1646100584' after 0s and 1 attempts This may fix other, similar tests too. Signed-off-by: Marc Herbert --- case-lib/hijack.sh | 2 ++ case-lib/lib.sh | 11 ++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/case-lib/hijack.sh b/case-lib/hijack.sh index f7bd67b0..ead9f31b 100644 --- a/case-lib/hijack.sh +++ b/case-lib/hijack.sh @@ -86,6 +86,8 @@ function func_exit_handler() fi if [[ "$KERNEL_CHECKPOINT" =~ ^[0-9]{10} ]]; then + # Do not collect the entire duration of the test but only the + # last iteration. journalctl_cmd --since=@"$KERNEL_CHECKPOINT" > "$LOG_ROOT/dmesg.txt" elif [[ "$KERNEL_CHECKPOINT" == "disabled" ]]; then journalctl_cmd > "$LOG_ROOT/dmesg.txt" diff --git a/case-lib/lib.sh b/case-lib/lib.sh index 2798ae56..cfcdf604 100644 --- a/case-lib/lib.sh +++ b/case-lib/lib.sh @@ -82,7 +82,14 @@ setup_kernel_check_point() # appear in the next one, see comments in config.sh. Add 3 extra # second to account for our own, sof-test delays after PASS/FAIL # decision: time spent collecting logs etc. - KERNEL_CHECKPOINT=$(($(date +%s) - SOF_TEST_INTERVAL - 3)) + if [ -z "$KERNEL_CHECKPOINT" ]; then + KERNEL_CHECKPOINT=$(($(date +%s) - SOF_TEST_INTERVAL - 3)) + else + # Not the first time we are called so this is a test + # _iteration_. Add just one extra second in case a test makes + # the mistake to call this function _after_ checking the logs. + KERNEL_CHECKPOINT=$(($(date +%s) - 1)) + fi } # This function adds a fake error to dmesg (which is always saved by @@ -425,6 +432,8 @@ journalctl_cmd() --no-hostname "$@" } +# Force the exit handler to collect all the logs since boot time instead +# of just the last test iteration. disable_kernel_check_point() { KERNEL_CHECKPOINT="disabled"