From 319f690d6ca979e220abe37bc2620092fe125ec5 Mon Sep 17 00:00:00 2001 From: Tomasz Wojcikowski Date: Tue, 9 Dec 2025 07:40:26 +0100 Subject: [PATCH 1/4] Add timeout handling to run_tracked function and corresponding test case --- .tool-versions | 1 + src/cets_long.erl | 97 +++++++++++++++++++++++++++++++-------------- test/cets_SUITE.erl | 30 ++++++++++++++ 3 files changed, 98 insertions(+), 30 deletions(-) create mode 100644 .tool-versions diff --git a/.tool-versions b/.tool-versions new file mode 100644 index 00000000..825f60e9 --- /dev/null +++ b/.tool-versions @@ -0,0 +1 @@ +rebar 3.25.1 diff --git a/src/cets_long.erl b/src/cets_long.erl index b97e84ca..939335b9 100644 --- a/src/cets_long.erl +++ b/src/cets_long.erl @@ -1,6 +1,7 @@ %% @doc Helper to log long running operations. -module(cets_long). --export([run_spawn/2, run_tracked/2]). +-export([run_spawn/2, run_tracked/2, run_tracked/3]). +-ignore_xref([run_tracked/3]). -ifdef(TEST). -export([pinfo/2]). @@ -19,6 +20,9 @@ -type task_fun() :: fun(() -> task_result()). %% User provided function to execute. +-type task_timeout() :: timeout(). +%% Timeout for task execution in milliseconds. + %% @doc Spawns a new process to do some memory-intensive task. %% %% This allows to reduce GC on the parent process. @@ -45,18 +49,26 @@ run_spawn(Info, F) -> erlang:raise(Class, Reason, Stacktrace) end. -%% @doc Runs function `Fun'. +%% @doc Runs function `Fun' without timeout. %% %% Logs errors. %% Logs if function execution takes too long. -%% Does not catches the errors - the caller would have to catch -%% if they want to prevent an error. +%% Catches errors and re-raises them after logging. -spec run_tracked(log_info(), task_fun()) -> task_result(). run_tracked(Info, Fun) -> + run_tracked(Info, Fun, infinity). + +%% @doc Runs function `Fun' with a specified timeout. +%% +%% Logs errors. +%% Logs if function execution takes too long. +%% Catches errors and re-raises them after logging. +-spec run_tracked(log_info(), task_fun(), task_timeout()) -> task_result(). +run_tracked(Info, Fun, Timeout) -> Parent = self(), Start = erlang:system_time(millisecond), ?LOG_INFO(Info#{what => task_started}), - Pid = spawn_mon(Info, Parent, Start), + Pid = spawn_mon(Info, Parent, Start, Timeout), try Fun() catch @@ -80,46 +92,71 @@ run_tracked(Info, Fun) -> Pid ! stop end. -spawn_mon(Info, Parent, Start) -> +spawn_mon(Info, Parent, Start, Timeout) -> Ref = make_ref(), %% We do not link, because we want to log if the Parent dies - Pid = spawn(fun() -> run_monitor(Info, Ref, Parent, Start) end), + Pid = spawn(fun() -> run_monitor(Info, Ref, Parent, Start, Timeout) end), %% Ensure there is no race conditions by waiting till the monitor is added receive {monitor_added, Ref} -> ok end, Pid. -run_monitor(Info, Ref, Parent, Start) -> +run_monitor(Info, Ref, Parent, Start, Timeout) -> Mon = erlang:monitor(process, Parent), Parent ! {monitor_added, Ref}, Interval = maps:get(report_interval, Info, 5000), - monitor_loop(Mon, Info, Parent, Start, Interval). + monitor_loop(Mon, Info, Parent, Start, Interval, Timeout). -monitor_loop(Mon, Info, Parent, Start, Interval) -> - receive - {'DOWN', _MonRef, process, _Pid, shutdown} -> - %% Special case, the long task is stopped using exit(Pid, shutdown) - ok; - {'DOWN', MonRef, process, _Pid, Reason} when Mon =:= MonRef -> +monitor_loop(Mon, Info, Parent, Start, Interval, Timeout) -> + Diff = diff(Start), + case Timeout =/= infinity andalso Diff >= Timeout of + true -> ?LOG_ERROR(Info#{ - what => task_failed, - reason => Reason, + what => task_timeout, caller_pid => Parent, - reported_by => monitor_process + time_ms => Diff, + timeout_ms => Timeout, + current_stacktrace => pinfo(Parent, current_stacktrace) }), - ok; - stop -> - ok - after Interval -> - Diff = diff(Start), - ?LOG_WARNING(Info#{ - what => long_task_progress, - caller_pid => Parent, - time_ms => Diff, - current_stacktrace => pinfo(Parent, current_stacktrace) - }), - monitor_loop(Mon, Info, Parent, Start, Interval) + exit(Parent, {task_timeout, Info}); + false -> + WaitTime = + case Timeout of + infinity -> Interval; + _ -> min(Interval, Timeout - Diff) + end, + receive + {'DOWN', _MonRef, process, _Pid, shutdown} -> + %% Special case, the long task is stopped using exit(Pid, shutdown) + ok; + {'DOWN', MonRef, process, _Pid, Reason} when Mon =:= MonRef -> + ?LOG_ERROR(Info#{ + what => task_failed, + reason => Reason, + caller_pid => Parent, + reported_by => monitor_process + }), + ok; + stop -> + ok + after WaitTime -> + Diff2 = diff(Start), + %% Check if timeout is reached before logging progress + case Timeout =/= infinity andalso Diff2 >= Timeout of + true -> + %% Don't log progress, let the next iteration handle timeout + ok; + false -> + ?LOG_WARNING(Info#{ + what => long_task_progress, + caller_pid => Parent, + time_ms => Diff2, + current_stacktrace => pinfo(Parent, current_stacktrace) + }) + end, + monitor_loop(Mon, Info, Parent, Start, Interval, Timeout) + end end. diff(Start) -> diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 3fa782f2..e5d68490 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -171,6 +171,7 @@ cases() -> only_for_logger_cases() -> [ run_tracked_logged_check_logger, + run_tracked_timeout, long_call_fails_because_linked_process_dies, pause_owner_crashed_is_logged, pause_owner_crashed_is_not_logged_if_reason_is_normal, @@ -1372,6 +1373,35 @@ run_tracked_logged_check_logger(_Config) -> ct:fail(timeout) end. +run_tracked_timeout(_Config) -> + logger_debug_h:start(#{id => ?FUNCTION_NAME}), + LogRef = make_ref(), + Me = self(), + F = fun() -> + Me ! task_started, + timer:sleep(infinity) + end, + %% Run it in a separate process with a 50ms timeout + %% Use high report_interval to avoid long_task_progress logs + Pid = proc_lib:spawn(fun() -> + cets_long:run_tracked(#{log_ref => LogRef, report_interval => 10000}, F, 50) + end), + receive_message(task_started), + wait_for_down(Pid), + %% Check that task_timeout was logged + [ + #{ + level := error, + msg := + {report, #{ + what := task_timeout, + log_ref := LogRef, + timeout_ms := 50 + }} + } + ] = + cets_test_log:receive_all_logs_with_log_ref(?FUNCTION_NAME, LogRef). + %% Improves code coverage, checks logs long_call_fails_because_linked_process_dies(_Config) -> logger_debug_h:start(#{id => ?FUNCTION_NAME}), From b74d93c9f50606efad8d3521b5e0e614b11e5cb8 Mon Sep 17 00:00:00 2001 From: Tomasz Wojcikowski Date: Tue, 9 Dec 2025 08:25:24 +0100 Subject: [PATCH 2/4] Add tests for run_tracked with infinity timeout and completion before timeout --- test/cets_SUITE.erl | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index e5d68490..678bb00a 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -172,6 +172,8 @@ only_for_logger_cases() -> [ run_tracked_logged_check_logger, run_tracked_timeout, + run_tracked_with_infinity_timeout, + run_tracked_completes_before_timeout, long_call_fails_because_linked_process_dies, pause_owner_crashed_is_logged, pause_owner_crashed_is_not_logged_if_reason_is_normal, @@ -1402,6 +1404,43 @@ run_tracked_timeout(_Config) -> ] = cets_test_log:receive_all_logs_with_log_ref(?FUNCTION_NAME, LogRef). +run_tracked_with_infinity_timeout(_Config) -> + logger_debug_h:start(#{id => ?FUNCTION_NAME}), + LogRef = make_ref(), + Me = self(), + F = fun() -> + Me ! task_started, + timer:sleep(20), + Me ! task_done + end, + %% Test with explicit infinity timeout - should not timeout + cets_long:run_tracked(#{log_ref => LogRef}, F, infinity), + receive_message(task_started), + receive_message(task_done), + %% No timeout should be logged + [] = [ + Log + || #{msg := {report, #{what := task_timeout}}} = Log <- + cets_test_log:receive_all_logs_with_log_ref(?FUNCTION_NAME, LogRef) + ]. + +run_tracked_completes_before_timeout(_Config) -> + logger_debug_h:start(#{id => ?FUNCTION_NAME}), + LogRef = make_ref(), + Result = make_ref(), + F = fun() -> + timer:sleep(10), + Result + end, + %% Task completes in 10ms, timeout is 1000ms + Result = cets_long:run_tracked(#{log_ref => LogRef}, F, 1000), + %% No timeout should be logged + [] = [ + Log + || #{msg := {report, #{what := task_timeout}}} = Log <- + cets_test_log:receive_all_logs_with_log_ref(?FUNCTION_NAME, LogRef) + ]. + %% Improves code coverage, checks logs long_call_fails_because_linked_process_dies(_Config) -> logger_debug_h:start(#{id => ?FUNCTION_NAME}), From 41e1af68972926b909badf37c65907917bc16181 Mon Sep 17 00:00:00 2001 From: Tomasz Wojcikowski Date: Tue, 9 Dec 2025 08:44:29 +0100 Subject: [PATCH 3/4] Add Codecov configuration for coverage thresholds and comments --- .codecov.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 .codecov.yml diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 00000000..8aba9c74 --- /dev/null +++ b/.codecov.yml @@ -0,0 +1,16 @@ +coverage: + status: + project: + default: + # Allow coverage to drop by up to 0.5% + threshold: 0.5% + patch: + default: + # Allow patch coverage to be 0.5% lower than base + threshold: 0.5% + +comment: + # Only comment on coverage changes > 0.5% + require_changes: true + require_base: yes + require_head: yes From caa899904eea65e37a6f81ff2a9a1db8f241ed41 Mon Sep 17 00:00:00 2001 From: Tomasz Wojcikowski Date: Tue, 9 Dec 2025 09:11:44 +0100 Subject: [PATCH 4/4] Refactor timeout handling in monitor loop --- src/cets_long.erl | 102 +++++++++++++++++++++++++++------------------- 1 file changed, 59 insertions(+), 43 deletions(-) diff --git a/src/cets_long.erl b/src/cets_long.erl index 939335b9..c6385ca4 100644 --- a/src/cets_long.erl +++ b/src/cets_long.erl @@ -110,53 +110,69 @@ run_monitor(Info, Ref, Parent, Start, Timeout) -> monitor_loop(Mon, Info, Parent, Start, Interval, Timeout) -> Diff = diff(Start), - case Timeout =/= infinity andalso Diff >= Timeout of - true -> + case check_timeout(Timeout, Diff) of + timeout_exceeded -> + handle_timeout(Info, Parent, Diff, Timeout); + timeout_not_exceeded -> + WaitTime = calculate_wait_time(Timeout, Diff, Interval), + handle_monitor_messages(Mon, Info, Parent, Start, Interval, Timeout, WaitTime) + end. + +check_timeout(infinity, _Diff) -> + timeout_not_exceeded; +check_timeout(Timeout, Diff) when Diff >= Timeout -> + timeout_exceeded; +check_timeout(_Timeout, _Diff) -> + timeout_not_exceeded. + +handle_timeout(Info, Parent, Diff, Timeout) -> + ?LOG_ERROR(Info#{ + what => task_timeout, + caller_pid => Parent, + time_ms => Diff, + timeout_ms => Timeout, + current_stacktrace => pinfo(Parent, current_stacktrace) + }), + exit(Parent, {task_timeout, Info}). + +calculate_wait_time(infinity, _Diff, Interval) -> + Interval; +calculate_wait_time(Timeout, Diff, Interval) -> + min(Interval, Timeout - Diff). + +handle_monitor_messages(Mon, Info, Parent, Start, Interval, Timeout, WaitTime) -> + receive + {'DOWN', _MonRef, process, _Pid, shutdown} -> + %% Special case, the long task is stopped using exit(Pid, shutdown) + ok; + {'DOWN', MonRef, process, _Pid, Reason} when Mon =:= MonRef -> ?LOG_ERROR(Info#{ - what => task_timeout, + what => task_failed, + reason => Reason, + caller_pid => Parent, + reported_by => monitor_process + }), + ok; + stop -> + ok + after WaitTime -> + handle_progress_logging(Info, Parent, Start, Timeout), + monitor_loop(Mon, Info, Parent, Start, Interval, Timeout) + end. + +handle_progress_logging(Info, Parent, Start, Timeout) -> + Diff = diff(Start), + case check_timeout(Timeout, Diff) of + timeout_exceeded -> + %% Don't log progress, let the next iteration handle timeout + ok; + timeout_not_exceeded -> + ?LOG_WARNING(Info#{ + what => long_task_progress, caller_pid => Parent, time_ms => Diff, - timeout_ms => Timeout, current_stacktrace => pinfo(Parent, current_stacktrace) - }), - exit(Parent, {task_timeout, Info}); - false -> - WaitTime = - case Timeout of - infinity -> Interval; - _ -> min(Interval, Timeout - Diff) - end, - receive - {'DOWN', _MonRef, process, _Pid, shutdown} -> - %% Special case, the long task is stopped using exit(Pid, shutdown) - ok; - {'DOWN', MonRef, process, _Pid, Reason} when Mon =:= MonRef -> - ?LOG_ERROR(Info#{ - what => task_failed, - reason => Reason, - caller_pid => Parent, - reported_by => monitor_process - }), - ok; - stop -> - ok - after WaitTime -> - Diff2 = diff(Start), - %% Check if timeout is reached before logging progress - case Timeout =/= infinity andalso Diff2 >= Timeout of - true -> - %% Don't log progress, let the next iteration handle timeout - ok; - false -> - ?LOG_WARNING(Info#{ - what => long_task_progress, - caller_pid => Parent, - time_ms => Diff2, - current_stacktrace => pinfo(Parent, current_stacktrace) - }) - end, - monitor_loop(Mon, Info, Parent, Start, Interval, Timeout) - end + }) end. diff(Start) ->