From 11e899d0ef984fa4e154b4498a22eed886f2045a Mon Sep 17 00:00:00 2001
From: Colman Yau <yauho@amazon.com>
Date: Thu, 4 Mar 2021 15:47:19 +0000
Subject: [PATCH 1/5] Restrict CPU usage checker to monitor sampleAndAggregate
 cpu usge only

As sampleAndAggregate happens every sampling_interval, our cpu usage checker
estimate the cpu usage by taking the cpu time spent in sampleAndAggregate
divided by sampling_interval.

This change will not be merged until we also have a checker monitoring
the cpu usage for submitProfile and refreshConfig.
---
 codeguru_profiler_agent/profiler_disabler.py | 6 +++---
 codeguru_profiler_agent/profiler_runner.py   | 8 ++++++--
 test/acceptance/test_cpu_limit.py            | 2 +-
 test/unit/test_profiler_disabler.py          | 4 ++--
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/codeguru_profiler_agent/profiler_disabler.py b/codeguru_profiler_agent/profiler_disabler.py
index c912e3d..8ade9fa 100644
--- a/codeguru_profiler_agent/profiler_disabler.py
+++ b/codeguru_profiler_agent/profiler_disabler.py
@@ -39,12 +39,12 @@ def __init__(self, timer):
         self.timer = timer
 
     def is_cpu_usage_limit_reached(self, profile=None):
-        profiler_metric = self.timer.metrics.get("runProfiler")
-        if not profiler_metric or profiler_metric.counter < MINIMUM_MEASURES_IN_DURATION_METRICS:
+        sample_and_aggregate_metric = self.timer.metrics.get("sampleAndAggregate")
+        if not sample_and_aggregate_metric or sample_and_aggregate_metric.counter < MINIMUM_MEASURES_IN_DURATION_METRICS:
             return False
 
         sampling_interval_seconds = self._get_average_sampling_interval_seconds(profile)
-        used_time_percentage = 100 * profiler_metric.average() / sampling_interval_seconds
+        used_time_percentage = 100 * sample_and_aggregate_metric.average() / sampling_interval_seconds
 
         if used_time_percentage >= AgentConfiguration.get().cpu_limit_percentage:
             logger.debug(self.timer.metrics)
diff --git a/codeguru_profiler_agent/profiler_runner.py b/codeguru_profiler_agent/profiler_runner.py
index d2c433f..6cd99d1 100644
--- a/codeguru_profiler_agent/profiler_runner.py
+++ b/codeguru_profiler_agent/profiler_runner.py
@@ -89,10 +89,14 @@ def _run_profiler(self):
             if self.collector.flush():
                 self.is_profiling_in_progress = False
                 return True
-            sample = self.sampler.sample()
-            self.collector.add(sample)
+            self._sample_and_aggregate()
         return True
 
+    @with_timer("sampleAndAggregate")
+    def _sample_and_aggregate(self):
+        sample = self.sampler.sample()
+        self.collector.add(sample)
+
     def is_running(self):
         return self.scheduler.is_running()
 
diff --git a/test/acceptance/test_cpu_limit.py b/test/acceptance/test_cpu_limit.py
index e413a21..d73cf23 100644
--- a/test/acceptance/test_cpu_limit.py
+++ b/test/acceptance/test_cpu_limit.py
@@ -31,6 +31,6 @@ def test_profiler_terminates(self):
             # With sampling_interval to be 0.01 seconds, having runProfiler as 0.5 seconds should breach
             # the cpu limit. We need to sample 20 times before we check the CPU limit
             for i in range(20):
-                self.timer.record('runProfiler', 0.5)
+                self.timer.record('sampleAndAggregate', 0.5)
 
             assert wait_for(lambda: not self.profiler.is_running(), timeout_seconds=5)
diff --git a/test/unit/test_profiler_disabler.py b/test/unit/test_profiler_disabler.py
index 638aa68..e8eb7b8 100644
--- a/test/unit/test_profiler_disabler.py
+++ b/test/unit/test_profiler_disabler.py
@@ -150,7 +150,7 @@ def before(self):
         self.timer = Timer()
         self.profile = Mock(spec=Profile)
         for i in range(20):
-            self.timer.record('runProfiler', 0.5)
+            self.timer.record('sampleAndAggregate', 0.5)
         set_agent_config(sampling_interval_seconds=1, cpu_limit_percentage=10)
         self.process_duration_check = CpuUsageCheck(self.timer)
 
@@ -222,7 +222,7 @@ def before(self):
     def test_it_returns_false(self):
         self.timer.reset()
         for i in range(4):
-            self.timer.record('runProfiler', 0.5)
+            self.timer.record('sampleAndAggregate', 0.5)
         assert not self.process_duration_check.is_cpu_usage_limit_reached()
 
 

From 501420284be557bb7cd86caf708c2644c6fe2814 Mon Sep 17 00:00:00 2001
From: Colman Yau <yauho@amazon.com>
Date: Thu, 4 Mar 2021 18:21:54 +0000
Subject: [PATCH 2/5] New implementation of ProfilerDisabler: iteration 1

Introduce should_stop_sampling and should_stop_profiling.

should_stop_sampling is called whenever we sample while should_stop_profiling
is only called after we refresh config or submit profile.
---
 codeguru_profiler_agent/local_aggregator.py  | 10 +++----
 codeguru_profiler_agent/profiler_disabler.py | 29 +++++++++++++++++--
 codeguru_profiler_agent/profiler_runner.py   | 30 +++++++++++++++-----
 test/unit/test_profiler_disabler.py          |  6 ++--
 4 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/codeguru_profiler_agent/local_aggregator.py b/codeguru_profiler_agent/local_aggregator.py
index fff2205..a04c304 100644
--- a/codeguru_profiler_agent/local_aggregator.py
+++ b/codeguru_profiler_agent/local_aggregator.py
@@ -44,7 +44,7 @@ def __init__(self, reporter, environment=dict()):
         self.memory_limit_bytes = environment["memory_limit_bytes"]
         self.last_report_attempted = current_milli_time(clock=self.clock)
 
-        self._reset()
+        self.reset()
 
     def add(self, sample):
         """
@@ -69,7 +69,7 @@ def _check_memory_limit(self):
                     "Profiler memory usage limit has been reached")
             self.flush(force=True)
 
-    def _reset(self):
+    def reset(self):
         self.profile = self.profile_factory(
             profiling_group_name=self.profiling_group_name,
             sampling_interval_seconds=AgentConfiguration.get().sampling_interval.total_seconds(),
@@ -80,7 +80,7 @@ def _reset(self):
         self.timer.reset()
 
     @with_timer("flush")
-    def flush(self, force=False):
+    def flush(self, force=False, reset=True):
         now = current_milli_time(clock=self.clock)
         reported = False
         if not force and not self._is_over_reporting_interval(now):
@@ -92,8 +92,8 @@ def flush(self, force=False):
             self._report_profile(now)
             reported = True
 
-        if force or reported:
-            self._reset()
+        if force or (reset and reported):
+            self.reset()
         return reported
 
     def refresh_configuration(self):
diff --git a/codeguru_profiler_agent/profiler_disabler.py b/codeguru_profiler_agent/profiler_disabler.py
index 8ade9fa..4c2e7a5 100644
--- a/codeguru_profiler_agent/profiler_disabler.py
+++ b/codeguru_profiler_agent/profiler_disabler.py
@@ -20,11 +20,16 @@ def __init__(self, environment, clock=time.time):
         self.killswitch = KillSwitch(environment['killswitch_filepath'], clock)
         self.memory_limit_bytes = environment['memory_limit_bytes']
 
-    def should_stop_profiling(self, profile=None):
+    def should_stop_sampling(self, profile=None):
         return (self.killswitch.is_killswitch_on()
                 or self.cpu_usage_check.is_cpu_usage_limit_reached(profile)
                 or profile is not None and self._is_memory_limit_reached(profile))
 
+    def should_stop_profiling(self, profile=None):
+        return self.killswitch.is_killswitch_on() or \
+               (profile is not None and self.cpu_usage_check.is_overall_cpu_limit_reached(profile)
+                and self._is_memory_limit_reached(profile))
+
     def _is_memory_limit_reached(self, profile):
         return profile.get_memory_usage_bytes() > self.memory_limit_bytes
 
@@ -38,9 +43,29 @@ class CpuUsageCheck:
     def __init__(self, timer):
         self.timer = timer
 
+    # This function carries out an overall cpu limit check that covers the cpu overhead caused for the full
+    # sampling cycle: sample -> aggregate -> report -> refresh config. This has to be called with a profile
+    # which captured the total cycle cpu time usage. hnhg
+    def is_overall_cpu_limit_reached(self, profile):
+        profiler_metric = self.timer.metrics.get("runProfiler")
+        if not profile or not profiler_metric or profiler_metric.counter < MINIMUM_MEASURES_IN_DURATION_METRICS:
+            return False
+
+        used_time_percentage = 100 * profiler_metric.total/profile.get_active_millis_since_start()
+
+        if used_time_percentage >= AgentConfiguration.get().cpu_limit_percentage:
+            logger.debug(self.timer.metrics)
+            logger.info(
+                "Profiler cpu usage limit reached: {:.2f} % (limit: {:.2f} %), will stop CodeGuru Profiler.".format(
+                    used_time_percentage, AgentConfiguration.get().cpu_limit_percentage))
+            return True
+        else:
+            return False
+
     def is_cpu_usage_limit_reached(self, profile=None):
         sample_and_aggregate_metric = self.timer.metrics.get("sampleAndAggregate")
-        if not sample_and_aggregate_metric or sample_and_aggregate_metric.counter < MINIMUM_MEASURES_IN_DURATION_METRICS:
+        if not sample_and_aggregate_metric or \
+                sample_and_aggregate_metric.counter < MINIMUM_MEASURES_IN_DURATION_METRICS:
             return False
 
         sampling_interval_seconds = self._get_average_sampling_interval_seconds(profile)
diff --git a/codeguru_profiler_agent/profiler_runner.py b/codeguru_profiler_agent/profiler_runner.py
index 6cd99d1..b692ff5 100644
--- a/codeguru_profiler_agent/profiler_runner.py
+++ b/codeguru_profiler_agent/profiler_runner.py
@@ -51,7 +51,7 @@ def start(self):
 
         :return: True if the profiler was started successfully; False otherwise.
         """
-        if self.profiler_disabler.should_stop_profiling():
+        if self.profiler_disabler.should_stop_sampling():
             logger.info("Profiler will not start.")
             return False
         self.scheduler.start()
@@ -71,26 +71,35 @@ def _profiling_command(self):
             if self._first_execution:
                 self.collector.setup()
                 self._first_execution = False
-            return self._run_profiler()
+            sample_result = self._run_profiler()
+            if sample_result.success and sample_result.should_check_overall:
+                if self.profiler_disabler.should_stop_profiling(profile=self.collector.profile):
+                    return False
+                if sample_result.should_reset:
+                    self.collector.reset()
+                    return True
+            return sample_result.success
         except:
             logger.info("An unexpected issue caused the profiling command to terminate.", exc_info=True)
             return False
 
     @with_timer("runProfiler")
     def _run_profiler(self):
-        if self.profiler_disabler.should_stop_profiling(self.collector.profile):
-            return False
+        if self.profiler_disabler.should_stop_sampling(self.collector.profile):
+            return RunProfilerStatus(success=False, should_check_overall=False, should_reset=False)
 
+        refreshed_config = False
         if not self.is_profiling_in_progress:
             self._refresh_configuration()
+            refreshed_config = True
 
         # after the refresh we may be working on a profile
         if self.is_profiling_in_progress:
-            if self.collector.flush():
+            if self.collector.flush(reset=False):
                 self.is_profiling_in_progress = False
-                return True
+                return RunProfilerStatus(success=True, should_check_overall=True, should_reset=True)
             self._sample_and_aggregate()
-        return True
+        return RunProfilerStatus(success=True, should_check_overall=refreshed_config, should_reset=False)
 
     @with_timer("sampleAndAggregate")
     def _sample_and_aggregate(self):
@@ -129,3 +138,10 @@ def pause(self, block=False):
         """
         self.scheduler.pause(block)
         self.collector.profile.pause()
+
+
+class RunProfilerStatus:
+    def __init__(self, success, should_check_overall, should_reset):
+        self.success = success
+        self.should_check_overall = should_check_overall
+        self.should_reset = should_reset
diff --git a/test/unit/test_profiler_disabler.py b/test/unit/test_profiler_disabler.py
index e8eb7b8..61d0770 100644
--- a/test/unit/test_profiler_disabler.py
+++ b/test/unit/test_profiler_disabler.py
@@ -72,15 +72,15 @@ def before(self):
 
     def test_it_stops_profiling_if_killswitch_is_on(self):
         self.disabler.killswitch.is_killswitch_on = Mock(return_value=True)
-        assert self.disabler.should_stop_profiling(self.profiler)
+        assert self.disabler.should_stop_sampling(self.profiler)
 
     def test_it_stops_profiling_if_memory_limit_is_reached(self):
         self.disabler._is_memory_limit_reached = Mock(return_value=True)
-        assert self.disabler.should_stop_profiling(self.profiler)
+        assert self.disabler.should_stop_sampling(self.profiler)
 
     def test_it_stops_profiling_if_process_duration_is_reached(self):
         self.disabler.cpu_usage_check.is_cpu_usage_limit_reached = Mock(return_value=True)
-        assert self.disabler.should_stop_profiling(self.profiler)
+        assert self.disabler.should_stop_sampling(self.profiler)
 
 
 class TestKillSwitch:

From 335a9abbbfe87588455770f832685661c54c7b4b Mon Sep 17 00:00:00 2001
From: Colman Yau <yauho@amazon.com>
Date: Tue, 9 Mar 2021 11:16:59 +0000
Subject: [PATCH 3/5] Add test for new ProfilerDisabler implementation

---
 codeguru_profiler_agent/profiler_disabler.py |  33 +++--
 codeguru_profiler_agent/profiler_runner.py   |   2 +-
 test/unit/test_profiler_disabler.py          | 126 +++++++++++++------
 test/unit/test_profiler_runner.py            |  13 +-
 4 files changed, 120 insertions(+), 54 deletions(-)

diff --git a/codeguru_profiler_agent/profiler_disabler.py b/codeguru_profiler_agent/profiler_disabler.py
index 4c2e7a5..bc6dc72 100644
--- a/codeguru_profiler_agent/profiler_disabler.py
+++ b/codeguru_profiler_agent/profiler_disabler.py
@@ -2,7 +2,6 @@
 import time
 import logging
 from codeguru_profiler_agent.reporter.agent_configuration import AgentConfiguration
-from codeguru_profiler_agent.utils.time import current_milli_time
 
 logger = logging.getLogger(__name__)
 CHECK_KILLSWITCH_FILE_INTERVAL_SECONDS = 60
@@ -21,17 +20,17 @@ def __init__(self, environment, clock=time.time):
         self.memory_limit_bytes = environment['memory_limit_bytes']
 
     def should_stop_sampling(self, profile=None):
-        return (self.killswitch.is_killswitch_on()
-                or self.cpu_usage_check.is_cpu_usage_limit_reached(profile)
-                or profile is not None and self._is_memory_limit_reached(profile))
+        return self.killswitch.is_killswitch_on() \
+               or self.cpu_usage_check.is_sampling_cpu_usage_limit_reached(profile) \
+               or self._is_memory_limit_reached(profile)
 
     def should_stop_profiling(self, profile=None):
-        return self.killswitch.is_killswitch_on() or \
-               (profile is not None and self.cpu_usage_check.is_overall_cpu_limit_reached(profile)
-                and self._is_memory_limit_reached(profile))
+        return self.killswitch.is_killswitch_on() \
+               or self.cpu_usage_check.is_overall_cpu_usage_limit_reached(profile) \
+               or self._is_memory_limit_reached(profile)
 
     def _is_memory_limit_reached(self, profile):
-        return profile.get_memory_usage_bytes() > self.memory_limit_bytes
+        return False if profile is None else profile.get_memory_usage_bytes() > self.memory_limit_bytes
 
 
 class CpuUsageCheck:
@@ -44,25 +43,25 @@ def __init__(self, timer):
         self.timer = timer
 
     # This function carries out an overall cpu limit check that covers the cpu overhead caused for the full
-    # sampling cycle: sample -> aggregate -> report -> refresh config. This has to be called with a profile
-    # which captured the total cycle cpu time usage. hnhg
-    def is_overall_cpu_limit_reached(self, profile):
+    # sampling cycle: sample -> aggregate -> report -> refresh config. We expect this function to be called after
+    # configuration refresh and profile submission.
+    def is_overall_cpu_usage_limit_reached(self, profile=None):
         profiler_metric = self.timer.metrics.get("runProfiler")
         if not profile or not profiler_metric or profiler_metric.counter < MINIMUM_MEASURES_IN_DURATION_METRICS:
             return False
 
-        used_time_percentage = 100 * profiler_metric.total/profile.get_active_millis_since_start()
+        used_time_percentage = 100 * profiler_metric.total/(profile.get_active_millis_since_start()/1000)
 
         if used_time_percentage >= AgentConfiguration.get().cpu_limit_percentage:
             logger.debug(self.timer.metrics)
             logger.info(
-                "Profiler cpu usage limit reached: {:.2f} % (limit: {:.2f} %), will stop CodeGuru Profiler.".format(
-                    used_time_percentage, AgentConfiguration.get().cpu_limit_percentage))
+                "Profiler overall cpu usage limit reached: {:.2f} % (limit: {:.2f} %), will stop CodeGuru Profiler."
+                .format(used_time_percentage, AgentConfiguration.get().cpu_limit_percentage))
             return True
         else:
             return False
 
-    def is_cpu_usage_limit_reached(self, profile=None):
+    def is_sampling_cpu_usage_limit_reached(self, profile=None):
         sample_and_aggregate_metric = self.timer.metrics.get("sampleAndAggregate")
         if not sample_and_aggregate_metric or \
                 sample_and_aggregate_metric.counter < MINIMUM_MEASURES_IN_DURATION_METRICS:
@@ -74,8 +73,8 @@ def is_cpu_usage_limit_reached(self, profile=None):
         if used_time_percentage >= AgentConfiguration.get().cpu_limit_percentage:
             logger.debug(self.timer.metrics)
             logger.info(
-                "Profiler cpu usage limit reached: {:.2f} % (limit: {:.2f} %), will stop CodeGuru Profiler.".format(
-                    used_time_percentage, AgentConfiguration.get().cpu_limit_percentage))
+                "Profiler sampling cpu usage limit reached: {:.2f} % (limit: {:.2f} %), will stop CodeGuru Profiler."
+                .format(used_time_percentage, AgentConfiguration.get().cpu_limit_percentage))
             return True
         else:
             return False
diff --git a/codeguru_profiler_agent/profiler_runner.py b/codeguru_profiler_agent/profiler_runner.py
index b692ff5..c868f9e 100644
--- a/codeguru_profiler_agent/profiler_runner.py
+++ b/codeguru_profiler_agent/profiler_runner.py
@@ -51,7 +51,7 @@ def start(self):
 
         :return: True if the profiler was started successfully; False otherwise.
         """
-        if self.profiler_disabler.should_stop_sampling():
+        if self.profiler_disabler.should_stop_profiling():
             logger.info("Profiler will not start.")
             return False
         self.scheduler.start()
diff --git a/test/unit/test_profiler_disabler.py b/test/unit/test_profiler_disabler.py
index 61d0770..ae711d5 100644
--- a/test/unit/test_profiler_disabler.py
+++ b/test/unit/test_profiler_disabler.py
@@ -28,10 +28,10 @@ def set_agent_config(sampling_interval_seconds=1, cpu_limit_percentage=DEFAULT_C
 
 
 def assert_config_sampling_interval_used(process_duration_check, profile):
-    assert process_duration_check.is_cpu_usage_limit_reached(profile)
+    assert process_duration_check.is_sampling_cpu_usage_limit_reached(profile)
 
     set_agent_config(sampling_interval_seconds=42, cpu_limit_percentage=80)
-    assert not process_duration_check.is_cpu_usage_limit_reached(profile)
+    assert not process_duration_check.is_sampling_cpu_usage_limit_reached(profile)
 
 
 class TestProfilerDisabler:
@@ -59,28 +59,57 @@ def test_it_sets_all_parameters(self):
         assert AgentConfiguration.get().cpu_limit_percentage == DEFAULT_CPU_LIMIT_PERCENTAGE
 
 
-class TestWhenAnyFails(TestProfilerDisabler):
-    @before
-    def before(self):
-        super().before()
-        self.profiler = Mock()
-        self.disabler.killswitch = Mock()
-        self.disabler.cpu_usage_check = Mock()
-        self.disabler._is_memory_limit_reached = Mock(return_value=False)
-        self.disabler.killswitch.is_killswitch_on = Mock(return_value=False)
-        self.disabler.killswitch.is_process_duration_limit_reached = Mock(return_value=False)
+class TestShouldStopSampling:
+    class TestWhenAnyFails(TestProfilerDisabler):
+        @before
+        def before(self):
+            super().before()
+            self.disabler.killswitch = Mock()
+            self.disabler.cpu_usage_check = Mock()
+            self.disabler.cpu_usage_check.is_sampling_cpu_usage_limit_reached = Mock(return_value=False)
+            self.disabler._is_memory_limit_reached = Mock(return_value=False)
+            self.disabler.killswitch.is_killswitch_on = Mock(return_value=False)
+            self.disabler.killswitch.is_process_duration_limit_reached = Mock(return_value=False)
+            assert not self.disabler.should_stop_sampling()
+
+        def test_it_stops_profiling_if_killswitch_is_on(self):
+            self.disabler.killswitch.is_killswitch_on = Mock(return_value=True)
+            assert self.disabler.should_stop_sampling()
+
+        def test_it_stops_profiling_if_memory_limit_is_reached(self):
+            self.disabler._is_memory_limit_reached = Mock(return_value=True)
+            assert self.disabler.should_stop_sampling()
+
+        def test_it_stops_profiling_if_process_duration_is_reached(self):
+            self.disabler.cpu_usage_check.is_sampling_cpu_usage_limit_reached = Mock(return_value=True)
+            assert self.disabler.should_stop_sampling()
+
+
+class TestShouldStopProfiling:
+    class TestWhenAnyFails(TestProfilerDisabler):
+        @before
+        def before(self):
+            super().before()
+            self.profiler = Mock()
+            self.disabler.killswitch = Mock()
+            self.disabler.cpu_usage_check = Mock()
+            self.disabler.cpu_usage_check.is_overall_cpu_usage_limit_reached = Mock(return_value=False)
+            self.disabler._is_memory_limit_reached = Mock(return_value=False)
+            self.disabler.killswitch.is_killswitch_on = Mock(return_value=False)
+            self.disabler.killswitch.is_process_duration_limit_reached = Mock(return_value=False)
+            assert not self.disabler.should_stop_profiling()
 
-    def test_it_stops_profiling_if_killswitch_is_on(self):
-        self.disabler.killswitch.is_killswitch_on = Mock(return_value=True)
-        assert self.disabler.should_stop_sampling(self.profiler)
+        def test_it_stops_profiling_if_killswitch_is_on(self):
+            self.disabler.killswitch.is_killswitch_on = Mock(return_value=True)
+            assert self.disabler.should_stop_profiling()
 
-    def test_it_stops_profiling_if_memory_limit_is_reached(self):
-        self.disabler._is_memory_limit_reached = Mock(return_value=True)
-        assert self.disabler.should_stop_sampling(self.profiler)
+        def test_it_stops_profiling_if_memory_limit_is_reached(self):
+            self.disabler._is_memory_limit_reached = Mock(return_value=True)
+            assert self.disabler.should_stop_profiling()
 
-    def test_it_stops_profiling_if_process_duration_is_reached(self):
-        self.disabler.cpu_usage_check.is_cpu_usage_limit_reached = Mock(return_value=True)
-        assert self.disabler.should_stop_sampling(self.profiler)
+        def test_it_stops_profiling_if_process_duration_is_reached(self):
+            self.disabler.cpu_usage_check.is_overall_cpu_usage_limit_reached = Mock(return_value=True)
+            assert self.disabler.should_stop_profiling()
 
 
 class TestKillSwitch:
@@ -145,7 +174,7 @@ def test_it_returns_false_after_a_minute(self):
         assert not self.killswitch.is_killswitch_on()
 
 
-class TestCpuUsageCheck:
+class TestSamplingCpuUsageCheck:
     def before(self):
         self.timer = Timer()
         self.profile = Mock(spec=Profile)
@@ -155,7 +184,7 @@ def before(self):
         self.process_duration_check = CpuUsageCheck(self.timer)
 
 
-class TestGetAverageSamplingIntervalSeconds(TestCpuUsageCheck):
+class TestGetAverageSamplingIntervalSeconds(TestSamplingCpuUsageCheck):
     @before
     def before(self):
         super().before()
@@ -176,7 +205,7 @@ def test_when_profiler_sample_count_less_than_min_samples_in_profile_it_returns_
         assert CpuUsageCheck._get_average_sampling_interval_seconds(self.profile) == 23
 
 
-class TestIsCpuUsageLimitReached(TestCpuUsageCheck):
+class TestIsSamplingCpuUsageLimitReached(TestSamplingCpuUsageCheck):
     @before
     def before(self):
         super().before()
@@ -187,43 +216,70 @@ def before(self):
             yield
 
     def test_it_calls_get_average_sampling_interval_with_profile(self):
-        self.process_duration_check.is_cpu_usage_limit_reached(self.profile)
+        self.process_duration_check.is_sampling_cpu_usage_limit_reached(self.profile)
         self.get_average_sampling_interval_mock.assert_called_once_with(self.profile)
 
     def test_when_average_duration_exceeds_limit_it_returns_true(self):
         # timer: (0.5/4) * 100= 12.5%
-        assert self.process_duration_check.is_cpu_usage_limit_reached()
+        assert self.process_duration_check.is_sampling_cpu_usage_limit_reached()
 
-    def test_when_average_duragtion_is_below_limit_it_returns_false(self):
+    def test_when_average_duration_is_below_limit_it_returns_false(self):
         # timer: (0.5/4) * 100= 12.5%
         set_agent_config(cpu_limit_percentage=13)
-        assert not self.process_duration_check.is_cpu_usage_limit_reached()
+        assert not self.process_duration_check.is_sampling_cpu_usage_limit_reached()
 
     def test_when_profile_is_none_it_calls_get_average_sampling_interval_without_profile(self):
-        self.process_duration_check.is_cpu_usage_limit_reached()
+        self.process_duration_check.is_sampling_cpu_usage_limit_reached()
         self.get_average_sampling_interval_mock.assert_called_once_with(None)
 
 
-class TestWhenTimerDoesNotHaveTheKey(TestCpuUsageCheck):
+class TestIsOverallCpuUsageLimitReached():
+    @before
+    def before(self):
+        self.timer = Timer()
+        self.profile = Mock(spec=Profile)
+        for i in range(20):
+            self.timer.record('runProfiler', 0.5)
+        set_agent_config(cpu_limit_percentage=9)
+        self.process_duration_check = CpuUsageCheck(self.timer)
+        self.profile.get_active_millis_since_start = Mock(return_value=100*1000)
+
+    def test_when_average_duration_exceeds_limit_it_returns_true(self):
+        # timer: (0.5*20/100) * 100= 10%
+        assert self.process_duration_check.is_overall_cpu_usage_limit_reached(self.profile)
+
+    def test_when_average_duragtion_is_below_limit_it_returns_false(self):
+        # timer: (0.5*20/100) * 100= 10%
+        set_agent_config(cpu_limit_percentage=11)
+        assert not self.process_duration_check.is_overall_cpu_usage_limit_reached(self.profile)
+
+    def test_when_profile_is_none_it_returns_false(self):
+        assert not self.process_duration_check.is_overall_cpu_usage_limit_reached()
+
+
+class TestWhenTimerDoesNotHaveTheKey(TestSamplingCpuUsageCheck):
     @before
     def before(self):
         super().before()
 
     def test_it_returns_false(self):
         self.process_duration_check.timer = Timer()
-        assert not self.process_duration_check.is_cpu_usage_limit_reached()
+        assert not self.process_duration_check.is_sampling_cpu_usage_limit_reached()
 
 
-class TestWhenTimerDoesNotHaveEnoughMeasures(TestCpuUsageCheck):
+class TestWhenTimerDoesNotHaveEnoughMeasures(TestSamplingCpuUsageCheck):
     @before
     def before(self):
         super().before()
-
-    def test_it_returns_false(self):
         self.timer.reset()
         for i in range(4):
             self.timer.record('sampleAndAggregate', 0.5)
-        assert not self.process_duration_check.is_cpu_usage_limit_reached()
+
+    def test_sampling_cpu_usage_limit_reached_returns_false(self):
+        assert not self.process_duration_check.is_sampling_cpu_usage_limit_reached()
+
+    def test_overall_cpu_usage_limit_reached_returns_false(self):
+        assert not self.process_duration_check.is_overall_cpu_usage_limit_reached()
 
 
 class TestMemoryLimitCheck:
diff --git a/test/unit/test_profiler_runner.py b/test/unit/test_profiler_runner.py
index a40127f..946b843 100644
--- a/test/unit/test_profiler_runner.py
+++ b/test/unit/test_profiler_runner.py
@@ -17,6 +17,7 @@ def before(self):
         self.mock_collector = MagicMock(name="collector", spec=LocalAggregator)
         self.mock_disabler = MagicMock(name="profile", spec=ProfilerDisabler)
         self.mock_disabler.should_stop_profiling.return_value = False
+        self.mock_disabler.should_stop_sampling.return_value = False
         self.mock_sampler = MagicMock(name="sampler", spec=Sampler)
 
         self.environment = {
@@ -80,11 +81,21 @@ def test_when_it_reports_it_does_not_sample(self):
         self.mock_collector.flush.assert_called_once()
         self.mock_collector.add.assert_not_called()
 
-    def test_when_disabler_say_to_stop(self):
+    def test_when_disabler_says_to_stop_profiling_it_does_not_start(self):
         self.mock_disabler.should_stop_profiling.return_value = True
+
+        assert not self.profiler_runner.start()
+
+        self.mock_collector.refresh_configuration.assert_not_called()
+        self.mock_collector.add.assert_not_called()
+
+    def test_when_disabler_says_to_stop_sampling_it_does_not_do_anything(self):
+        self.mock_disabler.should_stop_sampling.return_value = True
         self.profiler_runner._profiling_command()
 
+        # As disabler.stop_sampling() returns True, _profiling_command() should not attempt to carry out any action.
         self.mock_collector.refresh_configuration.assert_not_called()
+        self.mock_sampler.sample.assert_not_called()
         self.mock_collector.add.assert_not_called()
 
     def test_when_orchestrator_says_no_to_profiler(self):

From aed725ea85e8be1629fb24f2fe914ef72f65c24d Mon Sep 17 00:00:00 2001
From: Colman Yau <yauho@amazon.com>
Date: Tue, 9 Mar 2021 13:11:17 +0000
Subject: [PATCH 4/5] Include changes suggested in review

---
 codeguru_profiler_agent/profiler_disabler.py | 32 ++++++++++++--------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/codeguru_profiler_agent/profiler_disabler.py b/codeguru_profiler_agent/profiler_disabler.py
index bc6dc72..08e54f7 100644
--- a/codeguru_profiler_agent/profiler_disabler.py
+++ b/codeguru_profiler_agent/profiler_disabler.py
@@ -20,14 +20,14 @@ def __init__(self, environment, clock=time.time):
         self.memory_limit_bytes = environment['memory_limit_bytes']
 
     def should_stop_sampling(self, profile=None):
-        return self.killswitch.is_killswitch_on() \
-               or self.cpu_usage_check.is_sampling_cpu_usage_limit_reached(profile) \
-               or self._is_memory_limit_reached(profile)
+        return (self.killswitch.is_killswitch_on()
+                or self.cpu_usage_check.is_sampling_cpu_usage_limit_reached(profile)
+                or self._is_memory_limit_reached(profile))
 
     def should_stop_profiling(self, profile=None):
-        return self.killswitch.is_killswitch_on() \
-               or self.cpu_usage_check.is_overall_cpu_usage_limit_reached(profile) \
-               or self._is_memory_limit_reached(profile)
+        return (self.killswitch.is_killswitch_on()
+                or self.cpu_usage_check.is_overall_cpu_usage_limit_reached(profile)
+                or self._is_memory_limit_reached(profile))
 
     def _is_memory_limit_reached(self, profile):
         return False if profile is None else profile.get_memory_usage_bytes() > self.memory_limit_bytes
@@ -42,21 +42,25 @@ class CpuUsageCheck:
     def __init__(self, timer):
         self.timer = timer
 
-    # This function carries out an overall cpu limit check that covers the cpu overhead caused for the full
-    # sampling cycle: sample -> aggregate -> report -> refresh config. We expect this function to be called after
-    # configuration refresh and profile submission.
     def is_overall_cpu_usage_limit_reached(self, profile=None):
+        """
+        This function carries out an overall cpu limit check that covers the cpu overhead caused for the full
+        sampling cycle: refresh config -> (sample -> aggregate) * n -> profile submission. We expect this function to
+        be called after configuration refresh and profile submission.
+        """
         profiler_metric = self.timer.metrics.get("runProfiler")
         if not profile or not profiler_metric or profiler_metric.counter < MINIMUM_MEASURES_IN_DURATION_METRICS:
             return False
 
         used_time_percentage = 100 * profiler_metric.total/(profile.get_active_millis_since_start()/1000)
 
-        if used_time_percentage >= AgentConfiguration.get().cpu_limit_percentage:
+        cpu_limit_percentage = AgentConfiguration.get().cpu_limit_percentage
+
+        if used_time_percentage >= cpu_limit_percentage:
             logger.debug(self.timer.metrics)
             logger.info(
                 "Profiler overall cpu usage limit reached: {:.2f} % (limit: {:.2f} %), will stop CodeGuru Profiler."
-                .format(used_time_percentage, AgentConfiguration.get().cpu_limit_percentage))
+                .format(used_time_percentage, cpu_limit_percentage))
             return True
         else:
             return False
@@ -70,11 +74,13 @@ def is_sampling_cpu_usage_limit_reached(self, profile=None):
         sampling_interval_seconds = self._get_average_sampling_interval_seconds(profile)
         used_time_percentage = 100 * sample_and_aggregate_metric.average() / sampling_interval_seconds
 
-        if used_time_percentage >= AgentConfiguration.get().cpu_limit_percentage:
+        cpu_limit_percentage = AgentConfiguration.get().cpu_limit_percentage
+
+        if used_time_percentage >= cpu_limit_percentage:
             logger.debug(self.timer.metrics)
             logger.info(
                 "Profiler sampling cpu usage limit reached: {:.2f} % (limit: {:.2f} %), will stop CodeGuru Profiler."
-                .format(used_time_percentage, AgentConfiguration.get().cpu_limit_percentage))
+                .format(used_time_percentage, cpu_limit_percentage))
             return True
         else:
             return False

From 33152e2027b09e8762ad0f0a3f508f55b010dbcb Mon Sep 17 00:00:00 2001
From: Colman Yau <yauho@amazon.com>
Date: Tue, 9 Mar 2021 15:53:11 +0000
Subject: [PATCH 5/5] Update check to be carried out only after profile
 submission

---
 codeguru_profiler_agent/profiler_disabler.py |  2 +-
 codeguru_profiler_agent/profiler_runner.py   | 20 ++++++++------------
 test/unit/test_profiler_disabler.py          |  2 +-
 3 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/codeguru_profiler_agent/profiler_disabler.py b/codeguru_profiler_agent/profiler_disabler.py
index 08e54f7..906ec7c 100644
--- a/codeguru_profiler_agent/profiler_disabler.py
+++ b/codeguru_profiler_agent/profiler_disabler.py
@@ -46,7 +46,7 @@ def is_overall_cpu_usage_limit_reached(self, profile=None):
         """
         This function carries out an overall cpu limit check that covers the cpu overhead caused for the full
         sampling cycle: refresh config -> (sample -> aggregate) * n -> profile submission. We expect this function to
-        be called after configuration refresh and profile submission.
+        be called after profile submission.
         """
         profiler_metric = self.timer.metrics.get("runProfiler")
         if not profile or not profiler_metric or profiler_metric.counter < MINIMUM_MEASURES_IN_DURATION_METRICS:
diff --git a/codeguru_profiler_agent/profiler_runner.py b/codeguru_profiler_agent/profiler_runner.py
index c868f9e..ae36290 100644
--- a/codeguru_profiler_agent/profiler_runner.py
+++ b/codeguru_profiler_agent/profiler_runner.py
@@ -72,12 +72,11 @@ def _profiling_command(self):
                 self.collector.setup()
                 self._first_execution = False
             sample_result = self._run_profiler()
-            if sample_result.success and sample_result.should_check_overall:
+            if sample_result.success and sample_result.is_end_of_cycle:
                 if self.profiler_disabler.should_stop_profiling(profile=self.collector.profile):
                     return False
-                if sample_result.should_reset:
-                    self.collector.reset()
-                    return True
+                self.collector.reset()
+                return True
             return sample_result.success
         except:
             logger.info("An unexpected issue caused the profiling command to terminate.", exc_info=True)
@@ -86,20 +85,18 @@ def _profiling_command(self):
     @with_timer("runProfiler")
     def _run_profiler(self):
         if self.profiler_disabler.should_stop_sampling(self.collector.profile):
-            return RunProfilerStatus(success=False, should_check_overall=False, should_reset=False)
+            return RunProfilerStatus(success=False, is_end_of_cycle=False)
 
-        refreshed_config = False
         if not self.is_profiling_in_progress:
             self._refresh_configuration()
-            refreshed_config = True
 
         # after the refresh we may be working on a profile
         if self.is_profiling_in_progress:
             if self.collector.flush(reset=False):
                 self.is_profiling_in_progress = False
-                return RunProfilerStatus(success=True, should_check_overall=True, should_reset=True)
+                return RunProfilerStatus(success=True, is_end_of_cycle=True)
             self._sample_and_aggregate()
-        return RunProfilerStatus(success=True, should_check_overall=refreshed_config, should_reset=False)
+        return RunProfilerStatus(success=True, is_end_of_cycle=False)
 
     @with_timer("sampleAndAggregate")
     def _sample_and_aggregate(self):
@@ -141,7 +138,6 @@ def pause(self, block=False):
 
 
 class RunProfilerStatus:
-    def __init__(self, success, should_check_overall, should_reset):
+    def __init__(self, success, is_end_of_cycle):
         self.success = success
-        self.should_check_overall = should_check_overall
-        self.should_reset = should_reset
+        self.is_end_of_cycle = is_end_of_cycle
diff --git a/test/unit/test_profiler_disabler.py b/test/unit/test_profiler_disabler.py
index ae711d5..4d3cacd 100644
--- a/test/unit/test_profiler_disabler.py
+++ b/test/unit/test_profiler_disabler.py
@@ -248,7 +248,7 @@ def test_when_average_duration_exceeds_limit_it_returns_true(self):
         # timer: (0.5*20/100) * 100= 10%
         assert self.process_duration_check.is_overall_cpu_usage_limit_reached(self.profile)
 
-    def test_when_average_duragtion_is_below_limit_it_returns_false(self):
+    def test_when_average_duration_is_below_limit_it_returns_false(self):
         # timer: (0.5*20/100) * 100= 10%
         set_agent_config(cpu_limit_percentage=11)
         assert not self.process_duration_check.is_overall_cpu_usage_limit_reached(self.profile)