From 48acad4df7c8bb978b7a7327a63fd4fafb0badc1 Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Sat, 21 Aug 2021 22:04:18 +0200 Subject: [PATCH 01/20] Flush mechanism --- codecarbon/emissions_tracker.py | 23 +++++++++++++++++ examples/mnist_callback.py | 45 +++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 examples/mnist_callback.py diff --git a/codecarbon/emissions_tracker.py b/codecarbon/emissions_tracker.py index cc26120ec..9dbf869fb 100644 --- a/codecarbon/emissions_tracker.py +++ b/codecarbon/emissions_tracker.py @@ -301,6 +301,29 @@ def start(self) -> None: self._last_measured_time = self._start_time = time.time() self._scheduler.start() + @suppress(Exception) + def flush(self) -> Optional[float]: + """ + Write emission to disk or call the API depending of the configuration + but keep running the experiment. + :return: CO2 emissions in kgs + """ + if self._start_time is None: + logger.error("Need to first start the tracker") + return None + + # Run to calculate the power used from last + # scheduled measurement to shutdown + self._measure_power() + + emissions_data = self._prepare_emissions_data() + for persistence in self.persistence_objs: + if isinstance(persistence, CodeCarbonAPIOutput): + emissions_data = self._prepare_emissions_data(delta=True) + persistence.out(emissions_data) + + return emissions_data.emissions + @suppress(Exception) def stop(self) -> Optional[float]: """ diff --git a/examples/mnist_callback.py b/examples/mnist_callback.py new file mode 100644 index 000000000..994075d5e --- /dev/null +++ b/examples/mnist_callback.py @@ -0,0 +1,45 @@ +import tensorflow as tf +from tensorflow.keras.callbacks import Callback + +from codecarbon import EmissionsTracker + +""" +This sample code show how to use CodeCarbon as a Keras Callback +to save emissions after each epoch. +""" + + +class CodeCarbonCallBack(Callback): + def __init__(self, codecarbon_tracker): + self.codecarbon_tracker = codecarbon_tracker + pass + + def on_epoch_end(self, epoch, logs=None): + self.codecarbon_tracker.flush() + + +mnist = tf.keras.datasets.mnist + +(x_train, y_train), (x_test, y_test) = mnist.load_data() +x_train, x_test = x_train / 255.0, x_test / 255.0 + + +model = tf.keras.models.Sequential( + [ + tf.keras.layers.Flatten(input_shape=(28, 28)), + tf.keras.layers.Dense(128, activation="relu"), + tf.keras.layers.Dropout(0.2), + tf.keras.layers.Dense(10), + ] +) + +loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) + +model.compile(optimizer="adam", loss=loss_fn, metrics=["accuracy"]) + +tracker = EmissionsTracker() +tracker.start() +codecarbon_cb = CodeCarbonCallBack(tracker) +model.fit(x_train, y_train, epochs=4, callbacks=[codecarbon_cb]) +emissions: float = tracker.stop() +print(f"Emissions: {emissions} kg") From eff616dcda64a5515b48560f03ad111072aec1fa Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Sat, 21 Aug 2021 22:21:36 +0200 Subject: [PATCH 02/20] Docs --- README.md | 9 +++++++++ codecarbon/emissions_tracker.py | 3 ++- examples/README.md | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a0989fac6..99857ea18 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,16 @@ with EmissionsTracker() as tracker: # GPU Intensive code goes here ``` +### Flush data when running +By default, Code Carbon only write the CVS output file when it stop. +But for long run you may want to get intermediate data. +It is possible to call the flush() methode to do so. + +Have a look to [./examples/mnist_callback.py](./examples/mnist_callback.py) for an example. + +Note that if you use the API it will also call it when you call flush(). +You could set *api_call_interval* to -1, so that it will not be called automatically and then force a call at the end of each epoch to get the emission of each epoch. ### Using comet.ml diff --git a/codecarbon/emissions_tracker.py b/codecarbon/emissions_tracker.py index 9dbf869fb..907b80d26 100644 --- a/codecarbon/emissions_tracker.py +++ b/codecarbon/emissions_tracker.py @@ -153,6 +153,7 @@ def __init__( :param measure_power_secs: Interval (in seconds) to measure hardware power usage, defaults to 15 :param api_call_interval: Occurrence to wait before calling API : + -1 : only call api on flush() and at the end. 1 : at every measure 2 : every 2 measure, etc... :param api_endpoint: Optional URL of Code Carbon API endpoint for sending @@ -475,7 +476,7 @@ def _measure_power(self) -> None: ) self._last_measured_time = time.time() self._measure_occurrence += 1 - if self._cc_api__out is not None: + if self._cc_api__out is not None and self._api_call_interval != -1: if self._measure_occurrence >= self._api_call_interval: emissions = self._prepare_emissions_data(delta=True) logger.info( diff --git a/examples/README.md b/examples/README.md index e1029edea..c82eba712 100644 --- a/examples/README.md +++ b/examples/README.md @@ -9,6 +9,7 @@ pip install -r requirements-examples.txt ## Examples * [mnist.py](mnist.py): Usage using explicit `CO2Tracker` objects. * [mnist_decorator.py](mnist_decorator.py): Using the `@track_co2` decorator. +* [mnist_callback.py](mnist_callback.py): Using Keras callbacks to save emission after each epoch. * [comet-mnist.py](comet-mnist.py): Using `CO2Tracker` with [`Comet`](https://www.comet.ml/site) for automatic experiment and emissions tracking. * [api_call_demo.py](api_call_demo.py) : Simplest demo to send computer emissions to CodeCarbon API. * [api_call_debug.py](api_call_debug.py) : Script to send computer emissions to CodeCarbon API. Made for debugging : debug log and send data every 20 seconds. From 354c72497576ad7d5108bb2c9a9cb1bc0cd19dd9 Mon Sep 17 00:00:00 2001 From: Benoit Courty <6603048+benoit-cty@users.noreply.github.com> Date: Sun, 22 Aug 2021 08:11:06 +0200 Subject: [PATCH 03/20] language improvement Co-authored-by: Stas Bekman --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 99857ea18..67fdda91b 100644 --- a/README.md +++ b/README.md @@ -151,9 +151,9 @@ with EmissionsTracker() as tracker: ### Flush data when running -By default, Code Carbon only write the CVS output file when it stop. -But for long run you may want to get intermediate data. -It is possible to call the flush() methode to do so. +By default, Code Carbon only writes the CVS output file when it stops. +But for a long run you may want to save intermediate data. +It is possible to call the flush() method to do so. Have a look to [./examples/mnist_callback.py](./examples/mnist_callback.py) for an example. From 715f7cfb66c19070d40e0610e62a5c7793661140 Mon Sep 17 00:00:00 2001 From: Benoit Courty <6603048+benoit-cty@users.noreply.github.com> Date: Sun, 22 Aug 2021 08:11:27 +0200 Subject: [PATCH 04/20] language improvement Co-authored-by: Stas Bekman --- examples/mnist_callback.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/mnist_callback.py b/examples/mnist_callback.py index 994075d5e..adb5242d2 100644 --- a/examples/mnist_callback.py +++ b/examples/mnist_callback.py @@ -4,7 +4,7 @@ from codecarbon import EmissionsTracker """ -This sample code show how to use CodeCarbon as a Keras Callback +This sample code shows how to use CodeCarbon as a Keras Callback to save emissions after each epoch. """ From 4d12b87e179a1f2ae7aa0c578a091160ed4835ad Mon Sep 17 00:00:00 2001 From: Benoit Courty <6603048+benoit-cty@users.noreply.github.com> Date: Sun, 22 Aug 2021 08:11:40 +0200 Subject: [PATCH 05/20] language improvement Co-authored-by: Stas Bekman --- examples/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/README.md b/examples/README.md index c82eba712..6f71b5530 100644 --- a/examples/README.md +++ b/examples/README.md @@ -9,7 +9,7 @@ pip install -r requirements-examples.txt ## Examples * [mnist.py](mnist.py): Usage using explicit `CO2Tracker` objects. * [mnist_decorator.py](mnist_decorator.py): Using the `@track_co2` decorator. -* [mnist_callback.py](mnist_callback.py): Using Keras callbacks to save emission after each epoch. +* [mnist_callback.py](mnist_callback.py): Using Keras callbacks to save emissions after each epoch. * [comet-mnist.py](comet-mnist.py): Using `CO2Tracker` with [`Comet`](https://www.comet.ml/site) for automatic experiment and emissions tracking. * [api_call_demo.py](api_call_demo.py) : Simplest demo to send computer emissions to CodeCarbon API. * [api_call_debug.py](api_call_debug.py) : Script to send computer emissions to CodeCarbon API. Made for debugging : debug log and send data every 20 seconds. From a681a0a1c5c7799fc56947d3bcf5ab59fe01dcc7 Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Sun, 22 Aug 2021 11:10:39 +0200 Subject: [PATCH 06/20] Return function result when using decorator --- codecarbon/emissions_tracker.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/codecarbon/emissions_tracker.py b/codecarbon/emissions_tracker.py index 907b80d26..483a08c6b 100644 --- a/codecarbon/emissions_tracker.py +++ b/codecarbon/emissions_tracker.py @@ -674,6 +674,7 @@ def track_emissions( def _decorate(fn: Callable): @wraps(fn) def wrapped_fn(*args, **kwargs): + fn_result = None if offline and offline is not _sentinel: if (country_iso_code is None or country_iso_code is _sentinel) and ( cloud_provider is None or cloud_provider is _sentinel @@ -694,7 +695,7 @@ def wrapped_fn(*args, **kwargs): co2_signal_api_token=co2_signal_api_token, ) tracker.start() - fn(*args, **kwargs) + fn_result = fn(*args, **kwargs) tracker.stop() else: tracker = EmissionsTracker( @@ -715,7 +716,7 @@ def wrapped_fn(*args, **kwargs): ) tracker.start() try: - fn(*args, **kwargs) + fn_result = fn(*args, **kwargs) finally: logger.info( "\nGraceful stopping: collecting and writing information.\n" @@ -723,6 +724,7 @@ def wrapped_fn(*args, **kwargs): ) tracker.stop() logger.info("Done!\n") + return fn_result return wrapped_fn From 5363f0d1f20287197f7be30f00932999033646f9 Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Sun, 22 Aug 2021 11:14:08 +0200 Subject: [PATCH 07/20] API calls logs improvement --- codecarbon/core/api_client.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/codecarbon/core/api_client.py b/codecarbon/core/api_client.py index 54b0af76a..67235462d 100644 --- a/codecarbon/core/api_client.py +++ b/codecarbon/core/api_client.py @@ -52,14 +52,19 @@ def add_emission(self, carbon_emission: dict): self._previous_call = time.time() if self.run_id is None: # TODO : raise an Exception ? - logger.error( - "add_emissionadd_emission need a run_id : the initial call may " + logger.debug( + "ApiClient.add_emission need a run_id : the initial call may " + "have failed. Retrying..." ) self._create_run(self.experiment_id) + if self.run_id is None: + logger.error( + "ApiClient.add_emission still no run_id, arborting for this time !" + ) + return False if carbon_emission["duration"] < 1: logger.warning( - "Warning : emission not send because of a duration smaller than 1." + "ApiClient : emissions not send because of a duration smaller than 1." ) return False emission = EmissionCreate( @@ -80,10 +85,10 @@ def add_emission(self, carbon_emission: dict): payload = dataclasses.asdict(emission) url = self.url + "/emission" r = requests.post(url=url, json=payload, timeout=2) - logger.debug(f"Successful upload emission {payload} to {url}") if r.status_code != 201: self._log_error(url, payload, r) return False + logger.debug(f"ApiClient - Successful upload emission {payload} to {url}") except Exception as e: logger.error(e, exc_info=True) return False @@ -96,7 +101,7 @@ def _create_run(self, experiment_id): """ if self.experiment_id is None: # TODO : raise an Exception ? - logger.error("FATAL The API _create_run need an experiment_id !") + logger.error("ApiClient FATAL The API _create_run need an experiment_id !") return None try: run = RunCreate( @@ -110,7 +115,7 @@ def _create_run(self, experiment_id): return None self.run_id = r.json()["id"] logger.info( - "Successfully registered your run on the API.\n\n" + "ApiClient Successfully registered your run on the API.\n\n" + f"Run ID: {self.run_id}\n" + f"Experiment ID: {self.experiment_id}\n" ) @@ -134,10 +139,10 @@ def add_experiment(self, experiment: ExperimentCreate): def _log_error(self, url, payload, response): logger.error( - f" Error when calling the API on {url} with : {json.dumps(payload)}" + f"ApiClient Error when calling the API on {url} with : {json.dumps(payload)}" ) logger.error( - f" API return http code {response.status_code} and answer : {response.text}" + f"ApiClient API return http code {response.status_code} and answer : {response.text}" ) def close_experiment(self): From f3ae6874fc788cea2873233a1cb18b02fe6a49f3 Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Sun, 22 Aug 2021 11:14:41 +0200 Subject: [PATCH 08/20] Add test --- tests/test_emissions_tracker_constant.py | 27 ++++++-- tests/test_emissions_tracker_flush.py | 82 ++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 5 deletions(-) create mode 100644 tests/test_emissions_tracker_flush.py diff --git a/tests/test_emissions_tracker_constant.py b/tests/test_emissions_tracker_constant.py index f657132b6..4a46b85ea 100644 --- a/tests/test_emissions_tracker_constant.py +++ b/tests/test_emissions_tracker_constant.py @@ -1,4 +1,5 @@ import os +import tempfile import time import unittest @@ -17,15 +18,23 @@ def heavy_computation(run_time_secs: int = 3): class TestCarbonTrackerConstant(unittest.TestCase): def setUp(self) -> None: - self.project_name = "project_foo" - self.emissions_file_path = os.path.join(os.getcwd(), "emissions.csv") + self.project_name = "project_TestCarbonTrackerConstant" + self.emissions_file = "emissions-test-TestCarbonTrackerConstant.csv" + self.emissions_path = tempfile.gettempdir() + self.emissions_file_path = os.path.join( + self.emissions_path, self.emissions_file + ) + if os.path.isfile(self.emissions_file_path): + os.remove(self.emissions_file_path) def tearDown(self) -> None: if os.path.isfile(self.emissions_file_path): os.remove(self.emissions_file_path) def test_carbon_tracker_online_constant(self): - tracker = EmissionsTracker() + tracker = EmissionsTracker( + output_dir=self.emissions_path, output_file=self.emissions_file + ) tracker.start() heavy_computation(run_time_secs=1) emissions = tracker.stop() @@ -35,7 +44,11 @@ def test_carbon_tracker_online_constant(self): self.verify_output_file(self.emissions_file_path) def test_carbon_tracker_offline_constant(self): - tracker = OfflineEmissionsTracker(country_iso_code="USA") + tracker = OfflineEmissionsTracker( + country_iso_code="USA", + output_dir=self.emissions_path, + output_file=self.emissions_file, + ) tracker.start() heavy_computation(run_time_secs=1) emissions = tracker.stop() @@ -45,7 +58,11 @@ def test_carbon_tracker_offline_constant(self): self.verify_output_file(self.emissions_file_path) def test_decorator_constant(self): - @track_emissions(project_name=self.project_name) + @track_emissions( + project_name=self.project_name, + output_dir=self.emissions_path, + output_file=self.emissions_file, + ) def dummy_train_model(): return 42 diff --git a/tests/test_emissions_tracker_flush.py b/tests/test_emissions_tracker_flush.py new file mode 100644 index 000000000..227d8683e --- /dev/null +++ b/tests/test_emissions_tracker_flush.py @@ -0,0 +1,82 @@ +import os +import tempfile +import time +import unittest + +from codecarbon.emissions_tracker import ( + EmissionsTracker, + OfflineEmissionsTracker, + track_emissions, +) + + +def heavy_computation(run_time_secs: int = 3): + end_time: float = time.time() + run_time_secs # Run for `run_time_secs` seconds + while time.time() < end_time: + pass + + +class TestCarbonTrackerFlush(unittest.TestCase): + def setUp(self) -> None: + self.project_name = "project_TestCarbonTrackerFlush" + self.emissions_file = "emissions-test-TestCarbonTrackerFlush.csv" + self.emissions_path = tempfile.gettempdir() + self.emissions_file_path = os.path.join( + self.emissions_path, self.emissions_file + ) + if os.path.isfile(self.emissions_file_path): + os.remove(self.emissions_file_path) + + def tearDown(self) -> None: + if os.path.isfile(self.emissions_file_path): + os.remove(self.emissions_file_path) + + def test_carbon_tracker_online_flush(self): + tracker = EmissionsTracker( + output_dir=self.emissions_path, output_file=self.emissions_file + ) + tracker.start() + heavy_computation(run_time_secs=1) + tracker.flush() + heavy_computation(run_time_secs=1) + emissions = tracker.stop() + assert isinstance(emissions, float) + self.assertNotEqual(emissions, 0.0) + self.assertAlmostEqual(emissions, 6.262572537957655e-05, places=2) + self.verify_output_file(self.emissions_file_path) + + def test_carbon_tracker_offline_flush(self): + tracker = OfflineEmissionsTracker( + country_iso_code="USA", + output_dir=self.emissions_path, + output_file=self.emissions_file, + ) + tracker.start() + heavy_computation(run_time_secs=1) + tracker.flush() + heavy_computation(run_time_secs=1) + emissions = tracker.stop() + assert isinstance(emissions, float) + self.assertNotEqual(emissions, 0.0) + self.assertAlmostEqual(emissions, 6.262572537957655e-05, places=2) + self.verify_output_file(self.emissions_file_path) + + def test_decorator_flush(self): + @track_emissions( + project_name=self.project_name, + output_dir=self.emissions_path, + output_file=self.emissions_file, + ) + def dummy_train_model(): + # I don't know how to call flush() in decorator mode + return 42 + + res = dummy_train_model() + self.assertEqual(res, 42) + + self.verify_output_file(self.emissions_file_path, 2) + + def verify_output_file(self, file_path: str, expected_lines=3) -> None: + with open(file_path, "r") as f: + lines = [line.rstrip() for line in f] + assert len(lines) == expected_lines From ddb92c48a63f2217809d2e4ee1c13e92528052ac Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Sun, 22 Aug 2021 15:22:12 +0200 Subject: [PATCH 09/20] Add run_id to CSV --- codecarbon/emissions_tracker.py | 5 +++++ codecarbon/output.py | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/codecarbon/emissions_tracker.py b/codecarbon/emissions_tracker.py index 483a08c6b..be16bdfa9 100644 --- a/codecarbon/emissions_tracker.py +++ b/codecarbon/emissions_tracker.py @@ -5,6 +5,7 @@ import dataclasses import os import time +import uuid from abc import ABC, abstractmethod from datetime import datetime from functools import wraps @@ -286,7 +287,10 @@ def __init__( experiment_id=experiment_id, api_key=api_key, ) + self.run_id = self._cc_api__out.run_id self.persistence_objs.append(self._cc_api__out) + else: + self.run_id = uuid.uuid4() @suppress(Exception) def start(self) -> None: @@ -381,6 +385,7 @@ def _prepare_emissions_data(self, delta=False) -> EmissionsData: total_emissions = EmissionsData( timestamp=datetime.now().strftime("%Y-%m-%dT%H:%M:%S"), project_name=self._project_name, + run_id=self.run_id, duration=duration.seconds, emissions=emissions, emissions_rate=emissions * 1000 / duration.seconds, diff --git a/codecarbon/output.py b/codecarbon/output.py index 0da9e0d0b..be89f966b 100644 --- a/codecarbon/output.py +++ b/codecarbon/output.py @@ -25,6 +25,7 @@ class EmissionsData: timestamp: str project_name: str + run_id: str duration: float emissions: float emissions_rate: float @@ -132,6 +133,8 @@ class CodeCarbonAPIOutput(BaseOutput): Send emissions data to HTTP endpoint """ + run_id = None + def __init__(self, endpoint_url: str, experiment_id: str, api_key: str): self.endpoint_url: str = endpoint_url self.api = ApiClient( @@ -139,6 +142,7 @@ def __init__(self, endpoint_url: str, experiment_id: str, api_key: str): endpoint_url=endpoint_url, api_key=api_key, ) + self.run_id = self.api.run_id def out(self, data: EmissionsData): try: From 664e0ee21037537ba29045a85617007e3bd485ca Mon Sep 17 00:00:00 2001 From: Victor Schmidt <9283470+vict0rsch@users.noreply.github.com> Date: Mon, 23 Aug 2021 10:03:50 +0200 Subject: [PATCH 10/20] Apply suggestions from code review Co-authored-by: Stas Bekman --- codecarbon/core/api_client.py | 6 +++--- codecarbon/emissions_tracker.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/codecarbon/core/api_client.py b/codecarbon/core/api_client.py index 67235462d..d95e62ee5 100644 --- a/codecarbon/core/api_client.py +++ b/codecarbon/core/api_client.py @@ -59,12 +59,12 @@ def add_emission(self, carbon_emission: dict): self._create_run(self.experiment_id) if self.run_id is None: logger.error( - "ApiClient.add_emission still no run_id, arborting for this time !" + "ApiClient.add_emission still no run_id, aborting for this time !" ) return False if carbon_emission["duration"] < 1: logger.warning( - "ApiClient : emissions not send because of a duration smaller than 1." + "ApiClient : emissions not sent because of a duration smaller than 1." ) return False emission = EmissionCreate( @@ -101,7 +101,7 @@ def _create_run(self, experiment_id): """ if self.experiment_id is None: # TODO : raise an Exception ? - logger.error("ApiClient FATAL The API _create_run need an experiment_id !") + logger.error("ApiClient FATAL The API _create_run needs an experiment_id !") return None try: run = RunCreate( diff --git a/codecarbon/emissions_tracker.py b/codecarbon/emissions_tracker.py index be16bdfa9..9df033c15 100644 --- a/codecarbon/emissions_tracker.py +++ b/codecarbon/emissions_tracker.py @@ -309,7 +309,7 @@ def start(self) -> None: @suppress(Exception) def flush(self) -> Optional[float]: """ - Write emission to disk or call the API depending of the configuration + Write emission to disk or call the API depending on the configuration but keep running the experiment. :return: CO2 emissions in kgs """ From 7f7c0068a8e868179c1c8b1d9ad37a5657ce66ab Mon Sep 17 00:00:00 2001 From: vict0rsch Date: Mon, 23 Aug 2021 10:08:24 +0200 Subject: [PATCH 11/20] multiple backups --- codecarbon/output.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/codecarbon/output.py b/codecarbon/output.py index be89f966b..2a961033b 100644 --- a/codecarbon/output.py +++ b/codecarbon/output.py @@ -94,7 +94,12 @@ def out(self, data: EmissionsData): file_exists: bool = os.path.isfile(self.save_file_path) if file_exists and not self.has_valid_headers(data): logger.info("Backing up old emission file") - os.rename(self.save_file_path, self.save_file_path + ".bak") + new_name = self.save_file_path + ".bak" + idx = 1 + while os.path.isfile(new_name): + new_name = self.save_file_path + f"_{idx}.bak" + idx += 1 + os.rename(self.save_file_path, new_name) file_exists = False with open(self.save_file_path, "a+") as f: From b9af0aeb76465fab102f36bf4052fbb5e8e2eb85 Mon Sep 17 00:00:00 2001 From: vict0rsch Date: Mon, 23 Aug 2021 10:16:06 +0200 Subject: [PATCH 12/20] use pandas & update --- codecarbon/output.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/codecarbon/output.py b/codecarbon/output.py index 2a961033b..752179610 100644 --- a/codecarbon/output.py +++ b/codecarbon/output.py @@ -11,6 +11,7 @@ from dataclasses import dataclass import requests +import pandas as pd # from core.schema import EmissionCreate, Emission from codecarbon.core.api_client import ApiClient @@ -102,11 +103,27 @@ def out(self, data: EmissionsData): os.rename(self.save_file_path, new_name) file_exists = False - with open(self.save_file_path, "a+") as f: - writer = csv.DictWriter(f, fieldnames=data.values.keys()) - if not file_exists: - writer.writeheader() - writer.writerow(data.values) + if not file_exists: + df = pd.DataFrame(columns=data.values.keys()) + df = df.append(dict(data.values), ignore_index=True) + else: + df = pd.read_csv(self.save_file_path) + df_run = df.loc[df.run_id == data.run_id] + if len(df_run) < 1: + df = df.append(dict(data.values), ignore_index=True) + elif len(df_run) > 1: + logger.warning( + f"CSV contains more than 1 ({len(len(df_run))})" + + f" rows with current run ID ({data.run_id})." + + "Appending instead of updating." + ) + df = df.append(dict(data.values), ignore_index=True) + else: + df.at[ + df.run_id == data.run_id, data.values.keys() + ] = data.values.values() + + df.to_csv(self.save_file_path, index=False) class HTTPOutput(BaseOutput): From 9a877b38801f501e0ac6f3c52387a7d02d78c811 Mon Sep 17 00:00:00 2001 From: vict0rsch Date: Mon, 23 Aug 2021 11:31:22 +0200 Subject: [PATCH 13/20] add on_csv_write argument --- codecarbon/emissions_tracker.py | 12 +++++++++++- codecarbon/output.py | 11 ++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/codecarbon/emissions_tracker.py b/codecarbon/emissions_tracker.py index 9df033c15..011731623 100644 --- a/codecarbon/emissions_tracker.py +++ b/codecarbon/emissions_tracker.py @@ -147,6 +147,7 @@ def __init__( co2_signal_api_token: Optional[str] = _sentinel, tracking_mode: Optional[str] = _sentinel, log_level: Optional[Union[int, str]] = _sentinel, + on_csv_write: Optional[str] = _sentinel, ): """ :param project_name: Project name for current experiment run, default name @@ -180,6 +181,10 @@ def __init__( :param log_level: Global codecarbon log level. Accepts one of: {"debug", "info", "warning", "error", "critical"}. Defaults to "info". + :param on_csv_write: "append" or "update". Whether to always append a new line + to the csv when writing or to update the existing `run_id` + row (useful when calling`tracker.flush()` manually). + Accepts one of "append" or "update". """ self._external_conf = get_hierarchical_config() @@ -196,6 +201,7 @@ def __init__( self._set_from_conf(save_to_api, "save_to_api", False, bool) self._set_from_conf(save_to_file, "save_to_file", True, bool) self._set_from_conf(tracking_mode, "tracking_mode", "machine") + self._set_from_conf(on_csv_write, "on_csv_write", "append") assert self._tracking_mode in ["machine", "process"] set_log_level(self._log_level) @@ -272,7 +278,11 @@ def __init__( if self._save_to_file: self.persistence_objs.append( - FileOutput(os.path.join(self._output_dir, self._output_file)) + FileOutput( + os.path.join( + self._output_dir, self._output_file, self._on_csv_write + ) + ) ) if self._emissions_endpoint: diff --git a/codecarbon/output.py b/codecarbon/output.py index 752179610..bf073f3bd 100644 --- a/codecarbon/output.py +++ b/codecarbon/output.py @@ -81,7 +81,13 @@ class FileOutput(BaseOutput): Saves experiment artifacts to a file """ - def __init__(self, save_file_path: str): + def __init__(self, save_file_path: str, on_flush: str = "append"): + if on_flush not in {"append", "update"}: + raise ValueError( + f"Unknown `on_flush` value: {on_flush}" + + " (should be one of 'append' or 'update'" + ) + self.on_flush: str = on_flush self.save_file_path: str = save_file_path def has_valid_headers(self, data: EmissionsData): @@ -106,6 +112,9 @@ def out(self, data: EmissionsData): if not file_exists: df = pd.DataFrame(columns=data.values.keys()) df = df.append(dict(data.values), ignore_index=True) + elif self.on_flush == "append": + df = pd.read_csv(self.save_file_path) + df = df.append(dict(data.values), ignore_index=True) else: df = pd.read_csv(self.save_file_path) df_run = df.loc[df.run_id == data.run_id] From 4ed27687ca99f2c13bb05bb67f6a70d95e558682 Mon Sep 17 00:00:00 2001 From: vict0rsch Date: Mon, 23 Aug 2021 11:42:35 +0200 Subject: [PATCH 14/20] typo --- codecarbon/emissions_tracker.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/codecarbon/emissions_tracker.py b/codecarbon/emissions_tracker.py index 011731623..710abe4ac 100644 --- a/codecarbon/emissions_tracker.py +++ b/codecarbon/emissions_tracker.py @@ -279,9 +279,8 @@ def __init__( if self._save_to_file: self.persistence_objs.append( FileOutput( - os.path.join( - self._output_dir, self._output_file, self._on_csv_write - ) + os.path.join(self._output_dir, self._output_file), + self._on_csv_write, ) ) From 37b56806e44735c3b3671223572635dc3ea6728d Mon Sep 17 00:00:00 2001 From: vict0rsch Date: Mon, 23 Aug 2021 11:55:06 +0200 Subject: [PATCH 15/20] store final emissions_data --- codecarbon/emissions_tracker.py | 1 + 1 file changed, 1 insertion(+) diff --git a/codecarbon/emissions_tracker.py b/codecarbon/emissions_tracker.py index 710abe4ac..15efdf95e 100644 --- a/codecarbon/emissions_tracker.py +++ b/codecarbon/emissions_tracker.py @@ -362,6 +362,7 @@ def stop(self) -> Optional[float]: persistence.out(emissions_data) + self.final_emissions_data = emissions_data self.final_emissions = emissions_data.emissions return emissions_data.emissions From 80c0e9e0a0c5d7a5189bd976fee27de24bad237d Mon Sep 17 00:00:00 2001 From: vict0rsch Date: Mon, 23 Aug 2021 11:55:12 +0200 Subject: [PATCH 16/20] update ref valid_headers --- tests/test_data/emissions_valid_headers.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_data/emissions_valid_headers.csv b/tests/test_data/emissions_valid_headers.csv index 50dcc40f2..01a198319 100644 --- a/tests/test_data/emissions_valid_headers.csv +++ b/tests/test_data/emissions_valid_headers.csv @@ -1,2 +1,2 @@ -timestamp,project_name,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,on_cloud,cloud_provider,cloud_region -2021-07-13T17:36:33,codecarbon,175.7496521,0.001327206,0.000663603,0.27,0,0.154618823,0.001079437,0,0.000618153,0.00169759,Morocco,MAR,casablanca-settat,N,, +timestamp,project_name,run_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,on_cloud,cloud_provider,cloud_region +2021-07-13T17:36:33,codecarbon,3b3639f6-776c-4216-97de-bc52fc895bca,175.7496521,0.001327206,0.000663603,0.27,0,0.154618823,0.001079437,0,0.000618153,0.00169759,Morocco,MAR,casablanca-settat,N,, From f6633439845d4d48ff88cf3a59e80a2148a93e7d Mon Sep 17 00:00:00 2001 From: vict0rsch Date: Tue, 24 Aug 2021 14:50:37 +0200 Subject: [PATCH 17/20] on_flush to on_csv_write --- codecarbon/output.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/codecarbon/output.py b/codecarbon/output.py index bf073f3bd..67830c54d 100644 --- a/codecarbon/output.py +++ b/codecarbon/output.py @@ -81,13 +81,13 @@ class FileOutput(BaseOutput): Saves experiment artifacts to a file """ - def __init__(self, save_file_path: str, on_flush: str = "append"): - if on_flush not in {"append", "update"}: + def __init__(self, save_file_path: str, on_csv_write: str = "append"): + if on_csv_write not in {"append", "update"}: raise ValueError( - f"Unknown `on_flush` value: {on_flush}" + f"Unknown `on_csv_write` value: {on_csv_write}" + " (should be one of 'append' or 'update'" ) - self.on_flush: str = on_flush + self.on_csv_write: str = on_csv_write self.save_file_path: str = save_file_path def has_valid_headers(self, data: EmissionsData): @@ -112,7 +112,7 @@ def out(self, data: EmissionsData): if not file_exists: df = pd.DataFrame(columns=data.values.keys()) df = df.append(dict(data.values), ignore_index=True) - elif self.on_flush == "append": + elif self.on_csv_write == "append": df = pd.read_csv(self.save_file_path) df = df.append(dict(data.values), ignore_index=True) else: From 57f15459542ad9bbad6a0bc91e09d7b40950def5 Mon Sep 17 00:00:00 2001 From: vict0rsch Date: Tue, 24 Aug 2021 15:11:54 +0200 Subject: [PATCH 18/20] add `backup` util --- codecarbon/core/util.py | 46 +++++++++++++++++++++++++++++++++++++++++ codecarbon/output.py | 9 +++----- 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/codecarbon/core/util.py b/codecarbon/core/util.py index 5ce0c79ea..bc0f477bc 100644 --- a/codecarbon/core/util.py +++ b/codecarbon/core/util.py @@ -1,5 +1,9 @@ import logging from contextlib import contextmanager +from os.path import expandvars +from pathlib import Path +import pathlib +from typing import Optional, Union from codecarbon.external.logger import logger @@ -36,3 +40,45 @@ def set_log_level(level: str): logger.setLevel(getattr(logging, level)) return logger.error(f"Unknown log level: {level}") + + +def resolve_path(path: Union[str, Path]) -> None: + + """ + Fully resolve a path: + resolve env vars ($HOME etc.) -> expand user (~) -> make absolute + + Args: + path (Union[str, Path]): Path to a file or repository to resolve as + string or pathlib.Path + + Returns: + pathlib.Path: resolved absolute path + """ + return Path(expandvars(str(path))).expanduser().resolve() + + +def backup(file_path: Union[str, Path], ext: Optional[str] = ".bak") -> None: + """ + Resolves the path to a path then backs it up, adding the extension provided. + + Args: + file_path (Union[str, Path]): Path to a file to backup. + ext (Optional[str], optional): extension to append to the filename when + backing it up. Defaults to ".bak". + """ + file_path = resolve_path(file_path) + if not file_path.exists(): + return + assert file_path.is_file() + idx = 0 + parent = file_path.parent + file_name = f"{file_path.name}{ext}" + backup = parent / file_name + + while backup.exists(): + file_name = f"{file_path.name}_{idx}{ext}" + backup = parent / file_name + idx += 1 + + file_path.rename(backup) diff --git a/codecarbon/output.py b/codecarbon/output.py index 67830c54d..3fbf59510 100644 --- a/codecarbon/output.py +++ b/codecarbon/output.py @@ -9,12 +9,14 @@ from abc import ABC, abstractmethod from collections import OrderedDict from dataclasses import dataclass +from pathlib import Path import requests import pandas as pd # from core.schema import EmissionCreate, Emission from codecarbon.core.api_client import ApiClient +from codecarbon.core.util import backup from codecarbon.external.logger import logger @@ -101,12 +103,7 @@ def out(self, data: EmissionsData): file_exists: bool = os.path.isfile(self.save_file_path) if file_exists and not self.has_valid_headers(data): logger.info("Backing up old emission file") - new_name = self.save_file_path + ".bak" - idx = 1 - while os.path.isfile(new_name): - new_name = self.save_file_path + f"_{idx}.bak" - idx += 1 - os.rename(self.save_file_path, new_name) + backup(self.save_file_path) file_exists = False if not file_exists: From 3065ddfd816ce02c43f8b6390f29c3e2baad7a10 Mon Sep 17 00:00:00 2001 From: vict0rsch Date: Tue, 24 Aug 2021 15:35:38 +0200 Subject: [PATCH 19/20] add uuid4 run_id --- tests/test_api_call.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_api_call.py b/tests/test_api_call.py index 41d3ab9db..7374d2dd8 100644 --- a/tests/test_api_call.py +++ b/tests/test_api_call.py @@ -1,5 +1,7 @@ import dataclasses +from uuid import uuid4 + import requests_mock from codecarbon.core.api_client import ApiClient @@ -25,6 +27,7 @@ def test_call_api(): carbon_emission = EmissionsData( timestamp="222", project_name="", + run_id=uuid4(), duration=1.5, emissions=2.0, emissions_rate=2.0, From 5fe1e137cc0e0cd08af22227575e222f73dde310 Mon Sep 17 00:00:00 2001 From: vict0rsch Date: Wed, 25 Aug 2021 09:47:48 +0200 Subject: [PATCH 20/20] pre-commit --- codecarbon/core/util.py | 1 - codecarbon/output.py | 3 +-- tests/test_api_call.py | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/codecarbon/core/util.py b/codecarbon/core/util.py index bc0f477bc..368d59e47 100644 --- a/codecarbon/core/util.py +++ b/codecarbon/core/util.py @@ -2,7 +2,6 @@ from contextlib import contextmanager from os.path import expandvars from pathlib import Path -import pathlib from typing import Optional, Union from codecarbon.external.logger import logger diff --git a/codecarbon/output.py b/codecarbon/output.py index 3fbf59510..6b3bce78b 100644 --- a/codecarbon/output.py +++ b/codecarbon/output.py @@ -9,10 +9,9 @@ from abc import ABC, abstractmethod from collections import OrderedDict from dataclasses import dataclass -from pathlib import Path -import requests import pandas as pd +import requests # from core.schema import EmissionCreate, Emission from codecarbon.core.api_client import ApiClient diff --git a/tests/test_api_call.py b/tests/test_api_call.py index 7374d2dd8..d945773e3 100644 --- a/tests/test_api_call.py +++ b/tests/test_api_call.py @@ -1,5 +1,4 @@ import dataclasses - from uuid import uuid4 import requests_mock