From 7da825e353c42342c2351a7203476bc8d6703b83 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Sun, 15 Jan 2023 09:37:41 +0000 Subject: [PATCH 01/43] Support saving of pickleable Python objects --- simvue/offline.py | 6 +++++ simvue/remote.py | 43 ++++++++++++++++++++++++++------ simvue/run.py | 62 +++++++++++++++++++++++++++++++++-------------- 3 files changed, 85 insertions(+), 26 deletions(-) diff --git a/simvue/offline.py b/simvue/offline.py index 8494d293..e2e90128 100644 --- a/simvue/offline.py +++ b/simvue/offline.py @@ -2,6 +2,7 @@ import logging import os import time +import uuid from .utilities import get_offline_directory, create_file @@ -90,6 +91,11 @@ def save_file(self, data): """ Save file """ + if 'pickled' in data: + temp_file = f"{self._directory}/temp-{str(uuid.uuid4())}.pickle" + with open(temp_file, 'wb') as fh: + fh.write(data['pickled']) + data['pickledFile'] = temp_file unique_id = time.time() filename = f"{self._directory}/file-{unique_id}.json" self._write_json(filename, data) diff --git a/simvue/remote.py b/simvue/remote.py index 31f3b8d2..fb1cf4ab 100644 --- a/simvue/remote.py +++ b/simvue/remote.py @@ -10,6 +10,17 @@ UPLOAD_TIMEOUT = 30 DEFAULT_API_TIMEOUT = 10 +def prepare_for_api(data_in): + """ + Remove references to pickling + """ + data = data_in.copy() + if 'pickled' in data: + del data['pickled'] + if 'pickledFile' in data: + del data['pickledFile'] + return data + class Remote(object): """ Class which interacts with Simvue REST API @@ -90,8 +101,9 @@ def save_file(self, data): Save file """ # Get presigned URL + print('god=', prepare_for_api(data)) try: - response = post(f"{self._url}/api/data", self._headers, data) + response = post(f"{self._url}/api/data", self._headers, prepare_for_api(data)) except Exception as err: self._error(f"Got exception when preparing to upload file {data['name']} to object storage: {str(err)}") return False @@ -105,15 +117,30 @@ def save_file(self, data): if 'url' in response.json(): url = response.json()['url'] - try: - with open(data['originalPath'], 'rb') as fh: - response = put(url, {}, fh, is_json=False, timeout=UPLOAD_TIMEOUT) + if 'pickled' in data and 'pickledFile' not in data: + try: + response = put(url, {}, data['pickled'], is_json=False, timeout=UPLOAD_TIMEOUT) if response.status_code != 200: - self._error(f"Got status code {response.status_code} when uploading file {data['name']} to object storage") + self._error(f"Got status code {response.status_code} when uploading object {data['name']} to object storage") return None - except Exception as err: - self._error(f"Got exception when uploading file {data['name']} to object storage: {str(err)}") - return None + except Exception as err: + self._error(f"Got exception when uploading object {data['name']} to object storage: {str(err)}") + return None + else: + if 'pickledFile' in data: + use_filename = data['pickledFile'] + else: + use_filename = data['originalPath'] + + try: + with open(use_filename, 'rb') as fh: + response = put(url, {}, fh, is_json=False, timeout=UPLOAD_TIMEOUT) + if response.status_code != 200: + self._error(f"Got status code {response.status_code} when uploading file {data['name']} to object storage") + return None + except Exception as err: + self._error(f"Got exception when uploading file {data['name']} to object storage: {str(err)}") + return None return True diff --git a/simvue/run.py b/simvue/run.py index 51af4587..4258186b 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -1,11 +1,13 @@ import configparser import datetime +import dill import hashlib import logging import mimetypes import os import re import multiprocessing +import pickle import socket import subprocess import sys @@ -96,18 +98,22 @@ def get_system(): return system -def calculate_sha256(filename): +def calculate_sha256(filename, is_file): """ Calculate sha256 checksum of the specified file """ sha256_hash = hashlib.sha256() - try: - with open(filename, "rb") as fd: - for byte_block in iter(lambda: fd.read(CHECKSUM_BLOCK_SIZE), b""): - sha256_hash.update(byte_block) - return sha256_hash.hexdigest() - except: - pass + if is_file: + try: + with open(filename, "rb") as fd: + for byte_block in iter(lambda: fd.read(CHECKSUM_BLOCK_SIZE), b""): + sha256_hash.update(byte_block) + return sha256_hash.hexdigest() + except: + pass + else: + sha256_hash.update(bytes(filename)) + return sha256_hash.hexdigest() return None @@ -474,9 +480,9 @@ def log_metrics(self, metrics, step=None, time=None, timestamp=None): return True - def save(self, filename, category, filetype=None, preserve_path=False): + def save(self, filename, category, filetype=None, preserve_path=False, name=None): """ - Upload file + Upload file or object """ if self._mode == 'disabled': return True @@ -489,9 +495,13 @@ def save(self, filename, category, filetype=None, preserve_path=False): self._error('Run is not active') return False - if not os.path.isfile(filename): - self._error(f"File {filename} does not exist") - return False + is_file = False + if isinstance(filename, str): + if not os.path.isfile(filename): + self._error(f"File {filename} does not exist") + return False + else: + is_file = True if filetype: mimetypes_valid = [] @@ -508,16 +518,25 @@ def save(self, filename, category, filetype=None, preserve_path=False): data['name'] = filename if data['name'].startswith('./'): data['name'] = data['name'][2:] - else: + elif is_file: data['name'] = os.path.basename(filename) + + if name: + data['name'] = name + data['run'] = self._name data['category'] = category - data['checksum'] = calculate_sha256(filename) - data['size'] = os.path.getsize(filename) - data['originalPath'] = os.path.abspath(os.path.expanduser(os.path.expandvars(filename))) + + data['checksum'] = calculate_sha256(filename, is_file) + if is_file: + data['size'] = os.path.getsize(filename) + data['originalPath'] = os.path.abspath(os.path.expanduser(os.path.expandvars(filename))) + else: + data['size'] = sys.getsizeof(filename) + data['originalPath'] = '' # Determine mimetype - if not filetype: + if not filetype and is_file: mimetypes.init() mimetype = mimetypes.guess_type(filename)[0] if not mimetype: @@ -527,6 +546,13 @@ def save(self, filename, category, filetype=None, preserve_path=False): data['type'] = mimetype + # Pickle object if necessary + if dill.pickles(filename) and not is_file: + if not name: + self._error('To save an object a name must be specified') + data['pickled'] = pickle.dumps(filename) + data['type'] = 'application/octet-stream' + # Register file if not self._simvue.save_file(data): return False From 378c9f2ad72568d66e82b9893cff93f2c2e2fbce Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 09:10:57 +0000 Subject: [PATCH 02/43] Add dependencies required for plots --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 14fba73b..ca4613c4 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ long_description_content_type="text/markdown", url="https://simvue.io", platforms=["any"], - install_requires=["requests", "msgpack", "tenacity", "pyjwt", "psutil", "pydantic"], + install_requires=["requests", "msgpack", "tenacity", "pyjwt", "psutil", "pydantic", "plotly", "matplotlib"], package_dir={'': '.'}, packages=["simvue"], package_data={"": ["README.md"]}, From 6afade8c246e2556628388433def40bd32b1298f Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 09:11:48 +0000 Subject: [PATCH 03/43] Remove accidental commit of testing line --- simvue/remote.py | 1 - 1 file changed, 1 deletion(-) diff --git a/simvue/remote.py b/simvue/remote.py index fb1cf4ab..3a008aa4 100644 --- a/simvue/remote.py +++ b/simvue/remote.py @@ -101,7 +101,6 @@ def save_file(self, data): Save file """ # Get presigned URL - print('god=', prepare_for_api(data)) try: response = post(f"{self._url}/api/data", self._headers, prepare_for_api(data)) except Exception as err: From 694491ebdad44dd42b8509df05b7c5b5aa0582c0 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 09:18:00 +0000 Subject: [PATCH 04/43] Support matplotlib & plotly plots --- simvue/run.py | 55 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/simvue/run.py b/simvue/run.py index 4258186b..23ee1833 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -1,6 +1,4 @@ -import configparser import datetime -import dill import hashlib import logging import mimetypes @@ -14,7 +12,9 @@ import time as tm import platform import uuid -import requests +import dill +import matplotlib.pyplot as plt +import plotly from .worker import Worker from .simvue import Simvue @@ -112,7 +112,10 @@ def calculate_sha256(filename, is_file): except: pass else: - sha256_hash.update(bytes(filename)) + if isinstance(filename, str): + sha256_hash.update(bytes(filename, 'utf-8')) + else: + sha256_hash.update(bytes(filename)) return sha256_hash.hexdigest() return None @@ -261,11 +264,11 @@ def init(self, name=None, metadata={}, tags=[], description=None, folder='/', ru self._check_token() - # compare with pydantic RunInput model + # compare with pydantic RunInput model try: runinput = RunInput(**data) - except ValidationError as e: - self._error(e) + except ValidationError as err: + self._error(err) self._simvue = Simvue(self._name, self._uuid, self._mode, self._suppress_errors) name = self._simvue.create_run(data) @@ -504,7 +507,7 @@ def save(self, filename, category, filetype=None, preserve_path=False, name=None is_file = True if filetype: - mimetypes_valid = [] + mimetypes_valid = ['application/vnd.plotly.v1+json'] mimetypes.init() for _, value in mimetypes.types_map.items(): mimetypes_valid.append(value) @@ -527,10 +530,10 @@ def save(self, filename, category, filetype=None, preserve_path=False, name=None data['run'] = self._name data['category'] = category - data['checksum'] = calculate_sha256(filename, is_file) if is_file: data['size'] = os.path.getsize(filename) data['originalPath'] = os.path.abspath(os.path.expanduser(os.path.expandvars(filename))) + data['checksum'] = calculate_sha256(filename, is_file) else: data['size'] = sys.getsizeof(filename) data['originalPath'] = '' @@ -550,8 +553,35 @@ def save(self, filename, category, filetype=None, preserve_path=False, name=None if dill.pickles(filename) and not is_file: if not name: self._error('To save an object a name must be specified') - data['pickled'] = pickle.dumps(filename) - data['type'] = 'application/octet-stream' + + # Handle matplotlib & plotly + is_plotly = False + module_name = filename.__class__.__module__ + class_name = filename.__class__.__name__ + + if module_name == 'plotly.graph_objs._figure' and class_name == 'Figure': + data_out = filename + is_plotly = True + elif module_name == 'matplotlib.figure' and class_name == 'Figure': + data_out = plotly.tools.mpl_to_plotly(filename) + is_plotly = True + else: + try: + figure = filename.gcf() + data_out = plotly.tools.mpl_to_plotly(figure) + except: + pass + else: + is_plotly = True + + if is_plotly: + data['type'] = 'application/vnd.plotly.v1+json' + data['pickled'] = plotly.io.to_json(data_out, 'json') + else: + data['type'] = 'application/octet-stream' + data['pickled'] = pickle.dumps(filename) + + data['checksum'] = calculate_sha256(data['pickled'], False) # Register file if not self._simvue.save_file(data): @@ -704,7 +734,8 @@ def add_alert(self, notification='none', pattern=None): """ - Creates an alert with the specified name (if it doesn't exist) and applies it to the current run + Creates an alert with the specified name (if it doesn't exist) + and applies it to the current run """ if self._mode == 'disabled': return True From 83a9cc220332d684d8ec6907c12a70ab989e0c01 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 09:24:22 +0000 Subject: [PATCH 05/43] matplotlib not used --- setup.py | 2 +- simvue/run.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index ca4613c4..02e8cc41 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ long_description_content_type="text/markdown", url="https://simvue.io", platforms=["any"], - install_requires=["requests", "msgpack", "tenacity", "pyjwt", "psutil", "pydantic", "plotly", "matplotlib"], + install_requires=["requests", "msgpack", "tenacity", "pyjwt", "psutil", "pydantic", "plotly"], package_dir={'': '.'}, packages=["simvue"], package_data={"": ["README.md"]}, diff --git a/simvue/run.py b/simvue/run.py index 23ee1833..294ba418 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -13,7 +13,6 @@ import platform import uuid import dill -import matplotlib.pyplot as plt import plotly from .worker import Worker From c2f70d7e2c8a0b4ce2b150085efe785debeffc7a Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 10:34:29 +0000 Subject: [PATCH 06/43] Move function to utilities --- simvue/remote.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/simvue/remote.py b/simvue/remote.py index 3a008aa4..3ab98d76 100644 --- a/simvue/remote.py +++ b/simvue/remote.py @@ -3,24 +3,13 @@ import requests from .api import post, put -from .utilities import get_auth, get_expiry +from .utilities import get_auth, get_expiry, prepare_for_api logger = logging.getLogger(__name__) UPLOAD_TIMEOUT = 30 DEFAULT_API_TIMEOUT = 10 -def prepare_for_api(data_in): - """ - Remove references to pickling - """ - data = data_in.copy() - if 'pickled' in data: - del data['pickled'] - if 'pickledFile' in data: - del data['pickledFile'] - return data - class Remote(object): """ Class which interacts with Simvue REST API From 6e60697c93546c913f42aaf0ceabc9a16f0cd0c4 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 10:34:43 +0000 Subject: [PATCH 07/43] Add prepare_for_api function --- simvue/utilities.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/simvue/utilities.py b/simvue/utilities.py index 7bb59149..8fc005d3 100644 --- a/simvue/utilities.py +++ b/simvue/utilities.py @@ -77,3 +77,14 @@ def get_expiry(token): except: pass return expiry + +def prepare_for_api(data_in, all=True): + """ + Remove references to pickling + """ + data = data_in.copy() + if 'pickled' in data: + del data['pickled'] + if 'pickledFile' in data and all: + del data['pickledFile'] + return data From b0dc689865611c8b1d3fc335a8dddc3f413cccc3 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 12:49:06 +0000 Subject: [PATCH 08/43] Need to remove pickled object from json sent to API --- simvue/offline.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/simvue/offline.py b/simvue/offline.py index e2e90128..7414db3a 100644 --- a/simvue/offline.py +++ b/simvue/offline.py @@ -1,10 +1,11 @@ +import codecs import json import logging import os import time import uuid -from .utilities import get_offline_directory, create_file +from .utilities import get_offline_directory, create_file, prepare_for_api logger = logging.getLogger(__name__) @@ -98,7 +99,7 @@ def save_file(self, data): data['pickledFile'] = temp_file unique_id = time.time() filename = f"{self._directory}/file-{unique_id}.json" - self._write_json(filename, data) + self._write_json(filename, prepare_for_api(data, False)) return True def add_alert(self, data): From c74ad3d641e1db4a26d0be1c114b0ab46920b888 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 12:50:19 +0000 Subject: [PATCH 09/43] Correct assignment of run name when sending json to API --- simvue/remote.py | 30 ++++++++++++++++++++++++------ simvue/sender.py | 12 ++++++------ 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/simvue/remote.py b/simvue/remote.py index 3ab98d76..c241dadd 100644 --- a/simvue/remote.py +++ b/simvue/remote.py @@ -53,10 +53,13 @@ def create_run(self, data): return self._name - def update(self, data): + def update(self, data, run=None): """ Update metadata, tags or status """ + if run is not None: + data['name'] = run + try: response = put(f"{self._url}/api/runs", self._headers, data) except Exception as err: @@ -69,10 +72,13 @@ def update(self, data): self._error(f"Got status code {response.status_code} when updating run") return False - def set_folder_details(self, data): + def set_folder_details(self, data, run=None): """ Set folder details """ + if run is not None: + data['run'] = run + try: response = put(f"{self._url}/api/folders", self._headers, data) except Exception as err: @@ -85,10 +91,13 @@ def set_folder_details(self, data): self._error(f"Got status code {response.status_code} when updating folder details") return False - def save_file(self, data): + def save_file(self, data, run=None): """ Save file """ + if run is not None: + data['run'] = run + # Get presigned URL try: response = post(f"{self._url}/api/data", self._headers, prepare_for_api(data)) @@ -132,10 +141,13 @@ def save_file(self, data): return True - def add_alert(self, data): + def add_alert(self, data, run=None): """ Add an alert """ + if run is not None: + data['run'] = run + try: response = post(f"{self._url}/api/alerts", self._headers, data) except Exception as err: @@ -148,10 +160,13 @@ def add_alert(self, data): self._error(f"Got status code {response.status_code} when creating alert") return False - def send_metrics(self, data): + def send_metrics(self, data, run=None): """ Send metrics """ + if run is not None: + data['run'] = run + try: response = post(f"{self._url}/api/metrics", self._headers_mp, data, is_json=False) except Exception as err: @@ -164,10 +179,13 @@ def send_metrics(self, data): self._error(f"Got status code {response.status_code} when sending metrics") return False - def send_event(self, data): + def send_event(self, data, run=None): """ Send events """ + if run is not None: + data['run'] = run + try: response = post(f"{self._url}/api/events", self._headers_mp, data, is_json=False) except Exception as err: diff --git a/simvue/sender.py b/simvue/sender.py index d090e392..908babec 100644 --- a/simvue/sender.py +++ b/simvue/sender.py @@ -147,37 +147,37 @@ def sender(): # Handle metrics if '/metrics-' in record: logger.info('Sending metrics for run %s', run_init['name']) - remote.send_metrics(msgpack.packb(get_json(record, name), use_bin_type=True)) + remote.send_metrics(msgpack.packb(get_json(record, name), use_bin_type=True), run_init['name']) rename = True # Handle events if '/event-' in record: logger.info('Sending event for run %s', run_init['name']) - remote.send_event(msgpack.packb(get_json(record, name), use_bin_type=True)) + remote.send_event(msgpack.packb(get_json(record, name), use_bin_type=True), run_init['name']) rename = True # Handle updates if '/update-' in record: logger.info('Sending update for run %s', run_init['name']) - remote.update(get_json(record, name)) + remote.update(get_json(record, name), run_init['name']) rename = True # Handle folders if '/folder-' in record: logger.info('Sending folder details for run %s', run_init['name']) - remote.set_folder_details(get_json(record, name)) + remote.set_folder_details(get_json(record, name), run_init['name']) rename = True # Handle alerts if '/alert-' in record: logger.info('Sending alert details for run %s', run_init['name']) - remote.add_alert(get_json(record, name)) + remote.add_alert(get_json(record, name), run_init['name']) rename = True # Handle files if '/file-' in record: logger.info('Saving file for run %s', run_init['name']) - remote.save_file(get_json(record, name)) + remote.save_file(get_json(record, name), run_init['name']) rename = True # Rename processed files From 0eb73e9b417ca5c5e5545c2de390a99bc182d936 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 13:48:58 +0000 Subject: [PATCH 10/43] Bug fixes to metrics & events --- simvue/remote.py | 10 ++-------- simvue/sender.py | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/simvue/remote.py b/simvue/remote.py index c241dadd..43df9f42 100644 --- a/simvue/remote.py +++ b/simvue/remote.py @@ -160,13 +160,10 @@ def add_alert(self, data, run=None): self._error(f"Got status code {response.status_code} when creating alert") return False - def send_metrics(self, data, run=None): + def send_metrics(self, data): """ Send metrics """ - if run is not None: - data['run'] = run - try: response = post(f"{self._url}/api/metrics", self._headers_mp, data, is_json=False) except Exception as err: @@ -179,13 +176,10 @@ def send_metrics(self, data, run=None): self._error(f"Got status code {response.status_code} when sending metrics") return False - def send_event(self, data, run=None): + def send_event(self, data): """ Send events """ - if run is not None: - data['run'] = run - try: response = post(f"{self._url}/api/events", self._headers_mp, data, is_json=False) except Exception as err: diff --git a/simvue/sender.py b/simvue/sender.py index 908babec..c630657f 100644 --- a/simvue/sender.py +++ b/simvue/sender.py @@ -12,6 +12,13 @@ logger = logging.getLogger(__name__) +def update_name(name, data): + """ + Update name in metrics/events + """ + for item in data: + item['run'] = name + def add_name(name, data, filename): """ Update name in JSON @@ -147,13 +154,17 @@ def sender(): # Handle metrics if '/metrics-' in record: logger.info('Sending metrics for run %s', run_init['name']) - remote.send_metrics(msgpack.packb(get_json(record, name), use_bin_type=True), run_init['name']) + data = get_json(record, name) + update_name(run_init['name'], data) + remote.send_metrics(msgpack.packb(data, use_bin_type=True)) rename = True # Handle events if '/event-' in record: logger.info('Sending event for run %s', run_init['name']) - remote.send_event(msgpack.packb(get_json(record, name), use_bin_type=True), run_init['name']) + data = get_json(record, name) + update_name(run_init['name'], data) + remote.send_event(msgpack.packb(data, use_bin_type=True)) rename = True # Handle updates From 2c4c14bf07fa5ea34afa69b310dafc74e4cb9a13 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 14:10:05 +0000 Subject: [PATCH 11/43] Clarify error message --- simvue/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simvue/run.py b/simvue/run.py index 294ba418..b1119b3d 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -551,7 +551,7 @@ def save(self, filename, category, filetype=None, preserve_path=False, name=None # Pickle object if necessary if dill.pickles(filename) and not is_file: if not name: - self._error('To save an object a name must be specified') + self._error('To save a Python object a name must be specified') # Handle matplotlib & plotly is_plotly = False From aa99ad511d5bb5b9fdfed598d24dd3cf68bdaad6 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 21:21:59 +0000 Subject: [PATCH 12/43] Update --- CHANGELOG.md | 4 ++++ simvue/__init__.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e74ce6f..30bc54cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Change log +## v0.8.0 + +* Support picklable Python objects, Matplotlib and Plotly plots as artifacts + ## v0.7.2 * Pydantic model is used for input validation. diff --git a/simvue/__init__.py b/simvue/__init__.py index d1a83ba7..3d1137ac 100644 --- a/simvue/__init__.py +++ b/simvue/__init__.py @@ -2,4 +2,4 @@ from simvue.client import Client from simvue.handler import Handler from simvue.models import RunInput -__version__ = '0.7.2' +__version__ = '0.8.0' From a9ea94fa9c7efa84b79f12a1ed0eb8c0f898f910 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 21:26:34 +0000 Subject: [PATCH 13/43] Shorten line --- simvue/sender.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/simvue/sender.py b/simvue/sender.py index c630657f..dc6e7bf6 100644 --- a/simvue/sender.py +++ b/simvue/sender.py @@ -74,7 +74,11 @@ def sender(): elif run.endswith('terminated'): status = 'terminated' - current = run.replace('/running', '').replace('/completed', '').replace('/failed', '').replace('/terminated', '').replace('/created', '') + current = run.replace('/running', '').\ + replace('/completed', '').\ + replace('/failed', '').\ + replace('/terminated', '').\ + replace('/created', '') if os.path.isfile("f{current}/sent"): if status == 'running': From 95d01e4d3e0ade6d013cd2a1386b31c0aed20b4b Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 22:09:58 +0000 Subject: [PATCH 14/43] Very minor adjustment --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30bc54cd..5bc4c641 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## v0.8.0 -* Support picklable Python objects, Matplotlib and Plotly plots as artifacts +* Support picklable Python objects, Matplotlib and Plotly plots as artifacts. ## v0.7.2 From 5c75f55ef2ca55ff55777cde2ee4009f3d3d9e29 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 22:13:24 +0000 Subject: [PATCH 15/43] Filename was wrong --- simvue/sender.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simvue/sender.py b/simvue/sender.py index dc6e7bf6..dfd5f13c 100644 --- a/simvue/sender.py +++ b/simvue/sender.py @@ -164,7 +164,7 @@ def sender(): rename = True # Handle events - if '/event-' in record: + if '/events-' in record: logger.info('Sending event for run %s', run_init['name']) data = get_json(record, name) update_name(run_init['name'], data) From e1e7c069dbe24f6632dc6564788d8575d572fd3e Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 22:15:28 +0000 Subject: [PATCH 16/43] Minor adjustment to message --- simvue/sender.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simvue/sender.py b/simvue/sender.py index dfd5f13c..9a5bedf6 100644 --- a/simvue/sender.py +++ b/simvue/sender.py @@ -165,7 +165,7 @@ def sender(): # Handle events if '/events-' in record: - logger.info('Sending event for run %s', run_init['name']) + logger.info('Sending events for run %s', run_init['name']) data = get_json(record, name) update_name(run_init['name'], data) remote.send_event(msgpack.packb(data, use_bin_type=True)) From 229ac42581f73a6af0e9420daefd464e6cd90fa5 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 22:23:50 +0000 Subject: [PATCH 17/43] Update --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bc4c641..da82d5de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## v0.8.0 * Support picklable Python objects, Matplotlib and Plotly plots as artifacts. +* (Bug fix) Events in offline mode didn't work. ## v0.7.2 From 21b9e78aeb72f0e0afb24b7a016de8d95b431e73 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 22:29:33 +0000 Subject: [PATCH 18/43] Requests module not used --- simvue/remote.py | 1 - 1 file changed, 1 deletion(-) diff --git a/simvue/remote.py b/simvue/remote.py index 43df9f42..04af7079 100644 --- a/simvue/remote.py +++ b/simvue/remote.py @@ -1,6 +1,5 @@ import logging import time -import requests from .api import post, put from .utilities import get_auth, get_expiry, prepare_for_api From 95c40819a3d7f78e2a918fd92ebbf1c0d7b21c8b Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 16 Jan 2023 23:24:04 +0000 Subject: [PATCH 19/43] dill was forgotten --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 02e8cc41..4efc10c6 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ long_description_content_type="text/markdown", url="https://simvue.io", platforms=["any"], - install_requires=["requests", "msgpack", "tenacity", "pyjwt", "psutil", "pydantic", "plotly"], + install_requires=["dill", "requests", "msgpack", "tenacity", "pyjwt", "psutil", "pydantic", "plotly"], package_dir={'': '.'}, packages=["simvue"], package_data={"": ["README.md"]}, From 36f8eb13824046b5c10f14ff90bb598fe153ac90 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 11:04:55 +0000 Subject: [PATCH 20/43] Tidied up serialization --- simvue/run.py | 46 +++++++---------------------------- simvue/serialization.py | 53 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 37 deletions(-) create mode 100644 simvue/serialization.py diff --git a/simvue/run.py b/simvue/run.py index b1119b3d..8074ba4e 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -17,6 +17,7 @@ from .worker import Worker from .simvue import Simvue +from .serialization import Serializer from .models import RunInput from .utilities import get_auth, get_expiry from pydantic import ValidationError @@ -533,54 +534,25 @@ def save(self, filename, category, filetype=None, preserve_path=False, name=None data['size'] = os.path.getsize(filename) data['originalPath'] = os.path.abspath(os.path.expanduser(os.path.expandvars(filename))) data['checksum'] = calculate_sha256(filename, is_file) - else: - data['size'] = sys.getsizeof(filename) - data['originalPath'] = '' # Determine mimetype + mimetype = None if not filetype and is_file: mimetypes.init() mimetype = mimetypes.guess_type(filename)[0] if not mimetype: mimetype = 'application/octet-stream' - else: + elif is_file: mimetype = filetype - data['type'] = mimetype - - # Pickle object if necessary - if dill.pickles(filename) and not is_file: - if not name: - self._error('To save a Python object a name must be specified') - - # Handle matplotlib & plotly - is_plotly = False - module_name = filename.__class__.__module__ - class_name = filename.__class__.__name__ - - if module_name == 'plotly.graph_objs._figure' and class_name == 'Figure': - data_out = filename - is_plotly = True - elif module_name == 'matplotlib.figure' and class_name == 'Figure': - data_out = plotly.tools.mpl_to_plotly(filename) - is_plotly = True - else: - try: - figure = filename.gcf() - data_out = plotly.tools.mpl_to_plotly(figure) - except: - pass - else: - is_plotly = True - - if is_plotly: - data['type'] = 'application/vnd.plotly.v1+json' - data['pickled'] = plotly.io.to_json(data_out, 'json') - else: - data['type'] = 'application/octet-stream' - data['pickled'] = pickle.dumps(filename) + if mimetype: + data['type'] = mimetype + if not is_file: + data['pickled'], data['type'] = Serializer().serialize(filename) data['checksum'] = calculate_sha256(data['pickled'], False) + data['originalPath'] = '' + data['size'] = sys.getsizeof(data['pickled']) # Register file if not self._simvue.save_file(data): diff --git a/simvue/serialization.py b/simvue/serialization.py new file mode 100644 index 00000000..030f7c3a --- /dev/null +++ b/simvue/serialization.py @@ -0,0 +1,53 @@ +from io import BytesIO +import os +import numpy as np +import plotly + +class Serializer: + def serialize(self, data): + serializer = get_serializer(data) + return serializer(data) + +def get_serializer(data): + """ + Determine which serializer to use + """ + module_name = data.__class__.__module__ + class_name = data.__class__.__name__ + + if module_name == 'plotly.graph_objs._figure' and class_name == 'Figure': + return _serialize_plotly_figure + elif module_name == 'matplotlib.figure' and class_name == 'Figure': + return _serialize_matplotlib_figure + elif module_name == 'numpy' and class_name == 'ndarray': + return _serialize_numpy_array + elif module_name == 'pandas.core.frame' and class_name == 'DataFrame': + return _serialize_dataframe + else: + return None, None + +def _serialize_plotly_figure(data): + mimetype = 'application/vnd.plotly.v1+json' + data = plotly.io.to_json(data, 'json') + return data, mimetype + +def _serialize_matplotlib_figure(data): + mimetype = 'application/vnd.plotly.v1+json' + data = plotly.io.to_json(plotly.tools.mpl_to_plotly(data), 'json') + return data, mimetype + +def _serialize_numpy_array(data): + mimetype = 'application/vnd.simvue.numpy.v1' + mfile = BytesIO() + np.save(mfile, data, allow_pickle=False) + mfile.seek(0) + data = mfile.read() + return data, mimetype + +def _serialize_dataframe(data): + mimetype = 'application/vnd.simvue.df.v1' + mfile = BytesIO() + data.to_csv(mfile) + mfile.seek(0) + data = mfile.read() + return data, mimetype From 9409e3cf9f68c40fa74d34f9be2d22d189e4b910 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 13:08:08 +0000 Subject: [PATCH 21/43] Adjustments to serialization; add deserialization --- simvue/serialization.py | 49 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/simvue/serialization.py b/simvue/serialization.py index 030f7c3a..eaecbe08 100644 --- a/simvue/serialization.py +++ b/simvue/serialization.py @@ -1,12 +1,15 @@ from io import BytesIO import os import numpy as np +import pandas as pd import plotly class Serializer: def serialize(self, data): serializer = get_serializer(data) - return serializer(data) + if serializer: + return serializer(data) + return None, None def get_serializer(data): """ @@ -23,8 +26,7 @@ def get_serializer(data): return _serialize_numpy_array elif module_name == 'pandas.core.frame' and class_name == 'DataFrame': return _serialize_dataframe - else: - return None, None + return None def _serialize_plotly_figure(data): mimetype = 'application/vnd.plotly.v1+json' @@ -51,3 +53,44 @@ def _serialize_dataframe(data): mfile.seek(0) data = mfile.read() return data, mimetype + +class Deserializer: + def deserialize(self, data, mimetype): + deserializer = get_deserializer(data, mimetype) + if deserializer: + return deserializer(data) + return None + +def get_deserializer(data, mimetype): + """ + Determine which deserializer to use + """ + if mimetype == 'application/vnd.plotly.v1+json': + return _deserialize_plotly_figure + elif mimetype == 'application/vnd.plotly.v1+json': + return _deserialize_matplotlib_figure + elif mimetype == 'application/vnd.simvue.numpy.v1': + return _deserialize_numpy_array + elif mimetype == 'application/vnd.simvue.df.v1': + return _deserialize_dataframe + return None + +def _deserialize_plotly_figure(data): + data = plotly.io.from_json(data) + return data + +def _deserialize_matplotlib_figure(data): + data = plotly.io.from_json(data) + return data + +def _deserialize_numpy_array(data): + mfile = BytesIO(data) + mfile.seek(0) + data = np.load(mfile, allow_pickle=False) + return data + +def _deserialize_dataframe(data): + mfile = BytesIO(data) + mfile.seek(0) + data = pd.read_csv(mfile) + return data From bd0d0e03397ec9fee5f503d759d9ac1332e99ec4 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 13:08:28 +0000 Subject: [PATCH 22/43] Support deserialization in get_artifact --- simvue/client.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/simvue/client.py b/simvue/client.py index ae857edf..8053ca48 100644 --- a/simvue/client.py +++ b/simvue/client.py @@ -3,6 +3,7 @@ import pickle import requests +from .serialization import Deserializer from .utilities import get_auth CONCURRENT_DOWNLOADS = 10 @@ -62,23 +63,23 @@ def get_artifact(self, run, name): except requests.exceptions.RequestException: return None - if response.status_code == 200 and response.json(): - url = response.json()[0]['url'] - - try: - response = requests.get(url, timeout=DOWNLOAD_TIMEOUT) - except requests.exceptions.RequestException: - return None - else: + if response.status_code != 200: return None + url = response.json()[0]['url'] + mimetype = response.json()[0]['type'] + try: - content = pickle.loads(response.content) - except: - return response.content - else: + response = requests.get(url, timeout=DOWNLOAD_TIMEOUT) + except requests.exceptions.RequestException: + return None + + content = Deserializer().deserialize(response.content, mimetype) + if content is not None: return content + return response.content + def get_artifact_as_file(self, run, name, path='./'): """ Download an artifact From 601f226cd22221370b342cecab39107d749dae30 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 13:08:42 +0000 Subject: [PATCH 23/43] Remove modules not used --- simvue/run.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/simvue/run.py b/simvue/run.py index 8074ba4e..dcf47adc 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -12,8 +12,6 @@ import time as tm import platform import uuid -import dill -import plotly from .worker import Worker from .simvue import Simvue From eaa5d377e5d8e209f1b1285de92950ec98297572 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 13:20:03 +0000 Subject: [PATCH 24/43] Support optional pickling --- simvue/serialization.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/simvue/serialization.py b/simvue/serialization.py index eaecbe08..c989588f 100644 --- a/simvue/serialization.py +++ b/simvue/serialization.py @@ -1,17 +1,18 @@ from io import BytesIO import os +import pickle import numpy as np import pandas as pd import plotly class Serializer: - def serialize(self, data): - serializer = get_serializer(data) + def serialize(self, data, allow_pickle=False): + serializer = get_serializer(data, allow_pickle) if serializer: return serializer(data) return None, None -def get_serializer(data): +def get_serializer(data, allow_pickle): """ Determine which serializer to use """ @@ -26,6 +27,8 @@ def get_serializer(data): return _serialize_numpy_array elif module_name == 'pandas.core.frame' and class_name == 'DataFrame': return _serialize_dataframe + elif allow_pickle: + return _serialize_pickle return None def _serialize_plotly_figure(data): @@ -54,14 +57,19 @@ def _serialize_dataframe(data): data = mfile.read() return data, mimetype +def _serialize_pickle(data): + mimetype = 'application/octet-stream' + data = pickle.dumps(data) + return data + class Deserializer: - def deserialize(self, data, mimetype): - deserializer = get_deserializer(data, mimetype) + def deserialize(self, data, mimetype, allow_pickle=False): + deserializer = get_deserializer(data, mimetype, allow_pickle) if deserializer: return deserializer(data) return None -def get_deserializer(data, mimetype): +def get_deserializer(data, mimetype, allow_pickle): """ Determine which deserializer to use """ @@ -73,6 +81,8 @@ def get_deserializer(data, mimetype): return _deserialize_numpy_array elif mimetype == 'application/vnd.simvue.df.v1': return _deserialize_dataframe + elif mimetype == 'application/octet-stream' and allow_pickle: + return _deserialize_pickle return None def _deserialize_plotly_figure(data): @@ -94,3 +104,7 @@ def _deserialize_dataframe(data): mfile.seek(0) data = pd.read_csv(mfile) return data + +def _deserialize_pickle(data): + data = pickle.loads(data) + return data From a198cc675f3c1d075918a06c9ba7487c1ab37ff1 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 13:20:44 +0000 Subject: [PATCH 25/43] Support optional pickling --- simvue/client.py | 4 ++-- simvue/run.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/simvue/client.py b/simvue/client.py index 8053ca48..345845f6 100644 --- a/simvue/client.py +++ b/simvue/client.py @@ -52,7 +52,7 @@ def list_artifacts(self, run, category=None): return None - def get_artifact(self, run, name): + def get_artifact(self, run, name, allow_pickle=False): """ Return the contents of the specified artifact """ @@ -74,7 +74,7 @@ def get_artifact(self, run, name): except requests.exceptions.RequestException: return None - content = Deserializer().deserialize(response.content, mimetype) + content = Deserializer().deserialize(response.content, mimetype, allow_pickle) if content is not None: return content diff --git a/simvue/run.py b/simvue/run.py index dcf47adc..15fd5392 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -481,7 +481,7 @@ def log_metrics(self, metrics, step=None, time=None, timestamp=None): return True - def save(self, filename, category, filetype=None, preserve_path=False, name=None): + def save(self, filename, category, filetype=None, preserve_path=False, name=None, allow_pickle=False): """ Upload file or object """ @@ -547,7 +547,7 @@ def save(self, filename, category, filetype=None, preserve_path=False, name=None data['type'] = mimetype if not is_file: - data['pickled'], data['type'] = Serializer().serialize(filename) + data['pickled'], data['type'] = Serializer().serialize(filename, allow_pickle) data['checksum'] = calculate_sha256(data['pickled'], False) data['originalPath'] = '' data['size'] = sys.getsizeof(data['pickled']) From 28671531c6e83a0ffd25cc63701fcfa49e5a3037 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 14:02:16 +0000 Subject: [PATCH 26/43] Support pytorch tensors; adding tensorflow tensors --- simvue/serialization.py | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/simvue/serialization.py b/simvue/serialization.py index c989588f..8dcf6844 100644 --- a/simvue/serialization.py +++ b/simvue/serialization.py @@ -27,6 +27,10 @@ def get_serializer(data, allow_pickle): return _serialize_numpy_array elif module_name == 'pandas.core.frame' and class_name == 'DataFrame': return _serialize_dataframe + elif module_name == 'tensorflow.python.framework.ops' and class_name == 'EagerTensor': + return _serialize_tf_tensor + elif module_name == 'torch' and class_name == 'Tensor': + return _serialize_torch_tensor elif allow_pickle: return _serialize_pickle return None @@ -57,6 +61,24 @@ def _serialize_dataframe(data): data = mfile.read() return data, mimetype +def _serialize_tf_tensor(data): + mimetype = 'application/vnd.simvue.tf.v1' + return data, mimetype + +def _serialize_torch_tensor(data): + try: + import torch + except ImportError: + torch = None + return None + + mimetype = 'application/vnd.simvue.torch.v1' + mfile = BytesIO() + torch.save(data, mfile) + mfile.seek(0) + data = mfile.read() + return data, mimetype + def _serialize_pickle(data): mimetype = 'application/octet-stream' data = pickle.dumps(data) @@ -64,12 +86,12 @@ def _serialize_pickle(data): class Deserializer: def deserialize(self, data, mimetype, allow_pickle=False): - deserializer = get_deserializer(data, mimetype, allow_pickle) + deserializer = get_deserializer(mimetype, allow_pickle) if deserializer: return deserializer(data) return None -def get_deserializer(data, mimetype, allow_pickle): +def get_deserializer(mimetype, allow_pickle): """ Determine which deserializer to use """ @@ -83,6 +105,8 @@ def get_deserializer(data, mimetype, allow_pickle): return _deserialize_dataframe elif mimetype == 'application/octet-stream' and allow_pickle: return _deserialize_pickle + elif mimetype == 'application/vnd.simvue.torch.v1': + return _deserialize_torch_tensor return None def _deserialize_plotly_figure(data): @@ -105,6 +129,18 @@ def _deserialize_dataframe(data): data = pd.read_csv(mfile) return data +def _deserialize_torch_tensor(data): + try: + import torch + except ImportError: + torch = None + return None + + mfile = BytesIO(data) + mfile.seek(0) + data = torch.load(mfile) + return data + def _deserialize_pickle(data): data = pickle.loads(data) return data From 8ea88d429b9db1a1c895522f3e430881046a61ba Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 14:06:05 +0000 Subject: [PATCH 27/43] Use numpy/pandas only when needed --- simvue/serialization.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/simvue/serialization.py b/simvue/serialization.py index 8dcf6844..a5645c22 100644 --- a/simvue/serialization.py +++ b/simvue/serialization.py @@ -1,8 +1,6 @@ from io import BytesIO import os import pickle -import numpy as np -import pandas as pd import plotly class Serializer: @@ -46,6 +44,12 @@ def _serialize_matplotlib_figure(data): return data, mimetype def _serialize_numpy_array(data): + try: + import numpy as np + except ImportError: + np = None + return None + mimetype = 'application/vnd.simvue.numpy.v1' mfile = BytesIO() np.save(mfile, data, allow_pickle=False) @@ -118,12 +122,24 @@ def _deserialize_matplotlib_figure(data): return data def _deserialize_numpy_array(data): + try: + import numpy as np + except ImportError: + np = None + return None + mfile = BytesIO(data) mfile.seek(0) data = np.load(mfile, allow_pickle=False) return data def _deserialize_dataframe(data): + try: + import pandas as pd + except ImportError: + pd = None + return None + mfile = BytesIO(data) mfile.seek(0) data = pd.read_csv(mfile) From bf9e9df8a8819f94f5464b80c1b15477292ba5a9 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 14:10:27 +0000 Subject: [PATCH 28/43] Remove partial tensorflow support --- simvue/serialization.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/simvue/serialization.py b/simvue/serialization.py index a5645c22..904f48ef 100644 --- a/simvue/serialization.py +++ b/simvue/serialization.py @@ -25,8 +25,6 @@ def get_serializer(data, allow_pickle): return _serialize_numpy_array elif module_name == 'pandas.core.frame' and class_name == 'DataFrame': return _serialize_dataframe - elif module_name == 'tensorflow.python.framework.ops' and class_name == 'EagerTensor': - return _serialize_tf_tensor elif module_name == 'torch' and class_name == 'Tensor': return _serialize_torch_tensor elif allow_pickle: @@ -65,10 +63,6 @@ def _serialize_dataframe(data): data = mfile.read() return data, mimetype -def _serialize_tf_tensor(data): - mimetype = 'application/vnd.simvue.tf.v1' - return data, mimetype - def _serialize_torch_tensor(data): try: import torch @@ -107,10 +101,10 @@ def get_deserializer(mimetype, allow_pickle): return _deserialize_numpy_array elif mimetype == 'application/vnd.simvue.df.v1': return _deserialize_dataframe - elif mimetype == 'application/octet-stream' and allow_pickle: - return _deserialize_pickle elif mimetype == 'application/vnd.simvue.torch.v1': return _deserialize_torch_tensor + elif mimetype == 'application/octet-stream' and allow_pickle: + return _deserialize_pickle return None def _deserialize_plotly_figure(data): From a21fcb1c7e943a1f904ddf60b16158b8a55fd53b Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 14:19:01 +0000 Subject: [PATCH 29/43] Remove unused module import --- simvue/serialization.py | 1 - 1 file changed, 1 deletion(-) diff --git a/simvue/serialization.py b/simvue/serialization.py index 904f48ef..36d1b972 100644 --- a/simvue/serialization.py +++ b/simvue/serialization.py @@ -1,5 +1,4 @@ from io import BytesIO -import os import pickle import plotly From 6fec3809f5649be7129f64ba44a5c01417212182 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 14:20:38 +0000 Subject: [PATCH 30/43] Bug fix - forgot to return the mimetype --- simvue/serialization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simvue/serialization.py b/simvue/serialization.py index 36d1b972..843ca7f8 100644 --- a/simvue/serialization.py +++ b/simvue/serialization.py @@ -79,7 +79,7 @@ def _serialize_torch_tensor(data): def _serialize_pickle(data): mimetype = 'application/octet-stream' data = pickle.dumps(data) - return data + return data, mimetype class Deserializer: def deserialize(self, data, mimetype, allow_pickle=False): From 31e95a94e2f9443b99ade66c88e20286db4386d6 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 16:33:35 +0000 Subject: [PATCH 31/43] Add serialization/deserialization tests for numpy & PyTorch arrays --- tests/unit/test_simvue.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/unit/test_simvue.py b/tests/unit/test_simvue.py index b507b79b..fd39b23f 100644 --- a/tests/unit/test_simvue.py +++ b/tests/unit/test_simvue.py @@ -1,6 +1,9 @@ import os from simvue import Run +from simvue.serialization import Serializer, Deserializer import pytest +import numpy as np +import torch def test_suppress_errors(): """ @@ -68,3 +71,25 @@ def test_run_init_folder(): assert exc_info.match(r"string does not match regex") +def test_numpy_array_serialization(): + """ + Check that a numpy array can be serialized then deserialized successfully + """ + array = np.array([1, 2, 3, 4, 5]) + + serialized, mime_type = Serializer().serialize(array) + array_out = Deserializer().deserialize(serialized, mime_type) + + assert (array == array_out).all() + +def test_pytorch_tensor_serialization(): + """ + Check that a PyTorch tensor can be serialized then deserialized successfully + """ + torch.manual_seed(1724) + array = torch.rand(2, 3) + + serialized, mime_type = Serializer().serialize(array) + array_out = Deserializer().deserialize(serialized, mime_type) + + assert (array == array_out).all() From ac21a1ad7f9317488cfd8772246f03455db3fc5b Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 16:39:28 +0000 Subject: [PATCH 32/43] Add requirements.txt for tests --- test-requirements.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 test-requirements.txt diff --git a/test-requirements.txt b/test-requirements.txt new file mode 100644 index 00000000..372fdbaf --- /dev/null +++ b/test-requirements.txt @@ -0,0 +1,11 @@ +dill +requests +msgpack +tenacity +pyjwt +psutil +pydantic +plotly +torch +pandas +numpy From 83de128b771235fad0aa6298b898a051c2fdcf26 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 16:41:00 +0000 Subject: [PATCH 33/43] Use test-requirements.txt for tests --- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index fbd7def6..3b6b0d5a 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -28,7 +28,7 @@ jobs: python -m pip install --upgrade pip pip install flake8 pytest pip install -e . - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + if [ -f test-requirements.txt ]; then pip install -r test-requirements.txt; fi - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names From 202403b7f5ae1a696cdaf5e078a6da0fe9ba8125 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 16:44:21 +0000 Subject: [PATCH 34/43] Add tests for numpy array & pytorch tensor mime types --- tests/unit/test_simvue.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/unit/test_simvue.py b/tests/unit/test_simvue.py index fd39b23f..9ca00545 100644 --- a/tests/unit/test_simvue.py +++ b/tests/unit/test_simvue.py @@ -71,6 +71,25 @@ def test_run_init_folder(): assert exc_info.match(r"string does not match regex") +def test_numpy_array_mime_type(): + """ + Check that the mimetype for numpy arrays is correct + """ + array = np.array([1, 2, 3, 4, 5]) + _, mime_type = Serializer().serialize(array) + + assert (mime_type == 'application/vnd.simvue.numpy.v1') + +def test_pytorch_tensor_mime_type(): + """ + """ + torch.manual_seed(1724) + array = torch.rand(2, 3) + _, mime_type = Serializer().serialize(array) + + assert (mime_type == 'application/vnd.simvue.torch.v1') + + def test_numpy_array_serialization(): """ Check that a numpy array can be serialized then deserialized successfully From a5a1fbfb47a191402e11c6ab7ebee502a7af0f1e Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 16:47:32 +0000 Subject: [PATCH 35/43] Add mime-type tests for matplotlib & plotly --- tests/unit/test_simvue.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/unit/test_simvue.py b/tests/unit/test_simvue.py index 9ca00545..cf54023c 100644 --- a/tests/unit/test_simvue.py +++ b/tests/unit/test_simvue.py @@ -4,6 +4,8 @@ import pytest import numpy as np import torch +import plotly +import matplotlib.pyplot as plt def test_suppress_errors(): """ @@ -89,6 +91,26 @@ def test_pytorch_tensor_mime_type(): assert (mime_type == 'application/vnd.simvue.torch.v1') +def test_matplotlib_figure_mime_type(): + """ + """ + plt.plot([1, 2, 3, 4]) + figure = plt.gcf() + + _, mime_type = Serializer().serialize(figure) + + assert (mime_type == 'application/vnd.plotly.v1+json') + +def test_matplotlib_figure_mime_type(): + """ + """ + plt.plot([1, 2, 3, 4]) + figure = plt.gcf() + plotly_figure = plotly.tools.mpl_to_plotly(figure) + + _, mime_type = Serializer().serialize(plotly_figure) + + assert (mime_type == 'application/vnd.plotly.v1+json') def test_numpy_array_serialization(): """ From 9386bbf336bfc2cdd689332d2b89a34696fb4a8e Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 16:47:49 +0000 Subject: [PATCH 36/43] Add matplotlib dependency --- test-requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test-requirements.txt b/test-requirements.txt index 372fdbaf..cd484dcd 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -9,3 +9,4 @@ plotly torch pandas numpy +matplotlib From d6fc48a9351c88a40dc625f8dea4965662754de3 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 16:50:42 +0000 Subject: [PATCH 37/43] Add test for pickle serialization --- tests/unit/test_simvue.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/unit/test_simvue.py b/tests/unit/test_simvue.py index cf54023c..f2cbafe7 100644 --- a/tests/unit/test_simvue.py +++ b/tests/unit/test_simvue.py @@ -134,3 +134,14 @@ def test_pytorch_tensor_serialization(): array_out = Deserializer().deserialize(serialized, mime_type) assert (array == array_out).all() + +def test_pickle_serialization(): + """ + Check that a dictionary can be serialized then deserialized successfully + """ + data = {'a': 1.0, 'b': 'test'} + + serialized, mime_type = Serializer().serialize(data, allow_pickle=True) + data_out = Deserializer().deserialize(serialized, mime_type, allow_pickle=True) + + assert (data == data_out) From 4ca0de7f02a59e609d44c66a6f068617fc47bfbc Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 16:52:22 +0000 Subject: [PATCH 38/43] Add missing comments --- tests/unit/test_simvue.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_simvue.py b/tests/unit/test_simvue.py index f2cbafe7..951a584e 100644 --- a/tests/unit/test_simvue.py +++ b/tests/unit/test_simvue.py @@ -84,6 +84,7 @@ def test_numpy_array_mime_type(): def test_pytorch_tensor_mime_type(): """ + Check that a PyTorch tensor has the correct mime-type """ torch.manual_seed(1724) array = torch.rand(2, 3) @@ -93,6 +94,7 @@ def test_pytorch_tensor_mime_type(): def test_matplotlib_figure_mime_type(): """ + Check that a matplotlib figure has the correct mime-type """ plt.plot([1, 2, 3, 4]) figure = plt.gcf() @@ -101,8 +103,9 @@ def test_matplotlib_figure_mime_type(): assert (mime_type == 'application/vnd.plotly.v1+json') -def test_matplotlib_figure_mime_type(): +def test_plotly_figure_mime_type(): """ + Check that a plotly figure has the correct mime-type """ plt.plot([1, 2, 3, 4]) figure = plt.gcf() From e478e5ca7b50a3495104f9734a7bc1186c63a643 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 17:03:33 +0000 Subject: [PATCH 39/43] Need to read first column as index --- simvue/serialization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simvue/serialization.py b/simvue/serialization.py index 843ca7f8..a9e4c8c3 100644 --- a/simvue/serialization.py +++ b/simvue/serialization.py @@ -135,7 +135,7 @@ def _deserialize_dataframe(data): mfile = BytesIO(data) mfile.seek(0) - data = pd.read_csv(mfile) + data = pd.read_csv(mfile, index_col=0) return data def _deserialize_torch_tensor(data): From 851ca7a13e355db4a99f1799af898c2876844a5a Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 17:04:00 +0000 Subject: [PATCH 40/43] Add tests for Pandas dataframe mime-type and serialization/deserialization --- tests/unit/test_simvue.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/unit/test_simvue.py b/tests/unit/test_simvue.py index 951a584e..50e91181 100644 --- a/tests/unit/test_simvue.py +++ b/tests/unit/test_simvue.py @@ -6,6 +6,7 @@ import torch import plotly import matplotlib.pyplot as plt +import pandas as pd def test_suppress_errors(): """ @@ -148,3 +149,26 @@ def test_pickle_serialization(): data_out = Deserializer().deserialize(serialized, mime_type, allow_pickle=True) assert (data == data_out) + +def test_pandas_dataframe_mimetype(): + """ + Check that the mime-type of a Pandas dataframe is correct + """ + data = {'col1': [1, 2], 'col2': [3, 4]} + df = pd.DataFrame(data=data) + + _, mime_type = Serializer().serialize(df) + + assert (mime_type == 'application/vnd.simvue.df.v1') + +def test_pandas_dataframe_serialization(): + """ + Check that a Pandas dataframe can be serialized then deserialized successfully + """ + data = {'col1': [1, 2], 'col2': [3, 4]} + df = pd.DataFrame(data=data) + + serialized, mime_type = Serializer().serialize(df) + df_out = Deserializer().deserialize(serialized, mime_type) + + assert (df.equals(df_out)) From e1cfdf2a6004a6ae2d63aec421801f7ca87716d7 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 20:17:52 +0000 Subject: [PATCH 41/43] Support PyTorch state_dict serialization --- simvue/serialization.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/simvue/serialization.py b/simvue/serialization.py index a9e4c8c3..93e21084 100644 --- a/simvue/serialization.py +++ b/simvue/serialization.py @@ -9,6 +9,27 @@ def serialize(self, data, allow_pickle=False): return serializer(data) return None, None +def _is_torch_tensor(data): + """ + Check if a dictionary is a PyTorch tensor or state dict + """ + module_name = data.__class__.__module__ + class_name = data.__class__.__name__ + + if module_name == 'collections' and class_name == 'OrderedDict': + valid = True + for item in data: + module_name = data[item].__class__.__module__ + class_name = data[item].__class__.__name__ + if module_name != 'torch' or class_name != 'Tensor': + valid = False + if valid: + return True + elif module_name == 'torch' and class_name == 'Tensor': + return True + + return False + def get_serializer(data, allow_pickle): """ Determine which serializer to use @@ -24,7 +45,7 @@ def get_serializer(data, allow_pickle): return _serialize_numpy_array elif module_name == 'pandas.core.frame' and class_name == 'DataFrame': return _serialize_dataframe - elif module_name == 'torch' and class_name == 'Tensor': + elif _is_torch_tensor(data): return _serialize_torch_tensor elif allow_pickle: return _serialize_pickle From e9cc491da26b045866210122f24493e0c0c94725 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Tue, 17 Jan 2023 22:35:53 +0000 Subject: [PATCH 42/43] Add simple PyTorch example --- examples/PyTorch/main.py | 158 ++++++++++++++++++++++++++++++ examples/PyTorch/requirements.txt | 3 + 2 files changed, 161 insertions(+) create mode 100644 examples/PyTorch/main.py create mode 100644 examples/PyTorch/requirements.txt diff --git a/examples/PyTorch/main.py b/examples/PyTorch/main.py new file mode 100644 index 00000000..60d3a08e --- /dev/null +++ b/examples/PyTorch/main.py @@ -0,0 +1,158 @@ +# Taken from https://github.com/pytorch/examples/blob/main/mnist/main.py +from __future__ import print_function +import argparse +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torchvision import datasets, transforms +from torch.optim.lr_scheduler import StepLR +from simvue import Run + + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 32, 3, 1) + self.conv2 = nn.Conv2d(32, 64, 3, 1) + self.dropout1 = nn.Dropout(0.25) + self.dropout2 = nn.Dropout(0.5) + self.fc1 = nn.Linear(9216, 128) + self.fc2 = nn.Linear(128, 10) + + def forward(self, x): + x = self.conv1(x) + x = F.relu(x) + x = self.conv2(x) + x = F.relu(x) + x = F.max_pool2d(x, 2) + x = self.dropout1(x) + x = torch.flatten(x, 1) + x = self.fc1(x) + x = F.relu(x) + x = self.dropout2(x) + x = self.fc2(x) + output = F.log_softmax(x, dim=1) + return output + + +def train(args, model, device, train_loader, optimizer, epoch, run): + model.train() + for batch_idx, (data, target) in enumerate(train_loader): + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % args.log_interval == 0: + print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + epoch, batch_idx * len(data), len(train_loader.dataset), + 100. * batch_idx / len(train_loader), loss.item())) + run.log_metrics({"train.loss.%d" % epoch: float(loss.item())}, step=batch_idx) + if args.dry_run: + break + + +def test(model, device, test_loader, epoch, run): + model.eval() + test_loss = 0 + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + data, target = data.to(device), target.to(device) + output = model(data) + test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss + pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability + correct += pred.eq(target.view_as(pred)).sum().item() + + test_loss /= len(test_loader.dataset) + test_accuracy = 100. * correct / len(test_loader.dataset) + + print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( + test_loss, correct, len(test_loader.dataset), + test_accuracy)) + run.log_metrics({'test.loss': test_loss, + 'test.accuracy': test_accuracy}, step=epoch) + + +def main(): + # Training settings + parser = argparse.ArgumentParser(description='PyTorch MNIST Example') + parser.add_argument('--batch-size', type=int, default=64, metavar='N', + help='input batch size for training (default: 64)') + parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', + help='input batch size for testing (default: 1000)') + parser.add_argument('--epochs', type=int, default=14, metavar='N', + help='number of epochs to train (default: 14)') + parser.add_argument('--lr', type=float, default=1.0, metavar='LR', + help='learning rate (default: 1.0)') + parser.add_argument('--gamma', type=float, default=0.7, metavar='M', + help='Learning rate step gamma (default: 0.7)') + parser.add_argument('--no-cuda', action='store_true', default=False, + help='disables CUDA training') + parser.add_argument('--no-mps', action='store_true', default=False, + help='disables macOS GPU training') + parser.add_argument('--dry-run', action='store_true', default=False, + help='quickly check a single pass') + parser.add_argument('--seed', type=int, default=1, metavar='S', + help='random seed (default: 1)') + parser.add_argument('--log-interval', type=int, default=10, metavar='N', + help='how many batches to wait before logging training status') + parser.add_argument('--save-model', action='store_true', default=False, + help='For Saving the current Model') + args = parser.parse_args() + use_cuda = not args.no_cuda and torch.cuda.is_available() + use_mps = not args.no_mps and torch.backends.mps.is_available() + + torch.manual_seed(args.seed) + + if use_cuda: + device = torch.device("cuda") + elif use_mps: + device = torch.device("mps") + else: + device = torch.device("cpu") + + train_kwargs = {'batch_size': args.batch_size} + test_kwargs = {'batch_size': args.test_batch_size} + if use_cuda: + cuda_kwargs = {'num_workers': 1, + 'pin_memory': True, + 'shuffle': True} + train_kwargs.update(cuda_kwargs) + test_kwargs.update(cuda_kwargs) + + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ]) + dataset1 = datasets.MNIST('../data', train=True, download=True, + transform=transform) + dataset2 = datasets.MNIST('../data', train=False, + transform=transform) + train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs) + test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) + + model = Net().to(device) + optimizer = optim.Adadelta(model.parameters(), lr=args.lr) + + scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma) + + run = Run() + run.init(tags=['PyTorch']) + + for epoch in range(1, args.epochs + 1): + train(args, model, device, train_loader, optimizer, epoch, run) + test(model, device, test_loader, epoch, run) + scheduler.step() + + if args.save_model: + run.save(model.state_dict(), "output", name="mnist_cnn.pt") + + run.close() + + +if __name__ == '__main__': + main() + diff --git a/examples/PyTorch/requirements.txt b/examples/PyTorch/requirements.txt new file mode 100644 index 00000000..06228016 --- /dev/null +++ b/examples/PyTorch/requirements.txt @@ -0,0 +1,3 @@ +torch +torchvision +simvue From 97bf37833d0160e055841a4caec0fbae33e3bf04 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Thu, 19 Jan 2023 13:33:44 +0000 Subject: [PATCH 43/43] Not everything is pickled --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da82d5de..c9415ae3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## v0.8.0 -* Support picklable Python objects, Matplotlib and Plotly plots as artifacts. +* Support NumPy arrays, PyTorch tensors, Matplotlib and Plotly plots and picklable Python objects as artifacts. * (Bug fix) Events in offline mode didn't work. ## v0.7.2