From a931fb77c24ceb046336736aaf6f9fee2052cf6f Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 5 Dec 2022 06:39:32 +0000 Subject: [PATCH 1/7] Remove dependency on randomname --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 68c6946b..8f122cfb 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ long_description_content_type="text/markdown", url="https://github.com/simvue-io/client", platforms=["any"], - install_requires=["requests", "randomname", "msgpack", "tenacity", "pyjwt", "psutil", "nvidia-ml-py"], + install_requires=["requests", "msgpack", "tenacity", "pyjwt", "psutil", "nvidia-ml-py"], package_dir={'': '.'}, packages=["simvue"], package_data={"": ["README.md"]}, From 0d448b52afd9382201ac32dee9a61b07959f2017 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 5 Dec 2022 06:41:05 +0000 Subject: [PATCH 2/7] Update version --- simvue/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simvue/__init__.py b/simvue/__init__.py index c28de1e8..5a5438dd 100644 --- a/simvue/__init__.py +++ b/simvue/__init__.py @@ -1,4 +1,4 @@ from simvue.run import Run from simvue.client import Client from simvue.handler import Handler -__version__ = '0.0.1' +__version__ = '0.0.7' From 6d33ead5bb0491b4cee9325c2e94703026b6fb59 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 5 Dec 2022 06:41:18 +0000 Subject: [PATCH 3/7] Update author details --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 8f122cfb..d52ca33f 100644 --- a/setup.py +++ b/setup.py @@ -10,8 +10,7 @@ setuptools.setup( name="simvue", version=version, - author="Andrew Lahiff", - author_email="andrew.lahiff@ukaea.uk", + author_email="info@simvue.io", description="Simulation tracking and monitoring", long_description=long_description, long_description_content_type="text/markdown", From a31c5e31d036b8f454228693b5e07207c3408f95 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 5 Dec 2022 06:41:34 +0000 Subject: [PATCH 4/7] Update --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 41116448..2a0af5f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ * Collect CPU, GPU and memory resource metrics. * Automatically delete temporary files used in offline mode once runs have entered a terminal state. * Warn users if their access token has expired. +* Remove dependency on the randomname module, instead handle name generation server side. ## v0.6.0 From fb365a6131211b1bae26ba68f3c86fd6c64ba5c4 Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 5 Dec 2022 06:55:50 +0000 Subject: [PATCH 5/7] Support creation of random names serverside (many changes needed for offline mode) --- simvue/offline.py | 10 +++++--- simvue/remote.py | 8 ++++-- simvue/run.py | 60 ++++++++++++++++++++++++++++----------------- simvue/sender.py | 49 ++++++++++++++++++++++++------------ simvue/simvue.py | 6 ++--- simvue/utilities.py | 7 ------ simvue/worker.py | 28 +++++++++++++-------- 7 files changed, 104 insertions(+), 64 deletions(-) diff --git a/simvue/offline.py b/simvue/offline.py index 24325c74..8494d293 100644 --- a/simvue/offline.py +++ b/simvue/offline.py @@ -3,7 +3,7 @@ import os import time -from .utilities import get_offline_directory, get_directory_name, create_file +from .utilities import get_offline_directory, create_file logger = logging.getLogger(__name__) @@ -11,9 +11,10 @@ class Offline(object): """ Class for offline runs """ - def __init__(self, name, suppress_errors=False): + def __init__(self, name, uuid, suppress_errors=False): self._name = name - self._directory = os.path.join(get_offline_directory(), get_directory_name(name)) + self._uuid = uuid + self._directory = os.path.join(get_offline_directory(), self._uuid) self._suppress_errors = suppress_errors def _error(self, message): @@ -45,6 +46,9 @@ def create_run(self, data): logger.error('Unable to create directory %s due to: %s', self._directory, str(err)) filename = f"{self._directory}/run.json" + if 'name' not in data: + data['name'] = None + self._write_json(filename, data) status = data['status'] diff --git a/simvue/remote.py b/simvue/remote.py index a1e096e4..b78a4a59 100644 --- a/simvue/remote.py +++ b/simvue/remote.py @@ -13,8 +13,9 @@ class Remote(object): """ Class which interacts with Simvue REST API """ - def __init__(self, name, suppress_errors=False): + def __init__(self, name, uuid, suppress_errors=False): self._name = name + self._uuid = uuid self._suppress_errors = suppress_errors self._url, self._token = get_auth() self._headers = {"Authorization": f"Bearer {self._token}"} @@ -49,7 +50,10 @@ def create_run(self, data): self._error(f"Got status code {response.status_code} when creating run") return False - return True + if 'name' in response.json(): + self._name = response.json()['name'] + + return self._name def update(self, data): """ diff --git a/simvue/run.py b/simvue/run.py index f11d7adc..5032d3ba 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -11,8 +11,8 @@ import sys import time as tm import platform +import uuid import requests -import randomname from .worker import Worker from .simvue import Simvue @@ -127,6 +127,7 @@ class Run(object): Track simulation details based on token and URL """ def __init__(self, mode='online'): + self._uuid = str(uuid.uuid4()) self._mode = mode self._name = None self._suppress_errors = False @@ -186,6 +187,7 @@ def _start(self, reconnect=False): self._events_queue = multiprocessing.Manager().Queue(maxsize=self._queue_size) self._worker = Worker(self._metrics_queue, self._events_queue, + self._uuid, self._name, self._url, self._headers, @@ -217,14 +219,12 @@ def init(self, name=None, metadata={}, tags=[], description=None, folder='/', ru if self._mode == 'disabled': return True - if not name: - name = randomname.get_name() - if not self._token or not self._url: self._error('Unable to get URL and token from environment variables or config file') - if not re.match(r'^[a-zA-Z0-9\-\_\s\/\.:]+$', name): - self._error('specified name is invalid') + if name: + if not re.match(r'^[a-zA-Z0-9\-\_\s\/\.:]+$', name): + self._error('specified name is invalid') if not isinstance(tags, list): self._error('tags must be a list') @@ -239,14 +239,16 @@ def init(self, name=None, metadata={}, tags=[], description=None, folder='/', ru else: self._status = 'created' - data = {'name': name, - 'metadata': metadata, + data = {'metadata': metadata, 'tags': tags, 'system': {'cpu': {}, 'gpu': {}, 'platform': {}}, 'status': self._status} + if name: + data['name'] = name + if description: data['description'] = description @@ -260,13 +262,16 @@ def init(self, name=None, metadata={}, tags=[], description=None, folder='/', ru self._check_token() - self._simvue = Simvue(self._name, self._mode, self._suppress_errors) - if not self._simvue.create_run(data): + self._simvue = Simvue(self._name, self._uuid, self._mode, self._suppress_errors) + name = self._simvue.create_run(data) + + if not name: return False + elif name is not True: + self._name = name if self._status == 'running': self._start() - return True @property @@ -276,7 +281,14 @@ def name(self): """ return self._name - def reconnect(self, name): + @property + def uid(self): + """ + Return the local unique identifier of the run + """ + return self._uuid + + def reconnect(self, name=None, uid=None): """ Reconnect to a run in the created state """ @@ -285,7 +297,9 @@ def reconnect(self, name): self._status = 'running' self._name = name - self._simvue = Simvue(self._name, self._mode, self._suppress_errors) + self._uuid = uid + + self._simvue = Simvue(self._name, self._uuid, self._mode, self._suppress_errors) self._start(reconnect=True) def set_pid(self, pid): @@ -332,7 +346,7 @@ def update_metadata(self, metadata): if self._mode == 'disabled': return True - if not self._name: + if not self._uuid and not self._name: self._error(INIT_MISSING) return False @@ -358,7 +372,7 @@ def update_tags(self, tags): if self._mode == 'disabled': return True - if not self._name: + if not self._uuid and not self._name: self._error(INIT_MISSING) return False @@ -380,7 +394,7 @@ def log_event(self, message, timestamp=None): if self._mode == 'disabled': return True - if not self._name: + if not self._uuid and not self._name: self._error(INIT_MISSING) return False @@ -417,7 +431,7 @@ def log_metrics(self, metrics, step=None, time=None, timestamp=None): if self._mode == 'disabled': return True - if not self._name: + if not self._uuid and not self._name: self._error(INIT_MISSING) return False @@ -468,7 +482,7 @@ def save(self, filename, category, filetype=None, preserve_path=False): if self._mode == 'disabled': return True - if not self._name: + if not self._uuid and not self._name: self._error(INIT_MISSING) return False @@ -527,7 +541,7 @@ def save_directory(self, directory, category, filetype=None, preserve_path=False if self._mode == 'disabled': return True - if not self._name: + if not self._uuid and not self._name: self._error(INIT_MISSING) return False @@ -577,7 +591,7 @@ def set_status(self, status): if self._mode == 'disabled': return True - if not self._name: + if not self._uuid and not self._name: self._error(INIT_MISSING) return False @@ -603,7 +617,7 @@ def close(self): if self._mode == 'disabled': return True - if not self._name: + if not self._uuid and not self._name: self._error(INIT_MISSING) return False @@ -620,7 +634,7 @@ def set_folder_details(self, path, metadata={}, tags=[], description=None): if self._mode == 'disabled': return True - if not self._name: + if not self._uuid and not self._name: self._error(INIT_MISSING) return False @@ -670,7 +684,7 @@ def add_alert(self, if self._mode == 'disabled': return True - if not self._name: + if not self._uuid and not self._name: self._error(INIT_MISSING) return False diff --git a/simvue/sender.py b/simvue/sender.py index 82f03479..b3d2b562 100644 --- a/simvue/sender.py +++ b/simvue/sender.py @@ -5,25 +5,39 @@ import shutil import time +import msgpack + from .remote import Remote from .utilities import get_offline_directory, create_file, remove_file logger = logging.getLogger(__name__) -def get_json(filename): +def add_name(name, data, filename): """ - Get JSON from a file + Update name in JSON """ - with open(filename, 'r') as fh: - data = json.load(fh) + if not data['name']: + data['name'] = name + with open(filename, 'w') as fh: + json.dump(data, fh) + return data -def get_binary(filename): +def get_json(filename, name=None): """ - Get binary content from a file + Get JSON from a file """ - with open(filename, 'rb') as fh: - data = fh.read() + with open(filename, 'r') as fh: + data = json.load(fh) + if name: + if 'name' in data: + if not data['name']: + data['name'] = name + else: + for item in data: + if 'run' in item: + if not item['run']: + item['run'] = name return data def sender(): @@ -75,16 +89,19 @@ def sender(): logger.info('Considering run with name %s and id %s', run_init['name'], id) - remote = Remote(run_init['name'], suppress_errors=True) + remote = Remote(run_init['name'], id, suppress_errors=True) # Check token remote.check_token() # Create run if it hasn't previously been created created_file = f"{current}/init" + name = None if not os.path.isfile(created_file): logger.info('Creating run with name %s', run_init['name']) - remote.create_run(run_init) + name = remote.create_run(run_init) + print('Got name=', name) + run_init = add_name(name, run_init, f"{current}/run.json") create_file(created_file) if status == 'running': @@ -131,37 +148,37 @@ def sender(): # Handle metrics if '/metrics-' in record: logger.info('Sending metrics for run %s', run_init['name']) - remote.send_metrics(get_binary(record)) + remote.send_metrics(msgpack.packb(get_json(record, name), use_bin_type=True)) rename = True # Handle events if '/event-' in record: logger.info('Sending event for run %s', run_init['name']) - remote.send_event(get_binary(record)) + remote.send_event(msgpack.packb(get_json(record, name), use_bin_type=True)) rename = True # Handle updates if '/update-' in record: logger.info('Sending update for run %s', run_init['name']) - remote.update(get_json(record)) + remote.update(get_json(record, name)) rename = True # Handle folders if '/folder-' in record: logger.info('Sending folder details for run %s', run_init['name']) - remote.set_folder_details(get_json(record)) + remote.set_folder_details(get_json(record, name)) rename = True # Handle alerts if '/alert-' in record: logger.info('Sending alert details for run %s', run_init['name']) - remote.add_alert(get_json(record)) + remote.add_alert(get_json(record, name)) rename = True # Handle files if '/file-' in record: logger.info('Saving file for run %s', run_init['name']) - remote.save_file(get_json(record)) + remote.save_file(get_json(record, name)) rename = True # Rename processed files diff --git a/simvue/simvue.py b/simvue/simvue.py index c15c7288..93555290 100644 --- a/simvue/simvue.py +++ b/simvue/simvue.py @@ -1,8 +1,8 @@ from .remote import Remote from .offline import Offline -def Simvue(name, mode, suppress_errors=False): +def Simvue(name, uuid, mode, suppress_errors=False): if mode == 'offline': - return Offline(name, suppress_errors=False) + return Offline(name, uuid, suppress_errors=False) else: - return Remote(name, suppress_errors=False) + return Remote(name, uuid, suppress_errors=False) diff --git a/simvue/utilities.py b/simvue/utilities.py index 76c54b3c..7bb59149 100644 --- a/simvue/utilities.py +++ b/simvue/utilities.py @@ -1,6 +1,5 @@ import configparser import jwt -import hashlib import logging import os @@ -48,12 +47,6 @@ def get_offline_directory(): return directory -def get_directory_name(name): - """ - Return the SHA256 sum of the provided name - """ - return hashlib.sha256(name.encode('utf-8')).hexdigest() - def create_file(filename): """ Create an empty file diff --git a/simvue/worker.py b/simvue/worker.py index 0fc03003..abd1f4f9 100644 --- a/simvue/worker.py +++ b/simvue/worker.py @@ -1,4 +1,5 @@ import datetime +import json import os import psutil import sys @@ -9,7 +10,7 @@ from tenacity import retry, wait_exponential, stop_after_attempt from .metrics import get_process_memory, get_process_cpu, get_gpu_metrics -from .utilities import get_offline_directory, get_directory_name, create_file +from .utilities import get_offline_directory, create_file HEARTBEAT_INTERVAL = 60 POLLING_INTERVAL = 20 @@ -28,18 +29,19 @@ def update_processes(parent, processes): class Worker(threading.Thread): - def __init__(self, metrics_queue, events_queue, name, url, headers, mode, pid, resources_metrics_interval): + def __init__(self, metrics_queue, events_queue, uuid, run_name, url, headers, mode, pid, resources_metrics_interval): threading.Thread.__init__(self) self._parent_thread = threading.currentThread() self._metrics_queue = metrics_queue self._events_queue = events_queue - self._name = name + self._run_name = run_name + self._uuid = uuid self._url = url self._headers = headers self._headers_mp = headers.copy() self._headers_mp['Content-Type'] = 'application/msgpack' self._mode = mode - self._directory = os.path.join(get_offline_directory(), get_directory_name(name)) + self._directory = os.path.join(get_offline_directory(), self._uuid) self._start_time = time.time() self._processes = [] self._resources_metrics_interval = resources_metrics_interval @@ -55,7 +57,7 @@ def heartbeat(self): if self._mode == 'online': response = requests.put(f"{self._url}/api/runs/heartbeat", headers=self._headers, - json={'name': self._name}) + json={'name': self._run_name}) response.raise_for_status() else: create_file(f"{self._directory}/heartbeat") @@ -73,8 +75,8 @@ def post(self, endpoint, data): else: unique_id = time.time() filename = f"{self._directory}/{endpoint}-{unique_id}" - with open(filename, 'wb') as fh: - fh.write(data) + with open(filename, 'w') as fh: + json.dump(data, fh) def run(self): """ @@ -98,7 +100,7 @@ def run(self): if memory is not None and cpu is not None: data = {} data['step'] = 0 - data['run'] = self._name + data['run'] = self._run_name data['values'] = {'resources/cpu.usage.percent': cpu, 'resources/memory.usage': memory} if gpu: @@ -129,7 +131,10 @@ def run(self): if buffer: try: - self.post('metrics', msgpack.packb(buffer, use_bin_type=True)) + if self._mode == 'online': + self.post('metrics', msgpack.packb(buffer, use_bin_type=True)) + else: + self.post('metrics', buffer) except: pass buffer = [] @@ -143,7 +148,10 @@ def run(self): if buffer: try: - self.post('events', msgpack.packb(buffer, use_bin_type=True)) + if self._mode == 'online': + self.post('events', msgpack.packb(buffer, use_bin_type=True)) + else: + self.post('events', buffer) except: pass buffer = [] From f7fc2c4e5f4c674d5e20e7527733c29944d8a8fa Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 5 Dec 2022 07:38:47 +0000 Subject: [PATCH 6/7] Remove debug line, log message now displays correct name --- simvue/sender.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/simvue/sender.py b/simvue/sender.py index b3d2b562..d090e392 100644 --- a/simvue/sender.py +++ b/simvue/sender.py @@ -98,9 +98,8 @@ def sender(): created_file = f"{current}/init" name = None if not os.path.isfile(created_file): - logger.info('Creating run with name %s', run_init['name']) name = remote.create_run(run_init) - print('Got name=', name) + logger.info('Creating run with name %s', run_init['name']) run_init = add_name(name, run_init, f"{current}/run.json") create_file(created_file) From 99ab4054d9fbb7905eee79c22f80aae6b7483c7d Mon Sep 17 00:00:00 2001 From: Andrew Lahiff Date: Mon, 5 Dec 2022 08:27:24 +0000 Subject: [PATCH 7/7] Tidy up --- simvue/worker.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/simvue/worker.py b/simvue/worker.py index abd1f4f9..7463317b 100644 --- a/simvue/worker.py +++ b/simvue/worker.py @@ -131,10 +131,8 @@ def run(self): if buffer: try: - if self._mode == 'online': - self.post('metrics', msgpack.packb(buffer, use_bin_type=True)) - else: - self.post('metrics', buffer) + if self._mode == 'online': buffer = msgpack.packb(buffer, use_bin_type=True) + self.post('metrics', buffer) except: pass buffer = [] @@ -148,10 +146,8 @@ def run(self): if buffer: try: - if self._mode == 'online': - self.post('events', msgpack.packb(buffer, use_bin_type=True)) - else: - self.post('events', buffer) + if self._mode == 'online': buffer = msgpack.packb(buffer, use_bin_type=True) + self.post('events', buffer) except: pass buffer = []