diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index fbd7def6..3b6b0d5a 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -28,7 +28,7 @@ jobs: python -m pip install --upgrade pip pip install flake8 pytest pip install -e . - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + if [ -f test-requirements.txt ]; then pip install -r test-requirements.txt; fi - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e74ce6f..c9415ae3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Change log +## v0.8.0 + +* Support NumPy arrays, PyTorch tensors, Matplotlib and Plotly plots and picklable Python objects as artifacts. +* (Bug fix) Events in offline mode didn't work. + ## v0.7.2 * Pydantic model is used for input validation. diff --git a/examples/PyTorch/main.py b/examples/PyTorch/main.py new file mode 100644 index 00000000..60d3a08e --- /dev/null +++ b/examples/PyTorch/main.py @@ -0,0 +1,158 @@ +# Taken from https://github.com/pytorch/examples/blob/main/mnist/main.py +from __future__ import print_function +import argparse +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torchvision import datasets, transforms +from torch.optim.lr_scheduler import StepLR +from simvue import Run + + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 32, 3, 1) + self.conv2 = nn.Conv2d(32, 64, 3, 1) + self.dropout1 = nn.Dropout(0.25) + self.dropout2 = nn.Dropout(0.5) + self.fc1 = nn.Linear(9216, 128) + self.fc2 = nn.Linear(128, 10) + + def forward(self, x): + x = self.conv1(x) + x = F.relu(x) + x = self.conv2(x) + x = F.relu(x) + x = F.max_pool2d(x, 2) + x = self.dropout1(x) + x = torch.flatten(x, 1) + x = self.fc1(x) + x = F.relu(x) + x = self.dropout2(x) + x = self.fc2(x) + output = F.log_softmax(x, dim=1) + return output + + +def train(args, model, device, train_loader, optimizer, epoch, run): + model.train() + for batch_idx, (data, target) in enumerate(train_loader): + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % args.log_interval == 0: + print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + epoch, batch_idx * len(data), len(train_loader.dataset), + 100. * batch_idx / len(train_loader), loss.item())) + run.log_metrics({"train.loss.%d" % epoch: float(loss.item())}, step=batch_idx) + if args.dry_run: + break + + +def test(model, device, test_loader, epoch, run): + model.eval() + test_loss = 0 + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + data, target = data.to(device), target.to(device) + output = model(data) + test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss + pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability + correct += pred.eq(target.view_as(pred)).sum().item() + + test_loss /= len(test_loader.dataset) + test_accuracy = 100. 
* correct / len(test_loader.dataset) + + print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( + test_loss, correct, len(test_loader.dataset), + test_accuracy)) + run.log_metrics({'test.loss': test_loss, + 'test.accuracy': test_accuracy}, step=epoch) + + +def main(): + # Training settings + parser = argparse.ArgumentParser(description='PyTorch MNIST Example') + parser.add_argument('--batch-size', type=int, default=64, metavar='N', + help='input batch size for training (default: 64)') + parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', + help='input batch size for testing (default: 1000)') + parser.add_argument('--epochs', type=int, default=14, metavar='N', + help='number of epochs to train (default: 14)') + parser.add_argument('--lr', type=float, default=1.0, metavar='LR', + help='learning rate (default: 1.0)') + parser.add_argument('--gamma', type=float, default=0.7, metavar='M', + help='Learning rate step gamma (default: 0.7)') + parser.add_argument('--no-cuda', action='store_true', default=False, + help='disables CUDA training') + parser.add_argument('--no-mps', action='store_true', default=False, + help='disables macOS GPU training') + parser.add_argument('--dry-run', action='store_true', default=False, + help='quickly check a single pass') + parser.add_argument('--seed', type=int, default=1, metavar='S', + help='random seed (default: 1)') + parser.add_argument('--log-interval', type=int, default=10, metavar='N', + help='how many batches to wait before logging training status') + parser.add_argument('--save-model', action='store_true', default=False, + help='For Saving the current Model') + args = parser.parse_args() + use_cuda = not args.no_cuda and torch.cuda.is_available() + use_mps = not args.no_mps and torch.backends.mps.is_available() + + torch.manual_seed(args.seed) + + if use_cuda: + device = torch.device("cuda") + elif use_mps: + device = torch.device("mps") + else: + device = torch.device("cpu") + + train_kwargs = {'batch_size': args.batch_size} + test_kwargs = {'batch_size': args.test_batch_size} + if use_cuda: + cuda_kwargs = {'num_workers': 1, + 'pin_memory': True, + 'shuffle': True} + train_kwargs.update(cuda_kwargs) + test_kwargs.update(cuda_kwargs) + + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ]) + dataset1 = datasets.MNIST('../data', train=True, download=True, + transform=transform) + dataset2 = datasets.MNIST('../data', train=False, + transform=transform) + train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs) + test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) + + model = Net().to(device) + optimizer = optim.Adadelta(model.parameters(), lr=args.lr) + + scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma) + + run = Run() + run.init(tags=['PyTorch']) + + for epoch in range(1, args.epochs + 1): + train(args, model, device, train_loader, optimizer, epoch, run) + test(model, device, test_loader, epoch, run) + scheduler.step() + + if args.save_model: + run.save(model.state_dict(), "output", name="mnist_cnn.pt") + + run.close() + + +if __name__ == '__main__': + main() + diff --git a/examples/PyTorch/requirements.txt b/examples/PyTorch/requirements.txt new file mode 100644 index 00000000..06228016 --- /dev/null +++ b/examples/PyTorch/requirements.txt @@ -0,0 +1,3 @@ +torch +torchvision +simvue diff --git a/setup.py b/setup.py index 14fba73b..4efc10c6 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ 
long_description_content_type="text/markdown", url="https://simvue.io", platforms=["any"], - install_requires=["requests", "msgpack", "tenacity", "pyjwt", "psutil", "pydantic"], + install_requires=["dill", "requests", "msgpack", "tenacity", "pyjwt", "psutil", "pydantic", "plotly"], package_dir={'': '.'}, packages=["simvue"], package_data={"": ["README.md"]}, diff --git a/simvue/__init__.py b/simvue/__init__.py index d1a83ba7..3d1137ac 100644 --- a/simvue/__init__.py +++ b/simvue/__init__.py @@ -2,4 +2,4 @@ from simvue.client import Client from simvue.handler import Handler from simvue.models import RunInput -__version__ = '0.7.2' +__version__ = '0.8.0' diff --git a/simvue/client.py b/simvue/client.py index ae857edf..345845f6 100644 --- a/simvue/client.py +++ b/simvue/client.py @@ -3,6 +3,7 @@ import pickle import requests +from .serialization import Deserializer from .utilities import get_auth CONCURRENT_DOWNLOADS = 10 @@ -51,7 +52,7 @@ def list_artifacts(self, run, category=None): return None - def get_artifact(self, run, name): + def get_artifact(self, run, name, allow_pickle=False): """ Return the contents of the specified artifact """ @@ -62,23 +63,23 @@ def get_artifact(self, run, name): except requests.exceptions.RequestException: return None - if response.status_code == 200 and response.json(): - url = response.json()[0]['url'] - - try: - response = requests.get(url, timeout=DOWNLOAD_TIMEOUT) - except requests.exceptions.RequestException: - return None - else: + if response.status_code != 200: return None + url = response.json()[0]['url'] + mimetype = response.json()[0]['type'] + try: - content = pickle.loads(response.content) - except: - return response.content - else: + response = requests.get(url, timeout=DOWNLOAD_TIMEOUT) + except requests.exceptions.RequestException: + return None + + content = Deserializer().deserialize(response.content, mimetype, allow_pickle) + if content is not None: return content + return response.content + def get_artifact_as_file(self, run, name, path='./'): """ Download an artifact diff --git a/simvue/offline.py b/simvue/offline.py index 8494d293..7414db3a 100644 --- a/simvue/offline.py +++ b/simvue/offline.py @@ -1,9 +1,11 @@ +import codecs import json import logging import os import time +import uuid -from .utilities import get_offline_directory, create_file +from .utilities import get_offline_directory, create_file, prepare_for_api logger = logging.getLogger(__name__) @@ -90,9 +92,14 @@ def save_file(self, data): """ Save file """ + if 'pickled' in data: + temp_file = f"{self._directory}/temp-{str(uuid.uuid4())}.pickle" + with open(temp_file, 'wb') as fh: + fh.write(data['pickled']) + data['pickledFile'] = temp_file unique_id = time.time() filename = f"{self._directory}/file-{unique_id}.json" - self._write_json(filename, data) + self._write_json(filename, prepare_for_api(data, False)) return True def add_alert(self, data): diff --git a/simvue/remote.py b/simvue/remote.py index 31f3b8d2..04af7079 100644 --- a/simvue/remote.py +++ b/simvue/remote.py @@ -1,9 +1,8 @@ import logging import time -import requests from .api import post, put -from .utilities import get_auth, get_expiry +from .utilities import get_auth, get_expiry, prepare_for_api logger = logging.getLogger(__name__) @@ -53,10 +52,13 @@ def create_run(self, data): return self._name - def update(self, data): + def update(self, data, run=None): """ Update metadata, tags or status """ + if run is not None: + data['name'] = run + try: response = put(f"{self._url}/api/runs", self._headers, data) 
except Exception as err: @@ -69,10 +71,13 @@ def update(self, data): self._error(f"Got status code {response.status_code} when updating run") return False - def set_folder_details(self, data): + def set_folder_details(self, data, run=None): """ Set folder details """ + if run is not None: + data['run'] = run + try: response = put(f"{self._url}/api/folders", self._headers, data) except Exception as err: @@ -85,13 +90,16 @@ def set_folder_details(self, data): self._error(f"Got status code {response.status_code} when updating folder details") return False - def save_file(self, data): + def save_file(self, data, run=None): """ Save file """ + if run is not None: + data['run'] = run + # Get presigned URL try: - response = post(f"{self._url}/api/data", self._headers, data) + response = post(f"{self._url}/api/data", self._headers, prepare_for_api(data)) except Exception as err: self._error(f"Got exception when preparing to upload file {data['name']} to object storage: {str(err)}") return False @@ -105,22 +113,40 @@ def save_file(self, data): if 'url' in response.json(): url = response.json()['url'] - try: - with open(data['originalPath'], 'rb') as fh: - response = put(url, {}, fh, is_json=False, timeout=UPLOAD_TIMEOUT) + if 'pickled' in data and 'pickledFile' not in data: + try: + response = put(url, {}, data['pickled'], is_json=False, timeout=UPLOAD_TIMEOUT) if response.status_code != 200: - self._error(f"Got status code {response.status_code} when uploading file {data['name']} to object storage") + self._error(f"Got status code {response.status_code} when uploading object {data['name']} to object storage") return None - except Exception as err: - self._error(f"Got exception when uploading file {data['name']} to object storage: {str(err)}") - return None + except Exception as err: + self._error(f"Got exception when uploading object {data['name']} to object storage: {str(err)}") + return None + else: + if 'pickledFile' in data: + use_filename = data['pickledFile'] + else: + use_filename = data['originalPath'] + + try: + with open(use_filename, 'rb') as fh: + response = put(url, {}, fh, is_json=False, timeout=UPLOAD_TIMEOUT) + if response.status_code != 200: + self._error(f"Got status code {response.status_code} when uploading file {data['name']} to object storage") + return None + except Exception as err: + self._error(f"Got exception when uploading file {data['name']} to object storage: {str(err)}") + return None return True - def add_alert(self, data): + def add_alert(self, data, run=None): """ Add an alert """ + if run is not None: + data['run'] = run + try: response = post(f"{self._url}/api/alerts", self._headers, data) except Exception as err: diff --git a/simvue/run.py b/simvue/run.py index 51af4587..15fd5392 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -1,4 +1,3 @@ -import configparser import datetime import hashlib import logging @@ -6,16 +5,17 @@ import os import re import multiprocessing +import pickle import socket import subprocess import sys import time as tm import platform import uuid -import requests from .worker import Worker from .simvue import Simvue +from .serialization import Serializer from .models import RunInput from .utilities import get_auth, get_expiry from pydantic import ValidationError @@ -96,18 +96,25 @@ def get_system(): return system -def calculate_sha256(filename): +def calculate_sha256(filename, is_file): """ Calculate sha256 checksum of the specified file """ sha256_hash = hashlib.sha256() - try: - with open(filename, "rb") as fd: - for byte_block in 
iter(lambda: fd.read(CHECKSUM_BLOCK_SIZE), b""): - sha256_hash.update(byte_block) - return sha256_hash.hexdigest() - except: - pass + if is_file: + try: + with open(filename, "rb") as fd: + for byte_block in iter(lambda: fd.read(CHECKSUM_BLOCK_SIZE), b""): + sha256_hash.update(byte_block) + return sha256_hash.hexdigest() + except: + pass + else: + if isinstance(filename, str): + sha256_hash.update(bytes(filename, 'utf-8')) + else: + sha256_hash.update(bytes(filename)) + return sha256_hash.hexdigest() return None @@ -255,11 +262,11 @@ def init(self, name=None, metadata={}, tags=[], description=None, folder='/', ru self._check_token() - # compare with pydantic RunInput model + # compare with pydantic RunInput model try: runinput = RunInput(**data) - except ValidationError as e: - self._error(e) + except ValidationError as err: + self._error(err) self._simvue = Simvue(self._name, self._uuid, self._mode, self._suppress_errors) name = self._simvue.create_run(data) @@ -474,9 +481,9 @@ def log_metrics(self, metrics, step=None, time=None, timestamp=None): return True - def save(self, filename, category, filetype=None, preserve_path=False): + def save(self, filename, category, filetype=None, preserve_path=False, name=None, allow_pickle=False): """ - Upload file + Upload file or object """ if self._mode == 'disabled': return True @@ -489,12 +496,16 @@ def save(self, filename, category, filetype=None, preserve_path=False): self._error('Run is not active') return False - if not os.path.isfile(filename): - self._error(f"File {filename} does not exist") - return False + is_file = False + if isinstance(filename, str): + if not os.path.isfile(filename): + self._error(f"File {filename} does not exist") + return False + else: + is_file = True if filetype: - mimetypes_valid = [] + mimetypes_valid = ['application/vnd.plotly.v1+json'] mimetypes.init() for _, value in mimetypes.types_map.items(): mimetypes_valid.append(value) @@ -508,24 +519,38 @@ def save(self, filename, category, filetype=None, preserve_path=False): data['name'] = filename if data['name'].startswith('./'): data['name'] = data['name'][2:] - else: + elif is_file: data['name'] = os.path.basename(filename) + + if name: + data['name'] = name + data['run'] = self._name data['category'] = category - data['checksum'] = calculate_sha256(filename) - data['size'] = os.path.getsize(filename) - data['originalPath'] = os.path.abspath(os.path.expanduser(os.path.expandvars(filename))) + + if is_file: + data['size'] = os.path.getsize(filename) + data['originalPath'] = os.path.abspath(os.path.expanduser(os.path.expandvars(filename))) + data['checksum'] = calculate_sha256(filename, is_file) # Determine mimetype - if not filetype: + mimetype = None + if not filetype and is_file: mimetypes.init() mimetype = mimetypes.guess_type(filename)[0] if not mimetype: mimetype = 'application/octet-stream' - else: + elif is_file: mimetype = filetype - data['type'] = mimetype + if mimetype: + data['type'] = mimetype + + if not is_file: + data['pickled'], data['type'] = Serializer().serialize(filename, allow_pickle) + data['checksum'] = calculate_sha256(data['pickled'], False) + data['originalPath'] = '' + data['size'] = sys.getsizeof(data['pickled']) # Register file if not self._simvue.save_file(data): @@ -678,7 +703,8 @@ def add_alert(self, notification='none', pattern=None): """ - Creates an alert with the specified name (if it doesn't exist) and applies it to the current run + Creates an alert with the specified name (if it doesn't exist) + and applies it to the current 
run """ if self._mode == 'disabled': return True diff --git a/simvue/sender.py b/simvue/sender.py index d090e392..9a5bedf6 100644 --- a/simvue/sender.py +++ b/simvue/sender.py @@ -12,6 +12,13 @@ logger = logging.getLogger(__name__) +def update_name(name, data): + """ + Update name in metrics/events + """ + for item in data: + item['run'] = name + def add_name(name, data, filename): """ Update name in JSON @@ -67,7 +74,11 @@ def sender(): elif run.endswith('terminated'): status = 'terminated' - current = run.replace('/running', '').replace('/completed', '').replace('/failed', '').replace('/terminated', '').replace('/created', '') + current = run.replace('/running', '').\ + replace('/completed', '').\ + replace('/failed', '').\ + replace('/terminated', '').\ + replace('/created', '') if os.path.isfile("f{current}/sent"): if status == 'running': @@ -147,37 +158,41 @@ def sender(): # Handle metrics if '/metrics-' in record: logger.info('Sending metrics for run %s', run_init['name']) - remote.send_metrics(msgpack.packb(get_json(record, name), use_bin_type=True)) + data = get_json(record, name) + update_name(run_init['name'], data) + remote.send_metrics(msgpack.packb(data, use_bin_type=True)) rename = True # Handle events - if '/event-' in record: - logger.info('Sending event for run %s', run_init['name']) - remote.send_event(msgpack.packb(get_json(record, name), use_bin_type=True)) + if '/events-' in record: + logger.info('Sending events for run %s', run_init['name']) + data = get_json(record, name) + update_name(run_init['name'], data) + remote.send_event(msgpack.packb(data, use_bin_type=True)) rename = True # Handle updates if '/update-' in record: logger.info('Sending update for run %s', run_init['name']) - remote.update(get_json(record, name)) + remote.update(get_json(record, name), run_init['name']) rename = True # Handle folders if '/folder-' in record: logger.info('Sending folder details for run %s', run_init['name']) - remote.set_folder_details(get_json(record, name)) + remote.set_folder_details(get_json(record, name), run_init['name']) rename = True # Handle alerts if '/alert-' in record: logger.info('Sending alert details for run %s', run_init['name']) - remote.add_alert(get_json(record, name)) + remote.add_alert(get_json(record, name), run_init['name']) rename = True # Handle files if '/file-' in record: logger.info('Saving file for run %s', run_init['name']) - remote.save_file(get_json(record, name)) + remote.save_file(get_json(record, name), run_init['name']) rename = True # Rename processed files diff --git a/simvue/serialization.py b/simvue/serialization.py new file mode 100644 index 00000000..93e21084 --- /dev/null +++ b/simvue/serialization.py @@ -0,0 +1,176 @@ +from io import BytesIO +import pickle +import plotly + +class Serializer: + def serialize(self, data, allow_pickle=False): + serializer = get_serializer(data, allow_pickle) + if serializer: + return serializer(data) + return None, None + +def _is_torch_tensor(data): + """ + Check if a dictionary is a PyTorch tensor or state dict + """ + module_name = data.__class__.__module__ + class_name = data.__class__.__name__ + + if module_name == 'collections' and class_name == 'OrderedDict': + valid = True + for item in data: + module_name = data[item].__class__.__module__ + class_name = data[item].__class__.__name__ + if module_name != 'torch' or class_name != 'Tensor': + valid = False + if valid: + return True + elif module_name == 'torch' and class_name == 'Tensor': + return True + + return False + +def get_serializer(data, 
allow_pickle):
+    """
+    Determine which serializer to use
+    """
+    module_name = data.__class__.__module__
+    class_name = data.__class__.__name__
+
+    if module_name == 'plotly.graph_objs._figure' and class_name == 'Figure':
+        return _serialize_plotly_figure
+    elif module_name == 'matplotlib.figure' and class_name == 'Figure':
+        return _serialize_matplotlib_figure
+    elif module_name == 'numpy' and class_name == 'ndarray':
+        return _serialize_numpy_array
+    elif module_name == 'pandas.core.frame' and class_name == 'DataFrame':
+        return _serialize_dataframe
+    elif _is_torch_tensor(data):
+        return _serialize_torch_tensor
+    elif allow_pickle:
+        return _serialize_pickle
+    return None
+
+def _serialize_plotly_figure(data):
+    mimetype = 'application/vnd.plotly.v1+json'
+    data = plotly.io.to_json(data)
+    return data, mimetype
+
+def _serialize_matplotlib_figure(data):
+    mimetype = 'application/vnd.plotly.v1+json'
+    data = plotly.io.to_json(plotly.tools.mpl_to_plotly(data))
+    return data, mimetype
+
+def _serialize_numpy_array(data):
+    try:
+        import numpy as np
+    except ImportError:
+        # numpy is unavailable, so this type cannot be serialized
+        return None, None
+
+    mimetype = 'application/vnd.simvue.numpy.v1'
+    mfile = BytesIO()
+    np.save(mfile, data, allow_pickle=False)
+    mfile.seek(0)
+    data = mfile.read()
+    return data, mimetype
+
+def _serialize_dataframe(data):
+    mimetype = 'application/vnd.simvue.df.v1'
+    mfile = BytesIO()
+    data.to_csv(mfile)
+    mfile.seek(0)
+    data = mfile.read()
+    return data, mimetype
+
+def _serialize_torch_tensor(data):
+    try:
+        import torch
+    except ImportError:
+        # torch is unavailable, so this type cannot be serialized
+        return None, None
+
+    mimetype = 'application/vnd.simvue.torch.v1'
+    mfile = BytesIO()
+    torch.save(data, mfile)
+    mfile.seek(0)
+    data = mfile.read()
+    return data, mimetype
+
+def _serialize_pickle(data):
+    mimetype = 'application/octet-stream'
+    data = pickle.dumps(data)
+    return data, mimetype
+
+class Deserializer:
+    def deserialize(self, data, mimetype, allow_pickle=False):
+        deserializer = get_deserializer(mimetype, allow_pickle)
+        if deserializer:
+            return deserializer(data)
+        return None
+
+def get_deserializer(mimetype, allow_pickle):
+    """
+    Determine which deserializer to use
+    """
+    # Matplotlib figures are converted to Plotly JSON when serialized,
+    # so both figure types share the Plotly mimetype and deserializer
+    if mimetype == 'application/vnd.plotly.v1+json':
+        return _deserialize_plotly_figure
+    elif mimetype == 'application/vnd.simvue.numpy.v1':
+        return _deserialize_numpy_array
+    elif mimetype == 'application/vnd.simvue.df.v1':
+        return _deserialize_dataframe
+    elif mimetype == 'application/vnd.simvue.torch.v1':
+        return _deserialize_torch_tensor
+    elif mimetype == 'application/octet-stream' and allow_pickle:
+        return _deserialize_pickle
+    return None
+
+def _deserialize_plotly_figure(data):
+    data = plotly.io.from_json(data)
+    return data
+
+def _deserialize_matplotlib_figure(data):
+    data = plotly.io.from_json(data)
+    return data
+
+def _deserialize_numpy_array(data):
+    try:
+        import numpy as np
+    except ImportError:
+        # numpy is unavailable; the caller falls back to the raw content
+        return None
+
+    mfile = BytesIO(data)
+    mfile.seek(0)
+    data = np.load(mfile, allow_pickle=False)
+    return data
+
+def _deserialize_dataframe(data):
+    try:
+        import pandas as pd
+    except ImportError:
+        # pandas is unavailable; the caller falls back to the raw content
+        return None
+
+    mfile = BytesIO(data)
+    mfile.seek(0)
+    data = pd.read_csv(mfile, index_col=0)
+    return data
+
+def _deserialize_torch_tensor(data):
+    try:
+        import torch
+    except ImportError:
+        # torch is unavailable; the caller falls back to the raw content
+        return None
+
+    mfile = BytesIO(data)
+    mfile.seek(0)
+    data = torch.load(mfile)
+    return data
+
+def 
_deserialize_pickle(data): + data = pickle.loads(data) + return data diff --git a/simvue/utilities.py b/simvue/utilities.py index 7bb59149..8fc005d3 100644 --- a/simvue/utilities.py +++ b/simvue/utilities.py @@ -77,3 +77,14 @@ def get_expiry(token): except: pass return expiry + +def prepare_for_api(data_in, all=True): + """ + Remove references to pickling + """ + data = data_in.copy() + if 'pickled' in data: + del data['pickled'] + if 'pickledFile' in data and all: + del data['pickledFile'] + return data diff --git a/test-requirements.txt b/test-requirements.txt new file mode 100644 index 00000000..cd484dcd --- /dev/null +++ b/test-requirements.txt @@ -0,0 +1,12 @@ +dill +requests +msgpack +tenacity +pyjwt +psutil +pydantic +plotly +torch +pandas +numpy +matplotlib diff --git a/tests/unit/test_simvue.py b/tests/unit/test_simvue.py index b507b79b..50e91181 100644 --- a/tests/unit/test_simvue.py +++ b/tests/unit/test_simvue.py @@ -1,6 +1,12 @@ import os from simvue import Run +from simvue.serialization import Serializer, Deserializer import pytest +import numpy as np +import torch +import plotly +import matplotlib.pyplot as plt +import pandas as pd def test_suppress_errors(): """ @@ -68,3 +74,101 @@ def test_run_init_folder(): assert exc_info.match(r"string does not match regex") +def test_numpy_array_mime_type(): + """ + Check that the mimetype for numpy arrays is correct + """ + array = np.array([1, 2, 3, 4, 5]) + _, mime_type = Serializer().serialize(array) + + assert (mime_type == 'application/vnd.simvue.numpy.v1') + +def test_pytorch_tensor_mime_type(): + """ + Check that a PyTorch tensor has the correct mime-type + """ + torch.manual_seed(1724) + array = torch.rand(2, 3) + _, mime_type = Serializer().serialize(array) + + assert (mime_type == 'application/vnd.simvue.torch.v1') + +def test_matplotlib_figure_mime_type(): + """ + Check that a matplotlib figure has the correct mime-type + """ + plt.plot([1, 2, 3, 4]) + figure = plt.gcf() + + _, mime_type = Serializer().serialize(figure) + + assert (mime_type == 'application/vnd.plotly.v1+json') + +def test_plotly_figure_mime_type(): + """ + Check that a plotly figure has the correct mime-type + """ + plt.plot([1, 2, 3, 4]) + figure = plt.gcf() + plotly_figure = plotly.tools.mpl_to_plotly(figure) + + _, mime_type = Serializer().serialize(plotly_figure) + + assert (mime_type == 'application/vnd.plotly.v1+json') + +def test_numpy_array_serialization(): + """ + Check that a numpy array can be serialized then deserialized successfully + """ + array = np.array([1, 2, 3, 4, 5]) + + serialized, mime_type = Serializer().serialize(array) + array_out = Deserializer().deserialize(serialized, mime_type) + + assert (array == array_out).all() + +def test_pytorch_tensor_serialization(): + """ + Check that a PyTorch tensor can be serialized then deserialized successfully + """ + torch.manual_seed(1724) + array = torch.rand(2, 3) + + serialized, mime_type = Serializer().serialize(array) + array_out = Deserializer().deserialize(serialized, mime_type) + + assert (array == array_out).all() + +def test_pickle_serialization(): + """ + Check that a dictionary can be serialized then deserialized successfully + """ + data = {'a': 1.0, 'b': 'test'} + + serialized, mime_type = Serializer().serialize(data, allow_pickle=True) + data_out = Deserializer().deserialize(serialized, mime_type, allow_pickle=True) + + assert (data == data_out) + +def test_pandas_dataframe_mimetype(): + """ + Check that the mime-type of a Pandas dataframe is correct + """ + data = {'col1': [1, 
2], 'col2': [3, 4]} + df = pd.DataFrame(data=data) + + _, mime_type = Serializer().serialize(df) + + assert (mime_type == 'application/vnd.simvue.df.v1') + +def test_pandas_dataframe_serialization(): + """ + Check that a Pandas dataframe can be serialized then deserialized successfully + """ + data = {'col1': [1, 2], 'col2': [3, 4]} + df = pd.DataFrame(data=data) + + serialized, mime_type = Serializer().serialize(df) + df_out = Deserializer().deserialize(serialized, mime_type) + + assert (df.equals(df_out))
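
Taken together, these changes let runs store Python objects directly as artifacts. A minimal sketch of the extended `Run.save` API (run tags and artifact names here are illustrative; a valid Simvue server configuration is assumed):

```python
import numpy as np
import matplotlib.pyplot as plt
from simvue import Run

run = Run()
run.init(tags=['serialization-demo'])

# NumPy arrays are written via np.save and stored with the
# 'application/vnd.simvue.numpy.v1' mimetype; 'name' is passed explicitly
# because there is no file path to derive it from
array = np.linspace(0.0, 1.0, 100)
run.save(array, 'output', name='array')

# Matplotlib figures are converted to Plotly JSON before upload
plt.plot([1, 2, 3, 4])
run.save(plt.gcf(), 'output', name='plot')

# Other picklable objects require an explicit opt-in
run.save({'lr': 1.0, 'gamma': 0.7}, 'output', name='params', allow_pickle=True)

run.close()
```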
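On the retrieval side, `Client.get_artifact` now routes downloaded content through the `Deserializer`, reconstructing objects from their stored mimetype and falling back to the raw bytes for unrecognised types. A sketch, again with illustrative run and artifact names:

```python
from simvue import Client

client = Client()

# Reconstructed as a NumPy array from its stored mimetype
array = client.get_artifact('serialization-demo-run', 'array')

# Pickled artifacts are only unpickled when explicitly allowed
params = client.get_artifact('serialization-demo-run', 'params', allow_pickle=True)
```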
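In offline mode the serialized payload cannot live inside the JSON record, so `Offline.save_file` writes it to a temporary `.pickle` file and records the path under `pickledFile`, while `prepare_for_api` strips the fields the REST API should never see. Roughly, with a hypothetical record for illustration:

```python
from simvue.utilities import prepare_for_api

record = {'name': 'array',
          'pickled': b'\x93NUMPY...',
          'pickledFile': '/tmp/temp-1234.pickle'}

prepare_for_api(record)         # strips both fields before the POST to /api/data
prepare_for_api(record, False)  # keeps 'pickledFile' so the offline sender can upload it
```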