From 0a1436bd531fa529b7cf820742765d0a7c02d0d4 Mon Sep 17 00:00:00 2001
From: Andrew Lahiff
Date: Sat, 4 Mar 2023 07:33:55 +0000
Subject: [PATCH 1/5] Support optional dataframe output for get_runs

---
 simvue/client.py     | 10 ++++++++--
 simvue/converters.py | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 2 deletions(-)
 create mode 100644 simvue/converters.py

diff --git a/simvue/client.py b/simvue/client.py
index c25cc512..c2b2e7e1 100644
--- a/simvue/client.py
+++ b/simvue/client.py
@@ -5,6 +5,7 @@
 
 from .serialization import Deserializer
 from .utilities import get_auth
+from .converters import to_dataframe
 
 CONCURRENT_DOWNLOADS = 10
 DOWNLOAD_CHUNK_SIZE = 8192
@@ -57,7 +58,7 @@ def get_run(self, run, system=False, tags=False, metadata=False):
 
         return None
 
-    def get_runs(self, filters, system=False, tags=False, metadata=False):
+    def get_runs(self, filters, system=False, tags=False, metadata=False, format='dict'):
         """
         Get runs
         """
@@ -73,7 +74,12 @@ def get_runs(self, filters, system=False, tags=False, metadata=False):
             return None
 
         if response.status_code == 200:
-            return response.json()
+            if format == 'dict':
+                return response.json()
+            elif format == 'dataframe':
+                return to_dataframe(response.json())
+            else:
+                return None
 
         return None
 
diff --git a/simvue/converters.py b/simvue/converters.py
new file mode 100644
index 00000000..eb84a0a7
--- /dev/null
+++ b/simvue/converters.py
@@ -0,0 +1,33 @@
+def to_dataframe(data):
+    """
+    Convert runs to dataframe
+    """
+    import pandas as pd
+
+    columns = {}
+    for run in data:
+        for item in ('name', 'status', 'folder', 'created'):
+            if item not in columns:
+                columns[item] = []
+            columns[item].append(run[item])
+
+        if 'system' in run:
+            for section in run['system']:
+                if section in ('cpu', 'gpu', 'platform'):
+                    for item in run['system'][section]:
+                        if 'system.%s.%s' % (section, item) not in columns:
+                            columns['system.%s.%s' % (section, item)] = []
+                        columns['system.%s.%s' % (section, item)].append(run['system'][section][item])
+                else:
+                    if 'system.%s' % section not in columns:
+                        columns['system.%s' % section] = []
+                    columns['system.%s' % section].append(run['system'][section])
+
+        if 'metadata' in run:
+            for item in run['metadata']:
+                if 'metadata.%s' % item not in columns:
+                    columns['metadata.%s' % item] = []
+                columns['metadata.%s' % item].append(run['metadata'][item])
+
+    df = pd.DataFrame(data=columns)
+    return df

From 98bb2e4bac2d627eb738483b92f0cec925ff98f5 Mon Sep 17 00:00:00 2001
From: Andrew Lahiff
Date: Sat, 4 Mar 2023 08:09:49 +0000
Subject: [PATCH 2/5] Include start & end times

---
Note (review): the else branch pads a missing field with None so the column
still receives one entry per run; the bare `columns[item].append()` it
replaces raises TypeError. The blob ids on the `index` lines of this patch
and of patch 3/5 were not recomputed after this correction.

 simvue/converters.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/simvue/converters.py b/simvue/converters.py
index eb84a0a7..1817d5a8 100644
--- a/simvue/converters.py
+++ b/simvue/converters.py
@@ -6,10 +6,13 @@ def to_dataframe(data):
 
     columns = {}
     for run in data:
-        for item in ('name', 'status', 'folder', 'created'):
+        for item in ('name', 'status', 'folder', 'created', 'started', 'ended'):
             if item not in columns:
                 columns[item] = []
-            columns[item].append(run[item])
+            if item in run:
+                columns[item].append(run[item])
+            else:
+                columns[item].append(None)
 
         if 'system' in run:
             for section in run['system']:

From d1941ad050e842298a0681c12be063c9bc9ae574 Mon Sep 17 00:00:00 2001
From: Andrew Lahiff
Date: Sat, 4 Mar 2023 08:19:36 +0000
Subject: [PATCH 3/5] Remove line

---
 simvue/converters.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/simvue/converters.py b/simvue/converters.py
index 1817d5a8..fc8fee27 100644
--- a/simvue/converters.py
+++ b/simvue/converters.py
@@ -3,7 +3,6 @@ def to_dataframe(data):
     Convert runs to dataframe
     """
     import pandas as pd
-
     columns = {}
     for run in data:
         for item in ('name', 'status', 'folder', 'created', 'started', 'ended'):

From 9ccbd2af82aaa49356de3cc1c8198786a0e1d69e Mon Sep 17 00:00:00 2001
From: Andrew Lahiff
Date: Sat, 4 Mar 2023 12:11:20 +0000
Subject: [PATCH 4/5] Add pandas as dependency

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 4efc10c6..35038181 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@
     long_description_content_type="text/markdown",
     url="https://simvue.io",
     platforms=["any"],
-    install_requires=["dill", "requests", "msgpack", "tenacity", "pyjwt", "psutil", "pydantic", "plotly"],
+    install_requires=["dill", "requests", "msgpack", "tenacity", "pandas", "pyjwt", "psutil", "pydantic", "plotly"],
     package_dir={'': '.'},
     packages=["simvue"],
     package_data={"": ["README.md"]},

From d7347e648a7440811ee0db6980042442a13bfc19 Mon Sep 17 00:00:00 2001
From: Andrew Lahiff
Date: Sat, 4 Mar 2023 12:11:37 +0000
Subject: [PATCH 5/5] Update CHANGELOG and version

---
 CHANGELOG.md       | 4 ++++
 simvue/__init__.py | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index efdeb7b4..ef5e049e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Change log
 
+## v0.11.0
+
+* Support optional dataframe output from `get_runs`.
+
 ## v0.10.1
 
 * The worker process now no longer gives a long delay when a run has finished (now at most ~1 second).
diff --git a/simvue/__init__.py b/simvue/__init__.py
index 8a432658..025758ed 100644
--- a/simvue/__init__.py
+++ b/simvue/__init__.py
@@ -2,4 +2,4 @@
 from simvue.client import Client
 from simvue.handler import Handler
 from simvue.models import RunInput
-__version__ = '0.10.1'
+__version__ = '0.11.0'