Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 82 additions & 93 deletions validphys2/src/validphys/uploadutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

Tools to upload resources to remote servers.
"""
import time
import subprocess
import logging
import os
Expand Down Expand Up @@ -172,65 +173,46 @@ def upload_context(self, output_and_file):
class ReportFileUploader(FileUploader, ReportUploader):
pass

class FitUploader(FileUploader):
"""An uploader for fits. Fits will be automatically compressed
before uploading."""
target_dir = _profile_key('fits_target_dir')
root_url = _profile_key('fits_root_url')

class ArchiveUploader(FileUploader):
""" Uploader for objects comprising many files such as fits or PDFs """
target_dir = None
root_url = None
_loader_name = None # vp loader for this kind of archive
_resource_type = "Archive" # name used during logging

def get_relative_path(self, output_path=None):
return ''

def check_fit_exists(self, fit_name):
"""Check whether the fit already exists on the server."""
# Get list of the available fits on the server
def _check_existence(self, resource_name):
""" Check whether the given resource exists on the server.
Returns true if the resource exists with the same name on the server
or false otherwise.
Note that the type of resource being checked is defined by the ``_loader_name`` attribute
"""
l = RemoteLoader()
fits = l.downloadable_fits
resource_list = getattr(l, self._loader_name)

if fit_name in fits:
log.error("A fit with the same name already exists on "
"the server. To overwrite this fit use the "
"--force flag, as in `vp-upload <fitname> "
"--force`.")
raise UploadError

def check_fit_md5(self, output_path):
"""When ``vp-setupfit`` is successfully ran, it creates an ``md5`` from
the config. We check that the ``md5`` matches the ``filter.yml`` which
is checking that ``vp-setupfit`` was ran and that the ``filter.yml``
inside the fit folder wasn't modified.
return resource_name in resource_list

def _check_is_indexed(self, resource_name):
""" Check whether the fit is correctly indexed in the server
"""
md5_path = output_path / "md5"
try:
with open(md5_path, "r") as f:
saved_md5 = f.read()
except FileNotFoundError as e:
log.error(
"It doesn't appear that `vp-setupfit` was ran because no `md5` "
"was found, `vp-setupfit` should be ran before uploading a fit."
)
raise UploadError(f"Fit MD5 file not found at {md5_path}") from e

with open(output_path / "filter.yml", "rb") as f:
hashed_config = hashlib.md5(f.read()).hexdigest()

if hashed_config != saved_md5:
log.error(
"Saved md5 doesn't match saved fit configuration runcard, which "
"suggests that the configuration file was modified after it was "
"saved. <fit folder>/filter.yml shouldn't be modified directly. "
"Instead modify the fit runcard and re-run ``vp-setupfit``."
)
raise UploadError

def compress(self, output_path):
log.info("Checking whether %s was correctly uploaded...", resource_name)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason why you don't use an f-string here and in one other place below? Personally I tend to find them a bit easier to read

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because has bullied me for years.
In reality I guess if you are ever bottle-necked by the writing of the logging then you have bigger problems than that... tbh I don't mind either way.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that you should really use this format in logging right? pylint complains if you don't, something about lazy formatting so I guess this gets resolved later than the f-string?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's the case. But tbh I don't think "compiling" a string will ever be a bottleneck of any kind...

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure sure, I was just saying I think we should follow the "rules"

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the more valid concern is that logging has the very theoretical capability of doing things with the args other than printing them. But it is also unlikely to matter.

time.sleep(3)
if self._check_existence(resource_name):
log.info("It has been correctly indexed by the server!")
else:
log.error("The object is uploaded but hasn't been indexed yet by the server. "
"You should upload it again to ensure it is indexed: vp-upload %s", resource_name)

def _compress(self, output_path):
"""Compress the folder and put in in a directory inside its parent."""
#make_archive fails if we give it relative paths for some reason
output_path = output_path.resolve()
tempdir = tempfile.mkdtemp(prefix='fit_upload_deleteme_',
tempdir = tempfile.mkdtemp(prefix=f'{self._resource_type}_upload_deleteme_',
dir=output_path.parent)
log.info(f"Compressing fit to {tempdir}")
log.info(f"Compressing {self._resource_type} to {tempdir}")
archive_path_without_extension = pathlib.Path(tempdir)/(output_path.name)
try:
with Spinner():
Expand All @@ -242,19 +224,24 @@ def compress(self, output_path):
raise UploadError(e) from e
return tempdir, archive_path_without_extension


def upload_output(self, output_path, force):
output_path = pathlib.Path(output_path)
fit_name = output_path.name

if not force:
self.check_fit_exists(fit_name)

self.check_fit_md5(output_path)

new_out, name = self.compress(output_path)
if self._check_existence(fit_name):
log.error("A %s with the same name already exists on "
"the server. To overwrite it use the "
"--force flag, as in `vp-upload <%s_name> --force.",
self._resource_type, self._resource_type)
raise UploadError

new_out, name = self._compress(output_path)
super().upload_output(new_out)

# Check whether the fit was really uploaded
self._check_is_indexed(fit_name)

shutil.rmtree(new_out)
return name.with_suffix('.tar.gz').name

Expand All @@ -274,56 +261,58 @@ def upload_or_exit_context(self, output, force):
log.error(e)
sys.exit()

class PDFUploader(FitUploader):
"""An uploader for PDFs. PDFs will be automatically compressed
before uploading."""
target_dir = _profile_key('pdfs_target_dir')
root_url = _profile_key('pdfs_root_url')

def check_pdf_exists(self, pdf_name):
"""Check whether the pdf already exists on the server."""
# Get list of the available fits on the server
l = RemoteLoader()
pdfs = l.downloadable_pdfs
class FitUploader(ArchiveUploader):
"""An uploader for fits. Fits will be automatically compressed
before uploading."""
target_dir = _profile_key('fits_target_dir')
root_url = _profile_key('fits_root_url')
_loader_name = "downloadable_fits"
_resource_type = "fit"

if pdf_name in pdfs:
log.error("A PDF with the same name already exists on "
"the server. To overwrite this PDF use the "
"--force flag, as in `vp-upload <pdfname> "
"--force`.")
raise UploadError
def check_fit_md5(self, output_path):
"""When ``vp-setupfit`` is run successfully, it creates an ``md5`` from
the config. We check that the ``md5`` matches the ``filter.yml`` which
is checking that ``vp-setupfit`` was run and that the ``filter.yml``
inside the fit folder wasn't modified.

def compress(self, output_path):
"""Compress the folder and put it in a directory inside its parent."""
# make_archive fails if we give it relative paths for some reason
output_path = output_path.resolve()
tempdir = tempfile.mkdtemp(prefix='pdf_upload_deleteme_',
dir=output_path.parent)
log.info(f"Compressing pdf to {tempdir}")
archive_path_without_extension = pathlib.Path(tempdir)/(output_path.name)
"""
md5_path = output_path / "md5"
try:
with Spinner():
shutil.make_archive(base_name=archive_path_without_extension,
format='gztar',
root_dir=output_path.parent, base_dir=output_path.name)
except Exception as e:
log.error(f"Couldn't compress archive: {e}")
raise UploadError(e) from e
return tempdir, archive_path_without_extension
with open(md5_path, "r") as f:
saved_md5 = f.read()
except FileNotFoundError as e:
log.error(
"It doesn't appear that `vp-setupfit` was run because no `md5` "
"was found, `vp-setupfit` should be run before uploading a fit."
)
raise UploadError(f"Fit MD5 file not found at {md5_path}") from e

with open(output_path / "filter.yml", "rb") as f:
hashed_config = hashlib.md5(f.read()).hexdigest()

if hashed_config != saved_md5:
log.error(
"Saved md5 doesn't match saved fit configuration runcard, which "
"suggests that the configuration file was modified after it was "
"saved. <fit folder>/filter.yml shouldn't be modified directly. "
"Instead modify the fit runcard and re-run ``vp-setupfit``."
)
raise UploadError

def upload_output(self, output_path, force):
output_path = pathlib.Path(output_path)
pdf_name = output_path.name

if not force:
self.check_pdf_exists(pdf_name)
self.check_fit_md5(output_path)
return super().upload_output(output_path, force)

new_out, name = self.compress(output_path)
super(FileUploader, self).upload_output(new_out)

shutil.rmtree(new_out)
return name.with_suffix('.tar.gz').name
class PDFUploader(ArchiveUploader):
"""An uploader for PDFs. PDFs will be automatically compressed
before uploading."""
target_dir = _profile_key('pdfs_target_dir')
root_url = _profile_key('pdfs_root_url')
_loader_name = "downloadable_pdfs"
_resource_type = "pdf"


def check_for_meta(path):
Expand Down