From 6a36f2178c21c53329110b37233da1630016d796 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Thu, 8 Aug 2019 15:01:23 +0200 Subject: [PATCH 1/3] Fix: ensure dataset file paths don't depend on the CWD of the renku command --- renku/models/datasets.py | 15 +++++++++++++++ tests/cli/test_datasets.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/renku/models/datasets.py b/renku/models/datasets.py index 2382b32778..e3c6fe2d62 100644 --- a/renku/models/datasets.py +++ b/renku/models/datasets.py @@ -465,3 +465,18 @@ def __attrs_post_init__(self): if not self.path: self.path = str(self.client.renku_datasets_path / str(self.uid)) + + if self.files: + for datasetfile in self.files: + if datasetfile.client is None: + try: + client, _, _ = self.client.resolve_in_submodules( + self.client.find_previous_commit( + datasetfile.path, + revision='HEAD'), + datasetfile.path, + ) + except KeyError: + client = self.client + + datasetfile.client = client diff --git a/tests/cli/test_datasets.py b/tests/cli/test_datasets.py index b07df83b72..a088ad72a5 100644 --- a/tests/cli/test_datasets.py +++ b/tests/cli/test_datasets.py @@ -440,6 +440,39 @@ def test_dataset_add_with_copy(tmpdir, runner, project, client): assert inode not in original_inodes +def test_dataset_file_path_from_subdirectory(runner, project, client): + """Test adding a file into a dataset and check path independent + of the CWD """ + # create a dataset + result = runner.invoke(cli.cli, ['dataset', 'create', 'dataset']) + assert 0 == result.exit_code + assert 'OK' in result.output + + with (client.path / 'a').open('w') as fp: + fp.write('a') + + client.repo.git.add('a') + client.repo.git.commit(message='Added file a') + + # add data + result = runner.invoke( + cli.cli, + ['dataset', 'add', 'dataset', 'a'], + catch_exceptions=False, + ) + assert 0 == result.exit_code + + with client.with_dataset('dataset') as dataset: + datasetfile = dataset.find_file('a') + assert datasetfile + + assert datasetfile.full_path == client.path / 'a' + + os.chdir('./data') + + assert datasetfile.full_path == client.path / 'a' + + def test_datasets_ls_files_tabular_empty(runner, project): """Test listing of data within empty dataset.""" # create a dataset From 47fd6073411425b0d977431adc2e091f04711016 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Thu, 8 Aug 2019 16:14:30 +0200 Subject: [PATCH 2/3] Fix: changes double quotes to single quotes to make pytest pass --- renku/cli/_format/graph.py | 2 +- renku/models/datasets.py | 6 +++--- renku/models/provenance/agents.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/renku/cli/_format/graph.py b/renku/cli/_format/graph.py index a49c44f093..e7f6115a9b 100644 --- a/renku/cli/_format/graph.py +++ b/renku/cli/_format/graph.py @@ -271,7 +271,7 @@ def color(p): fields[sn].add((qname(p, g), formatliteral(o, g))) for u, n in nodes.items(): - stream.write(u"# %s %s\n" % (u, n)) + stream.write(u'# %s %s\n' % (u, n)) f = [ '%s' '%s' % x for x in sorted(types[n]) diff --git a/renku/models/datasets.py b/renku/models/datasets.py index e3c6fe2d62..d8cdbbcbfe 100644 --- a/renku/models/datasets.py +++ b/renku/models/datasets.py @@ -88,7 +88,7 @@ def short_name(self): def check_email(self, attribute, value): """Check that the email is valid.""" if self.email and not ( - isinstance(value, str) and re.match(r"[^@]+@[^@]+\.[^@]+", value) + isinstance(value, str) and re.match(r'[^@]+@[^@]+\.[^@]+', value) ): raise ValueError('Email address is invalid.') @@ -472,8 +472,8 @@ def __attrs_post_init__(self): try: client, _, _ = self.client.resolve_in_submodules( self.client.find_previous_commit( - datasetfile.path, - revision='HEAD'), + datasetfile.path, revision='HEAD' + ), datasetfile.path, ) except KeyError: diff --git a/renku/models/provenance/agents.py b/renku/models/provenance/agents.py index fba38d14cb..c6ee2c7f15 100644 --- a/renku/models/provenance/agents.py +++ b/renku/models/provenance/agents.py @@ -54,7 +54,7 @@ def default_id(self): @email.validator def check_email(self, attribute, value): """Check that the email is valid.""" - if not (isinstance(value, str) and re.match(r"[^@]+@[^@]+", value)): + if not (isinstance(value, str) and re.match(r'[^@]+@[^@]+', value)): raise ValueError('Email address "{0}" is invalid.'.format(value)) @classmethod From 515a9ed02ef2b4cc2a2d68ba279b54b48afb140b Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Tue, 13 Aug 2019 10:23:25 +0200 Subject: [PATCH 3/3] Fixes path inconsistencies for dataset files when linked from an external github repository --- renku/api/datasets.py | 7 ++++++- renku/models/datasets.py | 15 ++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/renku/api/datasets.py b/renku/api/datasets.py index eacb30a3c6..91a87123a7 100644 --- a/renku/api/datasets.py +++ b/renku/api/datasets.py @@ -187,7 +187,12 @@ def add_data_to_dataset( # Generate the DatasetFiles dataset_files = [] for data in files: - dataset_files.append(DatasetFile.from_revision(self, **data)) + datasetfile = DatasetFile.from_revision(self, **data) + + # Set dataset file path relative to projects root for submodules + if datasetfile.client != self: + datasetfile.path = str(data['path']) + dataset_files.append(datasetfile) dataset.update_files(dataset_files) def _add_from_url(self, dataset, path, url, link=False, **kwargs): diff --git a/renku/models/datasets.py b/renku/models/datasets.py index d8cdbbcbfe..8fc7aa0967 100644 --- a/renku/models/datasets.py +++ b/renku/models/datasets.py @@ -469,14 +469,11 @@ def __attrs_post_init__(self): if self.files: for datasetfile in self.files: if datasetfile.client is None: - try: - client, _, _ = self.client.resolve_in_submodules( - self.client.find_previous_commit( - datasetfile.path, revision='HEAD' - ), - datasetfile.path, - ) - except KeyError: - client = self.client + client, _, _ = self.client.resolve_in_submodules( + self.client.find_previous_commit( + datasetfile.path, revision='HEAD' + ), + datasetfile.path, + ) datasetfile.client = client