From 93a87f86228817c7fee4a3811d073ef6993e14f7 Mon Sep 17 00:00:00 2001 From: Andreas Bleuler Date: Fri, 28 Jun 2019 19:51:44 +0200 Subject: [PATCH 1/2] fix(cli): allow renku commands with many inputs Note: This commit fixes only the case where a renku run command implicitly relies on many arguments which are NOT already expanded in the command line. Closes #552. --- renku/api/storage.py | 27 +++++++++++++++++++++------ tests/test_cli.py | 14 ++++++++++++++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/renku/api/storage.py b/renku/api/storage.py index 83de34bc12..5db59a83ca 100644 --- a/renku/api/storage.py +++ b/renku/api/storage.py @@ -31,6 +31,10 @@ HAS_LFS = call(['git', 'lfs'], stdout=PIPE, stderr=STDOUT) == 0 +# Batch size for when renku is expanding a large list +# of files into an argument string. +ARGUMENT_BATCH_SIZE = 100 + @attr.s class StorageApiMixin(RepositoryApiMixin): @@ -105,6 +109,7 @@ def untrack_paths_from_storage(self, *paths): def pull_paths_from_storage(self, *paths): """Pull paths from LFS.""" + import math if self.use_external_storage and self.external_storage_installed: client_dict = defaultdict(list) @@ -115,12 +120,22 @@ def pull_paths_from_storage(self, *paths): client_dict[client.path].append(str(path)) for client_path, paths in client_dict.items(): - run( - self._CMD_STORAGE_PULL + [shlex.quote(','.join(paths))], - cwd=str(client_path.absolute()), - stdout=PIPE, - stderr=STDOUT, - ) + for ibatch in range( + math.ceil(len(paths) / ARGUMENT_BATCH_SIZE) + ): + run( + self._CMD_STORAGE_PULL + [ + shlex.quote( + ','.join( + paths[ibatch * ARGUMENT_BATCH_SIZE: + (ibatch + 1) * ARGUMENT_BATCH_SIZE] + ) + ) + ], + cwd=str(client_path.absolute()), + stdout=PIPE, + stderr=STDOUT, + ) elif self.use_external_storage: raise errors.ExternalStorageNotInstalled(self.repo) diff --git a/tests/test_cli.py b/tests/test_cli.py index a7e69f1d96..7b4cb71ff9 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -103,6 +103,20 @@ def test_run_simple(runner, project): assert '.renku/workflow/' in result.output +def test_run_many_args(client, run): + """Test a renku run command which implicitly relies on many inputs.""" + + os.mkdir('files') + output = 'output.txt' + for i in range(5003): + os.system('touch files/{}.txt'.format(i)) + client.repo.index.add(['files/']) + client.repo.index.commit('add many files') + + exit_code = run(args=('run', 'ls', 'files/'), stdout=output) + assert 0 == exit_code + + _CMD_EXIT_2 = ['bash', '-c', 'exit 2'] From d938423c0119b463e96fa7cb399a314b59d8ab15 Mon Sep 17 00:00:00 2001 From: Andreas Bleuler Date: Tue, 9 Jul 2019 16:11:45 +0200 Subject: [PATCH 2/2] fix(tests): fix failing OSX tests --- renku/api/client.py | 1 - renku/models/_datastructures.py | 1 - 2 files changed, 2 deletions(-) diff --git a/renku/api/client.py b/renku/api/client.py index b2df23be32..5097d75c00 100644 --- a/renku/api/client.py +++ b/renku/api/client.py @@ -33,7 +33,6 @@ class LocalClient( """A low-level client for communicating with a local Renku repository. Example: - >>> import renku >>> client = renku.LocalClient('.') diff --git a/renku/models/_datastructures.py b/renku/models/_datastructures.py index e04d68b76b..14e7d0606b 100644 --- a/renku/models/_datastructures.py +++ b/renku/models/_datastructures.py @@ -94,7 +94,6 @@ class IndexedList(list): """List allowing to query items by id or by named index. Example: - >>> from collections import namedtuple >>> Item = namedtuple('Item', 'key, value') >>> items = IndexedList(Item('a', 1), Item('b', 2), attr='key')