diff --git a/renku/api/storage.py b/renku/api/storage.py
index 83de34bc12..5db59a83ca 100644
--- a/renku/api/storage.py
+++ b/renku/api/storage.py
@@ -31,6 +31,10 @@
 
 HAS_LFS = call(['git', 'lfs'], stdout=PIPE, stderr=STDOUT) == 0
 
+# Maximum number of paths passed to one external command when
+# a large list of files is expanded into an argument string.
+ARGUMENT_BATCH_SIZE = 100
+
 
 @attr.s
 class StorageApiMixin(RepositoryApiMixin):
@@ -115,12 +119,17 @@ def pull_paths_from_storage(self, *paths):
                 client_dict[client.path].append(str(path))
 
             for client_path, paths in client_dict.items():
-                run(
-                    self._CMD_STORAGE_PULL + [shlex.quote(','.join(paths))],
-                    cwd=str(client_path.absolute()),
-                    stdout=PIPE,
-                    stderr=STDOUT,
-                )
+                # Pull in batches so that a long list of paths is never
+                # expanded into a single oversized argument string.
+                for start in range(0, len(paths), ARGUMENT_BATCH_SIZE):
+                    batch = paths[start:start + ARGUMENT_BATCH_SIZE]
+                    run(
+                        self._CMD_STORAGE_PULL +
+                        [shlex.quote(','.join(batch))],
+                        cwd=str(client_path.absolute()),
+                        stdout=PIPE,
+                        stderr=STDOUT,
+                    )
         elif self.use_external_storage:
             raise errors.ExternalStorageNotInstalled(self.repo)
 
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 635af28704..a8f7d7a49a 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -103,6 +103,21 @@ def test_run_simple(runner, project):
     assert '.renku/workflow/' in result.output
 
 
+def test_run_many_args(client, run):
+    """Test a renku run command which implicitly relies on many inputs."""
+    os.mkdir('files')
+    output = 'output.txt'
+    # Create the empty files in-process; a shell per file is very slow.
+    for i in range(5003):
+        with open(os.path.join('files', '{}.txt'.format(i)), 'w'):
+            pass
+    client.repo.index.add(['files/'])
+    client.repo.index.commit('add many files')
+
+    exit_code = run(args=('run', 'ls', 'files/'), stdout=output)
+    assert 0 == exit_code
+
+
 _CMD_EXIT_2 = ['bash', '-c', 'exit 2']
 
 