From 9f6447d0756881123e0cf0107ea0a3456bc91029 Mon Sep 17 00:00:00 2001 From: Robert Clark Date: Wed, 18 Nov 2020 10:49:47 -0600 Subject: [PATCH] Chunk image creation process The starmap method from the multiprocessing library allocates memory for an entire iterable. When using starmap with very long iterables, as can be the case when creating a large number of images using Imageinary, memory-constrained systems and containers can have stalled processes, which leads to a failure to complete the workload. By chunking the starmap into smaller pieces, the application trades slightly more overhead and slower processing for more stable functionality on a wider array of systems. Signed-Off-By: Robert Clark --- README.md | 4 ++++ imagine/imagine.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 16cfc00..199a53b 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,10 @@ and end with an image number starting from 0, such as `random_image_0.jpg`, `--size` flag displays information on the images, such as the size of the first image and the size of the overall directory. +Note that for creating a very large number of images, systems can easily run out +of memory. In this case, increase the `--chunksize` to reduce the amount of +memory allocated by each multiprocessing pool. + ### TFRecords TFRecords can also be easily generated using the application. This command expects images to be pre-loaded to be used as the basis for the TFRecord files. 
diff --git a/imagine/imagine.py b/imagine/imagine.py index 2543239..e226b2c 100755 --- a/imagine/imagine.py +++ b/imagine/imagine.py @@ -72,7 +72,9 @@ def check_directory_exists(directory): @click.option('--image_format', default='png', required=True) @click.option('--seed', default=0) @click.option('--size', is_flag=True, default=False) -def create_images(path, name, width, height, count, image_format, seed, size): +@click.option('--chunksize', default=64) +def create_images(path, name, width, height, count, image_format, seed, size, + chunksize): click.echo("Creating {} {} files located at {} of {}x{} resolution with a " "base filename of {}".format(count, image_format, path, width, height, name)) @@ -84,9 +86,13 @@ def create_images(path, name, width, height, count, image_format, seed, size): pool = Pool() try: start_time = perf_counter() + # NOTE: For very large image counts on memory-constrained systems, this + # can stall-out. Either reduce the image count request, or increase the + # chunk size. pool.starmap(image_creation, ((combined_path, width, height, seed, image_format, n) - for n in range(count))) + for n in range(count)), + chunksize=chunksize) finally: pool.close() pool.join()