diff --git a/README.md b/README.md index 16cfc00..199a53b 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,10 @@ and end with an image number starting from 0, such as `random_image_0.jpg`, `--size` flag displays information on the images, such as the size of the first image and the size of the overall directory. +Note that when creating a very large number of images, systems can easily run +out of memory. In this case, increase the `--chunksize` value to reduce the +amount of memory allocated by the multiprocessing pool. + ### TFRecords TFRecords can also be easily generated using the application. This command expects images to be pre-loaded to be used as the basis for the TFRecord files. diff --git a/imagine/imagine.py b/imagine/imagine.py index 2543239..e226b2c 100755 --- a/imagine/imagine.py +++ b/imagine/imagine.py @@ -72,7 +72,9 @@ def check_directory_exists(directory): @click.option('--image_format', default='png', required=True) @click.option('--seed', default=0) @click.option('--size', is_flag=True, default=False) -def create_images(path, name, width, height, count, image_format, seed, size): +@click.option('--chunksize', default=64) +def create_images(path, name, width, height, count, image_format, seed, size, + chunksize): click.echo("Creating {} {} files located at {} of {}x{} resolution with a " "base filename of {}".format(count, image_format, path, width, height, name)) @@ -84,9 +86,13 @@ def create_images(path, name, width, height, count, image_format, seed, size): pool = Pool() try: start_time = perf_counter() + # NOTE: For very large image counts on memory-constrained systems, this + # can stall out. Either reduce the requested image count or increase the + # chunk size. pool.starmap(image_creation, ((combined_path, width, height, seed, image_format, n) - for n in range(count))) + for n in range(count)), + chunksize=chunksize) finally: pool.close() pool.join()