From 07bf072a31c3256f4402c97b9e68382a4190a917 Mon Sep 17 00:00:00 2001 From: Jon Masukawa Date: Mon, 13 Apr 2026 23:16:40 +0000 Subject: [PATCH 1/2] Also support absolute filepath on image uploads for datasets metadata --update --- docs/datasets_metadata.md | 16 ++++++++++++++-- src/kaggle/api/kaggle_api_extended.py | 15 ++++++++++----- src/kaggle/cli.py | 24 +++++++++++++++++++++--- 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/docs/datasets_metadata.md b/docs/datasets_metadata.md index 6af8c6aa..cb2ec3b8 100644 --- a/docs/datasets_metadata.md +++ b/docs/datasets_metadata.md @@ -176,9 +176,13 @@ You can specify the following values for `expectedUpdateFrequency`: * `hourly` ## Images -You can update your dataset image by providing a relative path from your `datasets-metadata.json` to an image file, using the `image` property. +You can update your dataset image using the `image` property. The value for this property should be either: +- A relative path from your `datasets-metadata.json` to an image file +- An absolute path to an image file + - Ensure that `kaggle-cli` has access to the directory and image file -For example, if your metadata file and image are located at: +### Specifying an image with a relative path: +If your metadata file and image are located at: - `/some/path/dataset-metadata.json` - `/some/path/image.png` @@ -196,6 +200,14 @@ This property should be specified as: "image": "alternative/path/to/other-image.jpg" ``` +### Specifying an image with an absolute path: +Note that this particular syntax deviates from the [Data Package spec](https://specs.frictionlessdata.io/data-package/#image). It can be used to avoid a scenario where your image would be included in your dataset files when using `kaggle datasets create --dir-mode` with a value of `zip` or `tar`. 
+ +Simply use an absolute path to the image in your `datasets-metadata.json`: +``` +"image": "/absolute/file/path/to/image.png" +``` + ### Supported image file types and expected dimensions The following file types are supported: diff --git a/src/kaggle/api/kaggle_api_extended.py b/src/kaggle/api/kaggle_api_extended.py index 78124f86..b1614d99 100644 --- a/src/kaggle/api/kaggle_api_extended.py +++ b/src/kaggle/api/kaggle_api_extended.py @@ -1941,9 +1941,11 @@ def dataset_metadata_update(self, dataset, path): if expected_update_frequency: update_settings.expected_update_frequency = expected_update_frequency - effective_relative_path_to_image = metadata.get("image") - if effective_relative_path_to_image: - cropped_image_upload = self._upload_dataset_image_file(effective_path, effective_relative_path_to_image) + relative_or_absolute_image_file_path = metadata.get("image") + if relative_or_absolute_image_file_path: + cropped_image_upload = self._upload_dataset_image_file( + effective_path, relative_or_absolute_image_file_path + ) if cropped_image_upload: update_settings.image = cropped_image_upload @@ -1958,9 +1960,12 @@ def dataset_metadata_update(self, dataset, path): exit(1) def _upload_dataset_image_file( - self, metadata_file_path, relative_image_file_path, quiet=False + self, metadata_file_path, relative_or_absolute_image_file_path, quiet=False ) -> CroppedImageUpload: - image_full_path = os.path.join(metadata_file_path, relative_image_file_path) + if os.path.isabs(relative_or_absolute_image_file_path): + image_full_path = relative_or_absolute_image_file_path + else: + image_full_path = os.path.join(metadata_file_path, relative_or_absolute_image_file_path) ext = Path(image_full_path).suffix if ext not in [".jpg", ".jpeg", ".png", ".webp"]: raise ValueError("Image file requires an extension of .jpg, .jpeg, .png, or .webp: %s" % image_full_path) diff --git a/src/kaggle/cli.py b/src/kaggle/cli.py index 5a4567c0..51249e73 100644 --- a/src/kaggle/cli.py +++ 
b/src/kaggle/cli.py @@ -1015,9 +1015,27 @@ def parse_benchmark_tasks(subparsers) -> None: ) parser_run_optional = parser_run._action_groups.pop() parser_run_optional.add_argument("task", help=Help.param_benchmarks_task) - parser_run_optional.add_argument("-m", "--model", dest="model", nargs="+", required=False, help=Help.param_benchmarks_model) - parser_run_optional.add_argument("--wait", dest="wait", type=int, nargs="?", const=0, default=None, required=False, help=Help.param_benchmarks_wait) - parser_run_optional.add_argument("--poll-interval", dest="poll_interval", type=int, default=10, required=False, help=Help.param_benchmarks_poll_interval) + parser_run_optional.add_argument( + "-m", "--model", dest="model", nargs="+", required=False, help=Help.param_benchmarks_model + ) + parser_run_optional.add_argument( + "--wait", + dest="wait", + type=int, + nargs="?", + const=0, + default=None, + required=False, + help=Help.param_benchmarks_wait, + ) + parser_run_optional.add_argument( + "--poll-interval", + dest="poll_interval", + type=int, + default=10, + required=False, + help=Help.param_benchmarks_poll_interval, + ) parser_run._action_groups.append(parser_run_optional) parser_run.set_defaults(func=api.benchmarks_tasks_run_cli) From ed20d6cd497d5bb72f8a8145d113f1ee8e705095 Mon Sep 17 00:00:00 2001 From: Jon Masukawa Date: Tue, 14 Apr 2026 23:44:37 +0000 Subject: [PATCH 2/2] PR Feedback: Use canonical named file instead of path. --- docs/datasets_metadata.md | 32 +++++++++------------------ src/kaggle/api/kaggle_api_extended.py | 26 +++++++++++++--------- 2 files changed, 25 insertions(+), 33 deletions(-) diff --git a/docs/datasets_metadata.md b/docs/datasets_metadata.md index cb2ec3b8..75feac5d 100644 --- a/docs/datasets_metadata.md +++ b/docs/datasets_metadata.md @@ -176,36 +176,24 @@ You can specify the following values for `expectedUpdateFrequency`: * `hourly` ## Images -You can update your dataset image using the `image` property. 
The value for this property should be either: -- A relative path from your `datasets-metadata.json` to an image file -- An absolute path to an image file - - Ensure that `kaggle-cli` has access to the directory and image file +The recommended way to update your dataset's image is by placing a file named `dataset-cover-image.png` (or `.jpg`, `.jpeg`, `.webp`) as a sibling file to your `dataset-metadata.json`. -### Specifying an image with a relative path: -If your metadata file and image are located at: +Example: - `/some/path/dataset-metadata.json` -- `/some/path/image.png` +- `/some/path/dataset-cover-image.png` -This property should be specified as: -``` -"image": "image.png" -``` +The image file will only be used for dataset metadata, and will not be uploaded as a file within your dataset. + +### Specifying an image with a relative path +As an alternative, you can update your dataset image by providing a relative path from your `dataset-metadata.json` to an image file, using the `image` property. -If instead, your files were located at: +If your files were located at: - `/some/path/dataset-metadata.json` -- `/some/path/alternative/path/to/other-image.jpg` +- `/some/path/to/my-image.jpg` This property should be specified as: ``` -"image": "alternative/path/to/other-image.jpg" -``` - -### Specifying an image with an absolute path: -Note that this particular syntax deviates from the [Data Package spec](https://specs.frictionlessdata.io/data-package/#image). It can be used to avoid a scenario where your image would be included in your dataset files when using `kaggle datasets create --dir-mode` with a value of `zip` or `tar`. 
- -Simply use an absolute path to the image in your `datasets-metadata.json`: -``` -"image": "/absolute/file/path/to/image.png" +"image": "to/my-image.jpg" ``` ### Supported image file types and expected dimensions diff --git a/src/kaggle/api/kaggle_api_extended.py b/src/kaggle/api/kaggle_api_extended.py index b1614d99..56854412 100644 --- a/src/kaggle/api/kaggle_api_extended.py +++ b/src/kaggle/api/kaggle_api_extended.py @@ -522,6 +522,8 @@ class KaggleApi: HEADER_API_VERSION = "X-Kaggle-ApiVersion" DATASET_METADATA_FILE = "dataset-metadata.json" OLD_DATASET_METADATA_FILE = "datapackage.json" + DATASET_COVER_IMAGE_SUPPORTED_EXTENSIONS = [".png", ".jpg", ".jpeg", ".webp"] + DATASET_COVER_IMAGE_FILES = ["dataset-cover-image" + ext for ext in DATASET_COVER_IMAGE_SUPPORTED_EXTENSIONS] KERNEL_METADATA_FILE = "kernel-metadata.json" MODEL_METADATA_FILE = "model-metadata.json" MODEL_INSTANCE_METADATA_FILE = "model-instance-metadata.json" @@ -1941,11 +1943,15 @@ def dataset_metadata_update(self, dataset, path): if expected_update_frequency: update_settings.expected_update_frequency = expected_update_frequency - relative_or_absolute_image_file_path = metadata.get("image") - if relative_or_absolute_image_file_path: - cropped_image_upload = self._upload_dataset_image_file( - effective_path, relative_or_absolute_image_file_path - ) + effective_relative_path_to_image = metadata.get("image") + if not effective_relative_path_to_image: + # If user did not specify an image path explicitly, check if canonical images exist as siblings to dataset-metadata.json. 
+ for canonical_image_filename in self.DATASET_COVER_IMAGE_FILES: + canonical_image_full_path = os.path.join(effective_path, canonical_image_filename) + if os.path.exists(canonical_image_full_path): + effective_relative_path_to_image = canonical_image_filename + if effective_relative_path_to_image: + cropped_image_upload = self._upload_dataset_image_file(effective_path, effective_relative_path_to_image) if cropped_image_upload: update_settings.image = cropped_image_upload @@ -1960,14 +1966,11 @@ def dataset_metadata_update(self, dataset, path): exit(1) def _upload_dataset_image_file( - self, metadata_file_path, relative_or_absolute_image_file_path, quiet=False + self, metadata_file_path, relative_image_file_path, quiet=False ) -> CroppedImageUpload: - if os.path.isabs(relative_or_absolute_image_file_path): - image_full_path = relative_or_absolute_image_file_path - else: - image_full_path = os.path.join(metadata_file_path, relative_or_absolute_image_file_path) + image_full_path = os.path.join(metadata_file_path, relative_image_file_path) ext = Path(image_full_path).suffix - if ext not in [".jpg", ".jpeg", ".png", ".webp"]: + if ext not in self.DATASET_COVER_IMAGE_SUPPORTED_EXTENSIONS: raise ValueError("Image file requires an extension of .jpg, .jpeg, .png, or .webp: %s" % image_full_path) if not os.path.isfile(image_full_path): @@ -4924,6 +4927,7 @@ def upload_files( if file_name in [ self.DATASET_METADATA_FILE, self.OLD_DATASET_METADATA_FILE, + *self.DATASET_COVER_IMAGE_FILES, self.KERNEL_METADATA_FILE, self.MODEL_METADATA_FILE, self.MODEL_INSTANCE_METADATA_FILE,