diff --git a/CHANGELOG.md b/CHANGELOG.md index ba37f989..18838117 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,13 +5,23 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.16.8](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.8) - 2023-11-13 + +## [0.16.8](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.8) - 2023-11-16 ### Added +#### Dataset Item width and height +- Allow passing width and height to `DatasetItem` +- This is _required_ when using privacy mode + +#### Dataset Item Fetch - Added `dataset.items_and_annotation_chip_generator()` functionality to generate chips of images in s3 or locally. - Added `query` parameter for `dataset.items_and_annotation_generator()` to filter dataset items. +### Removed +- `upload_to_scale` is no longer a property in `DatasetItem`, users should instead specify `use_privacy_mode` on the dataset during creation + + ## [0.16.7](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.7) - 2023-11-03 ### Added diff --git a/nucleus/__init__.py b/nucleus/__init__.py index d0386567..c4e1a8dc 100644 --- a/nucleus/__init__.py +++ b/nucleus/__init__.py @@ -483,7 +483,13 @@ def create_dataset( }, "dataset/create", ) - return Dataset(response[DATASET_ID_KEY], self) + return Dataset( + response[DATASET_ID_KEY], + self, + name=name, + is_scene=is_scene, + use_privacy_mode=use_privacy_mode, + ) def delete_dataset(self, dataset_id: str) -> dict: """ diff --git a/nucleus/dataset.py b/nucleus/dataset.py index 07fad906..89725945 100644 --- a/nucleus/dataset.py +++ b/nucleus/dataset.py @@ -43,6 +43,7 @@ DATASET_IS_SCENE_KEY, DATASET_ITEM_IDS_KEY, DATASET_ITEMS_KEY, + DATASET_PRIVACY_MODE_KEY, DEFAULT_ANNOTATION_UPDATE_MODE, EMBEDDING_DIMENSION_KEY, EMBEDDINGS_URL_KEY, @@ -75,6 +76,7 
@@ DatasetItem, check_all_paths_remote, check_for_duplicate_reference_ids, + check_items_have_dimensions, ) from .dataset_item_uploader import DatasetItemUploader from .deprecation_warning import deprecated @@ -145,12 +147,20 @@ class Dataset: existing_dataset = client.get_dataset("YOUR_DATASET_ID") """ - def __init__(self, dataset_id, client: "NucleusClient", name=None): + def __init__( + self, + dataset_id, + client: "NucleusClient", + name=None, + is_scene=None, + use_privacy_mode=None, + ): self.id = dataset_id self._client = client # NOTE: Optionally set name on creation such that the property access doesn't need to hit the server self._name = name - self._is_scene = None + self._is_scene = is_scene + self._use_privacy_mode = use_privacy_mode def __repr__(self): if os.environ.get("NUCLEUS_DEBUG", None): @@ -184,6 +194,17 @@ def is_scene(self) -> bool: self._is_scene = response return self._is_scene # type: ignore + @property + def use_privacy_mode(self) -> bool: + """Whether or not the dataset was created for privacy mode.""" + if self._use_privacy_mode is not None: + return self._use_privacy_mode + response = self._client.make_request( + {}, f"dataset/{self.id}/use_privacy_mode", requests.get + )[DATASET_PRIVACY_MODE_KEY] + self._use_privacy_mode = response + return self._use_privacy_mode # type: ignore + @property def model_runs(self) -> List[str]: """List of all model runs associated with the Dataset.""" @@ -656,6 +677,9 @@ def append( check_for_duplicate_reference_ids(dataset_items) + if self.use_privacy_mode: + check_items_have_dimensions(dataset_items) + if dataset_items and (lidar_scenes or video_scenes): raise Exception( "You must append either DatasetItems or Scenes to the dataset." 
diff --git a/nucleus/dataset_item.py b/nucleus/dataset_item.py
index 90e2c2ef..ca7cb20d 100644
--- a/nucleus/dataset_item.py
+++ b/nucleus/dataset_item.py
@@ -12,6 +12,7 @@
     CAMERA_PARAMS_KEY,
     EMBEDDING_INFO_KEY,
     EMBEDDING_VECTOR_KEY,
+    HEIGHT_KEY,
     IMAGE_URL_KEY,
     INDEX_ID_KEY,
     METADATA_KEY,
@@ -20,6 +21,7 @@
     REFERENCE_ID_KEY,
     TYPE_KEY,
     URL_KEY,
+    WIDTH_KEY,
 )
 
 
@@ -120,6 +122,8 @@ class DatasetItem: # pylint: disable=R0902
     metadata: Optional[dict] = None
     pointcloud_location: Optional[str] = None
     embedding_info: Optional[DatasetItemEmbeddingInfo] = None
+    width: Optional[int] = None
+    height: Optional[int] = None
 
     def __post_init__(self):
         assert self.reference_id != "DUMMY_VALUE", "reference_id is required."
@@ -190,6 +194,12 @@ def to_payload(self, is_scene=False) -> dict:
         if self.embedding_info:
             payload[EMBEDDING_INFO_KEY] = self.embedding_info.to_payload()
 
+        if self.width:
+            payload[WIDTH_KEY] = self.width
+
+        if self.height:
+            payload[HEIGHT_KEY] = self.height
+
         if is_scene:
             if self.image_location:
                 payload[URL_KEY] = self.image_location
@@ -237,3 +247,23 @@ def check_for_duplicate_reference_ids(dataset_items: Sequence[DatasetItem]):
         raise ValueError(
             f"Duplicate reference IDs found among dataset_items: {duplicates}"
         )
+
+
+def check_items_have_dimensions(dataset_items: Sequence[DatasetItem]):
+    """Validate that every dataset item has both a width and a height.
+
+    Truthiness is used on purpose: ``None`` and ``0`` both count as missing,
+    which mirrors ``DatasetItem.to_payload`` only emitting truthy dimensions.
+
+    Args:
+        dataset_items: The items to validate.
+
+    Raises:
+        ValueError: On the first item lacking a width or a height; the message
+            includes that item's ``reference_id``.
+    """
+    for item in dataset_items:
+        if not (item.width and item.height):
+            raise ValueError(
+                f"When using privacy mode, all items require a width and height. Missing for item: '{item.reference_id}'"
+            )
diff --git a/pyproject.toml b/pyproject.toml
index 28c45713..b92fbed5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,7 +25,7 @@ ignore = ["E501", "E741", "E731", "F401"] # Easy ignore for getting it running
 
 [tool.poetry]
 name = "scale-nucleus"
-version = "0.16.7"
+version = "0.16.8"
 description = "The official Python client library for Nucleus, the Data Platform for AI"
 license = "MIT"
 authors = ["Scale AI Nucleus Team "]