From 7017bf2df2c0e16dfc33b5f9339f4a4c11a1799c Mon Sep 17 00:00:00 2001 From: Jean Lucas Date: Thu, 9 Nov 2023 16:50:01 +0100 Subject: [PATCH 1/9] add dims for dataset item --- nucleus/dataset_item.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/nucleus/dataset_item.py b/nucleus/dataset_item.py index 41ba1cc8..726a80c6 100644 --- a/nucleus/dataset_item.py +++ b/nucleus/dataset_item.py @@ -10,7 +10,11 @@ from .constants import ( BACKEND_REFERENCE_ID_KEY, CAMERA_PARAMS_KEY, + EMBEDDING_INFO_KEY, + EMBEDDING_VECTOR_KEY, + HEIGHT_KEY, IMAGE_URL_KEY, + INDEX_ID_KEY, METADATA_KEY, ORIGINAL_IMAGE_URL_KEY, POINTCLOUD_URL_KEY, @@ -18,6 +22,7 @@ TYPE_KEY, UPLOAD_TO_SCALE_KEY, URL_KEY, + WIDTH_KEY, ) @@ -26,6 +31,18 @@ class DatasetItemType(Enum): POINTCLOUD = "pointcloud" +@dataclass +class DatasetItemEmbeddingInfo: + index_id: str + embedding_vector: list + + def to_payload(self) -> dict: + return { + INDEX_ID_KEY: self.index_id, + EMBEDDING_VECTOR_KEY: self.embedding_vector, + } + + @dataclass # pylint: disable=R0902 class DatasetItem: # pylint: disable=R0902 """A dataset item is an image or pointcloud that has associated metadata. @@ -113,16 +130,30 @@ class DatasetItem: # pylint: disable=R0902 metadata: Optional[dict] = None pointcloud_location: Optional[str] = None upload_to_scale: Optional[bool] = True + embedding_info: Optional[DatasetItemEmbeddingInfo] = None + width: Optional[int] = None + height: Optional[int] = None def __post_init__(self): assert self.reference_id != "DUMMY_VALUE", "reference_id is required." assert bool(self.image_location) != bool( self.pointcloud_location ), "Must specify exactly one of the image_location or pointcloud_location parameters" + if self.pointcloud_location and self.embedding_info: + raise AssertionError( + "Cannot upload embedding vector if pointcloud_location is set" + ) + if (self.pointcloud_location) and not self.upload_to_scale: raise NotImplementedError( "Skipping upload to Scale is not currently implemented for pointclouds." ) + + if any([self.width, self.height]): + assert all( + [self.width, self.height] + ), "If a dimension is specified, both height and width must be given" + self.local = ( is_local_path(self.image_location) if self.image_location else None ) @@ -179,6 +210,15 @@ def to_payload(self, is_scene=False) -> dict: payload[REFERENCE_ID_KEY] = self.reference_id + if self.embedding_info: + payload[EMBEDDING_INFO_KEY] = self.embedding_info.to_payload() + + if self.width: + payload[WIDTH_KEY] = self.width + + if self.height: + payload[HEIGHT_KEY] = self.height + if is_scene: if self.image_location: payload[URL_KEY] = self.image_location From b10116f96234a884428af3d8d0b4f71f4a9a57cb Mon Sep 17 00:00:00 2001 From: Jean Lucas Date: Thu, 9 Nov 2023 16:51:12 +0100 Subject: [PATCH 2/9] update version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 09672787..298542f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ ignore = ["E501", "E741", "E731", "F401"] # Easy ignore for getting it running [tool.poetry] name = "scale-nucleus" -version = "0.16.6" +version = "0.16.7" description = "The official Python client library for Nucleus, the Data Platform for AI" license = "MIT" authors = ["Scale AI Nucleus Team "] From dd8ec429dd812edcc817f4deeffa453442129590 Mon Sep 17 00:00:00 2001 From: Jean Lucas Date: Tue, 14 Nov 2023 13:19:52 +0100 Subject: [PATCH 3/9] changelog --- CHANGELOG.md | 9 +++++++++ pyproject.toml | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 496b2edb..57a63d6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,15 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.16.8](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.7) - 2023-11-09 + +### Added +- Allow passing width and height to `DatasetItem` +- This is _required_ when using privacy mode + +### Removed +- `upload_to_scale` is no longer a property in `DatasetItem`, users should instead specify `use_privacy_mode` on the dataset during creation + ## [0.16.7](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.7) - 2023-11-03 ### Added diff --git a/pyproject.toml b/pyproject.toml index 298542f0..b4dab7cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ ignore = ["E501", "E741", "E731", "F401"] # Easy ignore for getting it running [tool.poetry] name = "scale-nucleus" -version = "0.16.7" +version = "0.16.8" description = "The official Python client library for Nucleus, the Data Platform for AI" license = "MIT" authors = ["Scale AI Nucleus Team "] From 3722b07f3ad47624f07d5299406123c4251effd7 Mon Sep 17 00:00:00 2001 From: Jean Lucas Date: Tue, 14 Nov 2023 13:37:34 +0100 Subject: [PATCH 4/9] add check for dims --- nucleus/constants.py | 1 + nucleus/dataset.py | 17 +++++++++++++++++ nucleus/dataset_item.py | 10 ++++++++++ 3 files changed, 28 insertions(+) diff --git a/nucleus/constants.py b/nucleus/constants.py index c6822c7d..9a539b33 100644 --- a/nucleus/constants.py +++ b/nucleus/constants.py @@ -45,6 +45,7 @@ DATASET_NAME_KEY = "name" DATASET_PRIVACY_MODE_KEY = "use_privacy_mode" DATASET_SLICES_KEY = "slice_ids" +DATASET_USE_PRIVACY_MODE = "use_privacy_mode" DEFAULT_ANNOTATION_UPDATE_MODE = False DEFAULT_NETWORK_TIMEOUT_SEC = 120 DIMENSIONS_KEY = "dimensions" diff --git a/nucleus/dataset.py b/nucleus/dataset.py index 6470c785..d2fd406c 100644 --- a/nucleus/dataset.py +++ b/nucleus/dataset.py @@ -40,6 +40,7 @@ DATASET_IS_SCENE_KEY, DATASET_ITEM_IDS_KEY, DATASET_ITEMS_KEY, + DATASET_USE_PRIVACY_MODE, DEFAULT_ANNOTATION_UPDATE_MODE, EMBEDDING_DIMENSION_KEY, EMBEDDINGS_URL_KEY, @@ -69,6 +70,7 @@ DatasetItem, check_all_paths_remote, check_for_duplicate_reference_ids, + check_items_have_dimensions, ) from .dataset_item_uploader import DatasetItemUploader from .deprecation_warning import deprecated @@ -145,6 +147,7 @@ def __init__(self, dataset_id, client: "NucleusClient", name=None): # NOTE: Optionally set name on creation such that the property access doesn't need to hit the server self._name = name self._is_scene = None + self._use_privacy_mode = None def __repr__(self): if os.environ.get("NUCLEUS_DEBUG", None): @@ -178,6 +181,17 @@ def is_scene(self) -> bool: self._is_scene = response return self._is_scene # type: ignore + @property + def use_privacy_mode(self) -> bool: + """Whether or not the dataset was created for privacy mode.""" + if self._use_privacy_mode is not None: + return self._use_privacy_mode + response = self._client.make_request( + {}, f"dataset/{self.id}/use_privacy_mode", requests.get + )[DATASET_USE_PRIVACY_MODE] + self._use_privacy_mode = response + return self._use_privacy_mode # type: ignore + @property def model_runs(self) -> List[str]: """List of all model runs associated with the Dataset.""" @@ -650,6 +664,9 @@ def append( check_for_duplicate_reference_ids(dataset_items) + if self.use_privacy_mode: + check_items_have_dimensions(dataset_items) + if dataset_items and (lidar_scenes or video_scenes): raise Exception( "You must append either DatasetItems or Scenes to the dataset." diff --git a/nucleus/dataset_item.py b/nucleus/dataset_item.py index c68c6e95..ca7cb20d 100644 --- a/nucleus/dataset_item.py +++ b/nucleus/dataset_item.py @@ -247,3 +247,13 @@ def check_for_duplicate_reference_ids(dataset_items: Sequence[DatasetItem]): raise ValueError( f"Duplicate reference IDs found among dataset_items: {duplicates}" ) + + +def check_items_have_dimensions(dataset_items: Sequence[DatasetItem]): + for item in dataset_items: + has_width = getattr(item, "width") + has_height = getattr(item, "height") + if not (has_width and has_height): + raise Exception( + f"When using privacy mode, all items require a width and height. Missing for item: '{item.reference_id}'" + ) From b73c69ad4bab89c166e91eb02f3e03280cfce051 Mon Sep 17 00:00:00 2001 From: Jean Lucas Date: Thu, 16 Nov 2023 10:20:29 +0100 Subject: [PATCH 5/9] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b92fbed5..bd25bcfc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ ignore = ["E501", "E741", "E731", "F401"] # Easy ignore for getting it running [tool.poetry] name = "scale-nucleus" -version = "0.16.8" +version = "0.16.9" description = "The official Python client library for Nucleus, the Data Platform for AI" license = "MIT" authors = ["Scale AI Nucleus Team "] From 0340d8948c3454efa1d3202f8057698365318fc3 Mon Sep 17 00:00:00 2001 From: Jean Lucas Date: Thu, 16 Nov 2023 12:09:21 +0100 Subject: [PATCH 6/9] remove dup constant --- nucleus/constants.py | 1 - nucleus/dataset.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/nucleus/constants.py b/nucleus/constants.py index d3b6ccb0..d0b53b11 100644 --- a/nucleus/constants.py +++ b/nucleus/constants.py @@ -45,7 +45,6 @@ DATASET_NAME_KEY = "name" DATASET_PRIVACY_MODE_KEY = "use_privacy_mode" DATASET_SLICES_KEY = "slice_ids" -DATASET_USE_PRIVACY_MODE = "use_privacy_mode" DEFAULT_ANNOTATION_UPDATE_MODE = False DEFAULT_NETWORK_TIMEOUT_SEC = 120 DIMENSIONS_KEY = "dimensions" diff --git a/nucleus/dataset.py b/nucleus/dataset.py index b20446bd..74b4a417 100644 --- a/nucleus/dataset.py +++ b/nucleus/dataset.py @@ -43,7 +43,7 @@ DATASET_IS_SCENE_KEY, DATASET_ITEM_IDS_KEY, DATASET_ITEMS_KEY, - DATASET_USE_PRIVACY_MODE, + DATASET_PRIVACY_MODE_KEY, DEFAULT_ANNOTATION_UPDATE_MODE, EMBEDDING_DIMENSION_KEY, EMBEDDINGS_URL_KEY, @@ -194,7 +194,7 @@ def use_privacy_mode(self) -> bool: return self._use_privacy_mode response = self._client.make_request( {}, f"dataset/{self.id}/use_privacy_mode", requests.get - )[DATASET_USE_PRIVACY_MODE] + )[DATASET_PRIVACY_MODE_KEY] self._use_privacy_mode = response return self._use_privacy_mode # type: ignore From 3d13b9771fc7ff5ef4262e26e3d69688bb687d94 Mon Sep 17 00:00:00 2001 From: Jean Lucas Date: Thu, 16 Nov 2023 12:09:36 +0100 Subject: [PATCH 7/9] go back to release .8 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index bd25bcfc..b92fbed5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ ignore = ["E501", "E741", "E731", "F401"] # Easy ignore for getting it running [tool.poetry] name = "scale-nucleus" -version = "0.16.9" +version = "0.16.8" description = "The official Python client library for Nucleus, the Data Platform for AI" license = "MIT" authors = ["Scale AI Nucleus Team "] From fd6f115e96dc483033d466123e048a9db794690d Mon Sep 17 00:00:00 2001 From: Jean Lucas Date: Thu, 16 Nov 2023 12:12:50 +0100 Subject: [PATCH 8/9] set properties for dataset on creation --- nucleus/__init__.py | 8 +++++++- nucleus/dataset.py | 13 ++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/nucleus/__init__.py b/nucleus/__init__.py index d0386567..c4e1a8dc 100644 --- a/nucleus/__init__.py +++ b/nucleus/__init__.py @@ -483,7 +483,13 @@ def create_dataset( }, "dataset/create", ) - return Dataset(response[DATASET_ID_KEY], self) + return Dataset( + response[DATASET_ID_KEY], + self, + name=name, + is_scene=is_scene, + use_privacy_mode=use_privacy_mode, + ) def delete_dataset(self, dataset_id: str) -> dict: """ diff --git a/nucleus/dataset.py b/nucleus/dataset.py index 74b4a417..89725945 100644 --- a/nucleus/dataset.py +++ b/nucleus/dataset.py @@ -147,13 +147,20 @@ class Dataset: existing_dataset = client.get_dataset("YOUR_DATASET_ID") """ - def __init__(self, dataset_id, client: "NucleusClient", name=None): + def __init__( + self, + dataset_id, + client: "NucleusClient", + name=None, + is_scene=None, + use_privacy_mode=None, + ): self.id = dataset_id self._client = client # NOTE: Optionally set name on creation such that the property access doesn't need to hit the server self._name = name - self._is_scene = None - self._use_privacy_mode = None + self._is_scene = is_scene + self._use_privacy_mode = use_privacy_mode def __repr__(self): if os.environ.get("NUCLEUS_DEBUG", None): From daf8d2a328c06744ce11a2dfa250b0f35c689490 Mon Sep 17 00:00:00 2001 From: Jean Lucas Date: Thu, 16 Nov 2023 12:25:10 +0100 Subject: [PATCH 9/9] fix changelog --- CHANGELOG.md | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bec7f29f..18838117 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,23 +6,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.16.9](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.9) - 2023-11-16 +## [0.16.8](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.8) - 2023-11-16 ### Added + +#### Dataset Item width and height - Allow passing width and height to `DatasetItem` - This is _required_ when using privacy mode -### Removed -- `upload_to_scale` is no longer a property in `DatasetItem`, users should instead specify `use_privacy_mode` on the dataset during creation - - -## [0.16.8](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.8) - 2023-11-13 - -### Added - +#### Dataset Item Fetch - Added `dataset.items_and_annotation_chip_generator()` functionality to generate chips of images in s3 or locally. - Added `query` parameter for `dataset.items_and_annotation_generator()` to filter dataset items. +### Removed +- `upload_to_scale` is no longer a property in `DatasetItem`, users should instead specify `use_privacy_mode` on the dataset during creation + ## [0.16.7](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.7) - 2023-11-03