Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,23 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.16.8](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.8) - 2023-11-13

## [0.16.8](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.8) - 2023-11-16

### Added

#### Dataset Item width and height
- Allow passing width and height to `DatasetItem`
- This is _required_ when using privacy mode

#### Dataset Item Fetch
- Added `dataset.items_and_annotation_chip_generator()` functionality to generate chips of images in S3 or locally.
- Added `query` parameter for `dataset.items_and_annotation_generator()` to filter dataset items.

### Removed
- `upload_to_scale` is no longer a property in `DatasetItem`; users should instead specify `use_privacy_mode` on the dataset during creation.


## [0.16.7](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.7) - 2023-11-03

### Added
Expand Down
8 changes: 7 additions & 1 deletion nucleus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,13 @@ def create_dataset(
},
"dataset/create",
)
return Dataset(response[DATASET_ID_KEY], self)
return Dataset(
response[DATASET_ID_KEY],
self,
name=name,
is_scene=is_scene,
use_privacy_mode=use_privacy_mode,
)

def delete_dataset(self, dataset_id: str) -> dict:
"""
Expand Down
28 changes: 26 additions & 2 deletions nucleus/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
DATASET_IS_SCENE_KEY,
DATASET_ITEM_IDS_KEY,
DATASET_ITEMS_KEY,
DATASET_PRIVACY_MODE_KEY,
DEFAULT_ANNOTATION_UPDATE_MODE,
EMBEDDING_DIMENSION_KEY,
EMBEDDINGS_URL_KEY,
Expand Down Expand Up @@ -75,6 +76,7 @@
DatasetItem,
check_all_paths_remote,
check_for_duplicate_reference_ids,
check_items_have_dimensions,
)
from .dataset_item_uploader import DatasetItemUploader
from .deprecation_warning import deprecated
Expand Down Expand Up @@ -145,12 +147,20 @@ class Dataset:
existing_dataset = client.get_dataset("YOUR_DATASET_ID")
"""

# NOTE(review): diff residue — this one-line signature looks like the removed
# (pre-change) side of the hunk; the multi-line signature below is the added side.
def __init__(self, dataset_id, client: "NucleusClient", name=None):
def __init__(
self,
dataset_id,
client: "NucleusClient",
name=None,
is_scene=None,
use_privacy_mode=None,
):
# Store the dataset id and the client used for requests. `name`, `is_scene`
# and `use_privacy_mode` all default to None and are kept on private attributes.
self.id = dataset_id
self._client = client
# NOTE: Optionally set name on creation such that the property access doesn't need to hit the server
self._name = name
# NOTE(review): presumably the removed side of the diff; the assignment on
# the next line supersedes it.
self._is_scene = None
self._is_scene = is_scene
# When this is not None, the `use_privacy_mode` property returns it directly
# instead of issuing a request.
self._use_privacy_mode = use_privacy_mode

def __repr__(self):
if os.environ.get("NUCLEUS_DEBUG", None):
Expand Down Expand Up @@ -184,6 +194,17 @@ def is_scene(self) -> bool:
self._is_scene = response
return self._is_scene # type: ignore

@property
def use_privacy_mode(self) -> bool:
    """Report whether this dataset was created with privacy mode enabled.

    The value is looked up lazily: if it was not supplied at construction
    time (i.e. the cached value is ``None``), a GET request is made to
    ``dataset/<id>/use_privacy_mode`` and the answer is cached on the
    instance for subsequent accesses.
    """
    if self._use_privacy_mode is None:
        # Not known yet: fetch it from the server once and remember the result.
        payload = self._client.make_request(
            {}, f"dataset/{self.id}/use_privacy_mode", requests.get
        )
        self._use_privacy_mode = payload[DATASET_PRIVACY_MODE_KEY]
    return self._use_privacy_mode  # type: ignore

@property
def model_runs(self) -> List[str]:
"""List of all model runs associated with the Dataset."""
Expand Down Expand Up @@ -656,6 +677,9 @@ def append(

check_for_duplicate_reference_ids(dataset_items)

if self.use_privacy_mode:
check_items_have_dimensions(dataset_items)

if dataset_items and (lidar_scenes or video_scenes):
raise Exception(
"You must append either DatasetItems or Scenes to the dataset."
Expand Down
20 changes: 20 additions & 0 deletions nucleus/dataset_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
CAMERA_PARAMS_KEY,
EMBEDDING_INFO_KEY,
EMBEDDING_VECTOR_KEY,
HEIGHT_KEY,
IMAGE_URL_KEY,
INDEX_ID_KEY,
METADATA_KEY,
Expand All @@ -20,6 +21,7 @@
REFERENCE_ID_KEY,
TYPE_KEY,
URL_KEY,
WIDTH_KEY,
)


Expand Down Expand Up @@ -120,6 +122,8 @@ class DatasetItem: # pylint: disable=R0902
metadata: Optional[dict] = None
pointcloud_location: Optional[str] = None
embedding_info: Optional[DatasetItemEmbeddingInfo] = None
width: Optional[int] = None
height: Optional[int] = None

def __post_init__(self):
assert self.reference_id != "DUMMY_VALUE", "reference_id is required."
Expand Down Expand Up @@ -190,6 +194,12 @@ def to_payload(self, is_scene=False) -> dict:
if self.embedding_info:
payload[EMBEDDING_INFO_KEY] = self.embedding_info.to_payload()

if self.width:
payload[WIDTH_KEY] = self.width

if self.height:
payload[HEIGHT_KEY] = self.height

if is_scene:
if self.image_location:
payload[URL_KEY] = self.image_location
Expand Down Expand Up @@ -237,3 +247,13 @@ def check_for_duplicate_reference_ids(dataset_items: Sequence[DatasetItem]):
raise ValueError(
f"Duplicate reference IDs found among dataset_items: {duplicates}"
)


def check_items_have_dimensions(dataset_items: Sequence["DatasetItem"]):
    """Validate that every dataset item carries both a width and a height.

    Args:
        dataset_items: Items to validate. Each one must expose ``width``,
            ``height`` and ``reference_id`` attributes.

    Raises:
        ValueError: For the first item whose width or height is missing.
            ``ValueError`` matches the sibling reference-id validator and is
            still caught by callers that handle ``Exception``.
    """
    for item in dataset_items:
        # Truthiness check: both None and 0 count as "missing", mirroring
        # to_payload, which only sends truthy width/height values.
        if not (item.width and item.height):
            raise ValueError(
                f"When using privacy mode, all items require a width and height. Missing for item: '{item.reference_id}'"
            )
Comment on lines +252 to +259
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fyi @gatli your script for privacy data upload will break after this change.

But i believe you can already get the image dimensions from the embedding service anyways?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the heads up 🙂

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ ignore = ["E501", "E741", "E731", "F401"] # Easy ignore for getting it running

[tool.poetry]
name = "scale-nucleus"
version = "0.16.7"
version = "0.16.8"
description = "The official Python client library for Nucleus, the Data Platform for AI"
license = "MIT"
authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]
Expand Down