Skip to content
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.17.8](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.17.7) - 2024-11-05
## [0.17.8](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.17.8) - 2025-01-02

### Added
- Adding `only_most_recent_tasks` parameter for `dataset.scene_and_annotation_generator()` and `dataset.items_and_annotation_generator()` to accommodate multiple sets of ground truth caused by relabeled tasks. Also returns the task_id in the annotation results.
Expand Down
52 changes: 27 additions & 25 deletions nucleus/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ class BoxAnnotation(Annotation): # pylint: disable=R0902
metadata: Optional[Dict] = None
embedding_vector: Optional[list] = None
track_reference_id: Optional[str] = None
task_id: Optional[str] = None
_task_id: Optional[str] = field(default=None, repr=False)

def __post_init__(self):
self.metadata = self.metadata if self.metadata else {}
Expand All @@ -180,7 +180,7 @@ def from_json(cls, payload: dict):
metadata=payload.get(METADATA_KEY, {}),
embedding_vector=payload.get(EMBEDDING_VECTOR_KEY, None),
track_reference_id=payload.get(TRACK_REFERENCE_ID_KEY, None),
task_id=payload.get(TASK_ID_KEY, None),
_task_id=payload.get(TASK_ID_KEY, None),
)

def to_payload(self) -> dict:
Expand All @@ -198,7 +198,7 @@ def to_payload(self) -> dict:
METADATA_KEY: self.metadata,
EMBEDDING_VECTOR_KEY: self.embedding_vector,
TRACK_REFERENCE_ID_KEY: self.track_reference_id,
TASK_ID_KEY: self.task_id,
TASK_ID_KEY: self._task_id,
}

def __eq__(self, other):
Expand All @@ -213,7 +213,7 @@ def __eq__(self, other):
and sorted(self.metadata.items()) == sorted(other.metadata.items())
and self.embedding_vector == other.embedding_vector
and self.track_reference_id == other.track_reference_id
and self.task_id == other.task_id
and self._task_id == other._task_id
)


Expand Down Expand Up @@ -280,7 +280,7 @@ class LineAnnotation(Annotation):
annotation_id: Optional[str] = None
metadata: Optional[Dict] = None
track_reference_id: Optional[str] = None
task_id: Optional[str] = None
_task_id: Optional[str] = field(default=None, repr=False)

def __post_init__(self):
self.metadata = self.metadata if self.metadata else {}
Expand Down Expand Up @@ -310,7 +310,7 @@ def from_json(cls, payload: dict):
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
metadata=payload.get(METADATA_KEY, {}),
track_reference_id=payload.get(TRACK_REFERENCE_ID_KEY, None),
task_id=payload.get(TASK_ID_KEY, None),
_task_id=payload.get(TASK_ID_KEY, None),
)

def to_payload(self) -> dict:
Expand All @@ -324,7 +324,7 @@ def to_payload(self) -> dict:
ANNOTATION_ID_KEY: self.annotation_id,
METADATA_KEY: self.metadata,
TRACK_REFERENCE_ID_KEY: self.track_reference_id,
TASK_ID_KEY: self.task_id,
TASK_ID_KEY: self._task_id,
}
return payload

Expand Down Expand Up @@ -375,7 +375,7 @@ class PolygonAnnotation(Annotation):
metadata: Optional[Dict] = None
embedding_vector: Optional[list] = None
track_reference_id: Optional[str] = None
task_id: Optional[str] = None
_task_id: Optional[str] = field(default=None, repr=False)

def __post_init__(self):
self.metadata = self.metadata if self.metadata else {}
Expand Down Expand Up @@ -406,7 +406,7 @@ def from_json(cls, payload: dict):
metadata=payload.get(METADATA_KEY, {}),
embedding_vector=payload.get(EMBEDDING_VECTOR_KEY, None),
track_reference_id=payload.get(TRACK_REFERENCE_ID_KEY, None),
task_id=payload.get(TASK_ID_KEY, None),
_task_id=payload.get(TASK_ID_KEY, None),
)

def to_payload(self) -> dict:
Expand All @@ -421,7 +421,7 @@ def to_payload(self) -> dict:
METADATA_KEY: self.metadata,
EMBEDDING_VECTOR_KEY: self.embedding_vector,
TRACK_REFERENCE_ID_KEY: self.track_reference_id,
TASK_ID_KEY: self.task_id,
TASK_ID_KEY: self._task_id,
}
return payload

Expand Down Expand Up @@ -518,7 +518,7 @@ class KeypointsAnnotation(Annotation):
annotation_id: Optional[str] = None
metadata: Optional[Dict] = None
track_reference_id: Optional[str] = None
task_id: Optional[str] = None
_task_id: Optional[str] = field(default=None, repr=False)

def __post_init__(self):
self.metadata = self.metadata or {}
Expand Down Expand Up @@ -571,7 +571,7 @@ def from_json(cls, payload: dict):
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
metadata=payload.get(METADATA_KEY, {}),
track_reference_id=payload.get(TRACK_REFERENCE_ID_KEY, None),
task_id=payload.get(TASK_ID_KEY, None),
_task_id=payload.get(TASK_ID_KEY, None),
)

def to_payload(self) -> dict:
Expand All @@ -587,7 +587,7 @@ def to_payload(self) -> dict:
ANNOTATION_ID_KEY: self.annotation_id,
METADATA_KEY: self.metadata,
TRACK_REFERENCE_ID_KEY: self.track_reference_id,
TASK_ID_KEY: self.task_id,
TASK_ID_KEY: self._task_id,
}
return payload

Expand Down Expand Up @@ -692,7 +692,7 @@ class CuboidAnnotation(Annotation): # pylint: disable=R0902
annotation_id: Optional[str] = None
metadata: Optional[Dict] = None
track_reference_id: Optional[str] = None
task_id: Optional[str] = None
_task_id: Optional[str] = field(default=None, repr=False)

def __post_init__(self):
self.metadata = self.metadata if self.metadata else {}
Expand All @@ -709,7 +709,7 @@ def from_json(cls, payload: dict):
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
metadata=payload.get(METADATA_KEY, {}),
track_reference_id=payload.get(TRACK_REFERENCE_ID_KEY, None),
task_id=payload.get(TASK_ID_KEY, None),
_task_id=payload.get(TASK_ID_KEY, None),
)

def to_payload(self) -> dict:
Expand All @@ -729,7 +729,8 @@ def to_payload(self) -> dict:
payload[METADATA_KEY] = self.metadata
if self.track_reference_id:
payload[TRACK_REFERENCE_ID_KEY] = self.track_reference_id

if self._task_id:
payload[TASK_ID_KEY] = self._task_id
return payload


Expand Down Expand Up @@ -942,7 +943,7 @@ class CategoryAnnotation(Annotation):
taxonomy_name: Optional[str] = None
metadata: Optional[Dict] = None
track_reference_id: Optional[str] = None
task_id: Optional[str] = None
_task_id: Optional[str] = field(default=None, repr=False)

def __post_init__(self):
self.metadata = self.metadata if self.metadata else {}
Expand All @@ -955,7 +956,7 @@ def from_json(cls, payload: dict):
taxonomy_name=payload.get(TAXONOMY_NAME_KEY, None),
metadata=payload.get(METADATA_KEY, {}),
track_reference_id=payload.get(TRACK_REFERENCE_ID_KEY, None),
task_id=payload.get(TASK_ID_KEY, None),
_task_id=payload.get(TASK_ID_KEY, None),
)

def to_payload(self) -> dict:
Expand All @@ -966,7 +967,7 @@ def to_payload(self) -> dict:
REFERENCE_ID_KEY: self.reference_id,
METADATA_KEY: self.metadata,
TRACK_REFERENCE_ID_KEY: self.track_reference_id,
TASK_ID_KEY: self.task_id,
TASK_ID_KEY: self._task_id,
}
if self.taxonomy_name is not None:
payload[TAXONOMY_NAME_KEY] = self.taxonomy_name
Expand All @@ -982,7 +983,7 @@ class MultiCategoryAnnotation(Annotation):
taxonomy_name: Optional[str] = None
metadata: Optional[Dict] = None
track_reference_id: Optional[str] = None
task_id: Optional[str] = None
_task_id: Optional[str] = field(default=None, repr=False)

def __post_init__(self):
self.metadata = self.metadata if self.metadata else {}
Expand All @@ -995,7 +996,7 @@ def from_json(cls, payload: dict):
taxonomy_name=payload.get(TAXONOMY_NAME_KEY, None),
metadata=payload.get(METADATA_KEY, {}),
track_reference_id=payload.get(TRACK_REFERENCE_ID_KEY, None),
task_id=payload.get(TASK_ID_KEY, None),
_task_id=payload.get(TASK_ID_KEY, None),
)

def to_payload(self) -> dict:
Expand All @@ -1006,7 +1007,7 @@ def to_payload(self) -> dict:
REFERENCE_ID_KEY: self.reference_id,
METADATA_KEY: self.metadata,
TRACK_REFERENCE_ID_KEY: self.track_reference_id,
TASK_ID_KEY: self.task_id,
TASK_ID_KEY: self._task_id,
}
if self.taxonomy_name is not None:
payload[TAXONOMY_NAME_KEY] = self.taxonomy_name
Expand Down Expand Up @@ -1045,6 +1046,7 @@ class SceneCategoryAnnotation(Annotation):
reference_id: str
taxonomy_name: Optional[str] = None
metadata: Optional[Dict] = field(default_factory=dict)
_task_id: Optional[str] = field(default=None, repr=False)

@classmethod
def from_json(cls, payload: dict):
Expand All @@ -1053,6 +1055,7 @@ def from_json(cls, payload: dict):
reference_id=payload[REFERENCE_ID_KEY],
taxonomy_name=payload.get(TAXONOMY_NAME_KEY, None),
metadata=payload.get(METADATA_KEY, {}),
_task_id=payload.get(TASK_ID_KEY, None),
)

def to_payload(self) -> dict:
Expand All @@ -1062,6 +1065,7 @@ def to_payload(self) -> dict:
GEOMETRY_KEY: {},
REFERENCE_ID_KEY: self.reference_id,
METADATA_KEY: self.metadata,
TASK_ID_KEY: self._task_id,
}
if self.taxonomy_name is not None:
payload[TAXONOMY_NAME_KEY] = self.taxonomy_name
Expand All @@ -1079,9 +1083,7 @@ class AnnotationList:
default_factory=list
)
cuboid_annotations: List[CuboidAnnotation] = field(default_factory=list)
category_annotations: List[CategoryAnnotation] = field(
default_factory=list
)
category_annotations: List[CategoryAnnotation] = field(default_factory=list)
multi_category_annotations: List[MultiCategoryAnnotation] = field(
default_factory=list
)
Expand Down
33 changes: 25 additions & 8 deletions nucleus/annotation_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,7 @@ def get_form_data_and_file_pointers_fn(
"""

def fn():
request_json = construct_segmentation_payload(
segmentations, update
)
request_json = construct_segmentation_payload(segmentations, update)
form_data = [
FileFormField(
name=SERIALIZED_REQUEST_KEY,
Expand Down Expand Up @@ -212,15 +210,17 @@ def fn():

return fn

@staticmethod
def check_for_duplicate_ids(annotations: Iterable[Annotation]):
def check_for_duplicate_ids(self, annotations: Iterable[Annotation]):
"""Do not allow annotations to have the same (annotation_id, reference_id, task_id) tuple"""

    # some annotations, like CategoryAnnotation, do not have an annotation_id attribute, so we allow duplicates for them
tuple_ids = [
(ann.reference_id, ann.annotation_id, ann.task_id) # type: ignore
(
ann.reference_id,
ann.annotation_id,
getattr(ann, "_task_id", None),
)
for ann in annotations
if hasattr(ann, "annotation_id") and hasattr(ann, "task_id")
if hasattr(ann, "annotation_id")
]
tuple_count = Counter(tuple_ids)
duplicates = {key for key, value in tuple_count.items() if value > 1}
Expand Down Expand Up @@ -255,3 +255,20 @@ def __init__(
self._route = (
f"dataset/{dataset_id}/model/{model_id}/uploadPredictions"
)

def check_for_duplicate_ids(self, annotations: Iterable[Annotation]):
    """Reject the upload when two predictions share an (annotation_id, reference_id) pair.

    Predictions (unlike ground-truth annotations) are not deduplicated by
    task_id, so the identity key here is only the two-tuple. Items lacking
    either attribute are ignored rather than rejected.

    Raises:
        DuplicateIDError: if any (annotation_id, reference_id) pair occurs
            more than once among *annotations*.
    """
    # Count occurrences of each identity pair in one pass; anything seen
    # more than once is a duplicate.
    pair_counts: Counter = Counter(
        (item.annotation_id, item.reference_id)  # type: ignore
        for item in annotations
        if hasattr(item, "annotation_id") and hasattr(item, "reference_id")
    )
    duplicates = {pair for pair, count in pair_counts.items() if count > 1}
    if duplicates:
        raise DuplicateIDError(
            f"Duplicate predictions with the same (annotation_id, reference_id) properties found.\n"
            f"Duplicates: {duplicates}\n"
            f"To fix this, avoid duplicate predictions, or specify a different annotation_id attribute "
            f"for the failing items."
        )
28 changes: 9 additions & 19 deletions tests/test_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,7 @@ def test_polygon_gt_upload(dataset):
assert response["annotations_processed"] == 1
assert response["annotations_ignored"] == 0

response = dataset.refloc(annotation.reference_id)["annotations"][
"polygon"
]
response = dataset.refloc(annotation.reference_id)["annotations"]["polygon"]
assert len(response) == 1
response_annotation = response[0]
assert_polygon_annotation_matches_dict(
Expand Down Expand Up @@ -370,7 +368,7 @@ def test_mixed_annotation_upload(dataset):


def test_box_gt_upload_update(dataset):
TEST_BOX_ANNOTATIONS[0]["task_id"] = "test_task_id"
TEST_BOX_ANNOTATIONS[0]["_task_id"] = "test_task_id"
annotation = BoxAnnotation(**TEST_BOX_ANNOTATIONS[0])
response = dataset.annotate(annotations=[annotation])

Expand All @@ -384,7 +382,7 @@ def test_box_gt_upload_update(dataset):
annotation_update_params["reference_id"] = TEST_BOX_ANNOTATIONS[0][
"reference_id"
]
annotation_update_params["task_id"] = TEST_BOX_ANNOTATIONS[0]["task_id"]
annotation_update_params["_task_id"] = TEST_BOX_ANNOTATIONS[0]["_task_id"]

annotation_update = BoxAnnotation(**annotation_update_params)
response = dataset.annotate(annotations=[annotation_update], update=True)
Expand All @@ -401,7 +399,7 @@ def test_box_gt_upload_update(dataset):


def test_box_gt_upload_ignore(dataset):
TEST_BOX_ANNOTATIONS[0]["task_id"] = "test_task_id"
TEST_BOX_ANNOTATIONS[0]["_task_id"] = "test_task_id"
annotation = BoxAnnotation(**TEST_BOX_ANNOTATIONS[0])

print(annotation)
Expand All @@ -418,7 +416,7 @@ def test_box_gt_upload_ignore(dataset):
annotation_update_params["reference_id"] = TEST_BOX_ANNOTATIONS[0][
"reference_id"
]
annotation_update_params["task_id"] = TEST_BOX_ANNOTATIONS[0]["task_id"]
annotation_update_params["_task_id"] = TEST_BOX_ANNOTATIONS[0]["_task_id"]
annotation_update = BoxAnnotation(**annotation_update_params)

# Default behavior is ignore.
Expand Down Expand Up @@ -450,19 +448,15 @@ def test_polygon_gt_upload_update(dataset):
annotation_update_params["reference_id"] = TEST_POLYGON_ANNOTATIONS[0][
"reference_id"
]
annotation_update_params["task_id"] = TEST_POLYGON_ANNOTATIONS[0][
"task_id"
]
annotation_update_params["task_id"] = TEST_POLYGON_ANNOTATIONS[0]["task_id"]

annotation_update = PolygonAnnotation.from_json(annotation_update_params)
response = dataset.annotate(annotations=[annotation_update], update=True)

assert response["annotations_processed"] == 1
assert response["annotations_ignored"] == 0

response = dataset.refloc(annotation.reference_id)["annotations"][
"polygon"
]
response = dataset.refloc(annotation.reference_id)["annotations"]["polygon"]
assert len(response) == 1
response_annotation = response[0]
assert_polygon_annotation_matches_dict(
Expand All @@ -485,9 +479,7 @@ def test_polygon_gt_upload_ignore(dataset):
annotation_update_params["reference_id"] = TEST_POLYGON_ANNOTATIONS[0][
"reference_id"
]
annotation_update_params["task_id"] = TEST_POLYGON_ANNOTATIONS[0][
"task_id"
]
annotation_update_params["task_id"] = TEST_POLYGON_ANNOTATIONS[0]["task_id"]

annotation_update = PolygonAnnotation.from_json(annotation_update_params)
# Default behavior is ignore.
Expand All @@ -496,9 +488,7 @@ def test_polygon_gt_upload_ignore(dataset):
assert response["annotations_processed"] == 0
assert response["annotations_ignored"] == 1

response = dataset.refloc(annotation.reference_id)["annotations"][
"polygon"
]
response = dataset.refloc(annotation.reference_id)["annotations"]["polygon"]
assert len(response) == 1
response_annotation = response[0]
assert_polygon_annotation_matches_dict(
Expand Down
Loading