diff --git a/CHANGELOG.md b/CHANGELOG.md
index cb3b3533..562bb048 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,11 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.14.30](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.30) - 2022-11-29
+
+### Added
+- Support for uploading track-level metrics to external evaluation functions using track_ref_ids
+
 ## [0.14.29](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.29) - 2022-11-22
 
 ### Added
diff --git a/nucleus/__init__.py b/nucleus/__init__.py
index a40ffee2..4d679acb 100644
--- a/nucleus/__init__.py
+++ b/nucleus/__init__.py
@@ -178,7 +178,7 @@ def __init__(
             import tqdm.notebook as tqdm_notebook
 
             self.tqdm_bar = tqdm_notebook.tqdm
-        self._connection = Connection(self.api_key, self.endpoint)
+        self.connection = Connection(self.api_key, self.endpoint)
         self.validate = Validate(self.api_key, self.endpoint)
 
     def __repr__(self):
@@ -1014,16 +1014,16 @@ def create_object_index(
         )
 
     def delete(self, route: str):
-        return self._connection.delete(route)
+        return self.connection.delete(route)
 
     def get(self, route: str):
-        return self._connection.get(route)
+        return self.connection.get(route)
 
     def post(self, payload: dict, route: str):
-        return self._connection.post(payload, route)
+        return self.connection.post(payload, route)
 
     def put(self, payload: dict, route: str):
-        return self._connection.put(payload, route)
+        return self.connection.put(payload, route)
 
     # TODO: Fix return type, can be a list as well. Brings on a lot of mypy errors ...
     def make_request(
@@ -1054,7 +1054,7 @@ def make_request(
                 "Received defined payload with GET request! Will ignore payload"
            )
             payload = None
-        return self._connection.make_request(payload, route, requests_command, return_raw_response)  # type: ignore
+        return self.connection.make_request(payload, route, requests_command, return_raw_response)  # type: ignore
 
     def _set_api_key(self, api_key):
         """Fetch API key from environment variable NUCLEUS_API_KEY if not set"""
diff --git a/nucleus/dataset.py b/nucleus/dataset.py
index a83ed638..25a9cf7a 100644
--- a/nucleus/dataset.py
+++ b/nucleus/dataset.py
@@ -1863,7 +1863,7 @@ def tracks(self) -> List[Track]:
         tracks_list = [
             Track.from_json(
                 payload=track,
-                client=self._client,
+                connection=self._client.connection,
             )
             for track in response[TRACKS_KEY]
         ]
diff --git a/nucleus/scene.py b/nucleus/scene.py
index b05c3cc5..48c8aaeb 100644
--- a/nucleus/scene.py
+++ b/nucleus/scene.py
@@ -330,7 +330,10 @@ def from_json(
         frames = [Frame.from_json(frame) for frame in frames_payload]
         tracks_payload = payload.get(TRACKS_KEY, [])
         tracks = (
-            [Track.from_json(track, client) for track in tracks_payload]
+            [
+                Track.from_json(track, connection=client.connection)
+                for track in tracks_payload
+            ]
             if client
             else []
         )
@@ -680,7 +683,10 @@ def from_json(
         items = [DatasetItem.from_json(item) for item in items_payload]
         tracks_payload = payload.get(TRACKS_KEY, [])
         tracks = (
-            [Track.from_json(track, client) for track in tracks_payload]
+            [
+                Track.from_json(track, connection=client.connection)
+                for track in tracks_payload
+            ]
             if client
             else []
         )
diff --git a/nucleus/track.py b/nucleus/track.py
index 4221dac3..21ee8f7c 100644
--- a/nucleus/track.py
+++ b/nucleus/track.py
@@ -12,7 +12,7 @@
 )
 
 if TYPE_CHECKING:
-    from . import NucleusClient
+    from . import Connection
 
 
 @dataclass  # pylint: disable=R0902
@@ -25,7 +25,7 @@ class Track:  # pylint: disable=R0902
         metadata: Arbitrary key/value dictionary of info to attach to this track.
     """
 
-    _client: "NucleusClient"
+    _connection: "Connection"
     dataset_id: str
     reference_id: str
     metadata: Optional[dict] = None
@@ -41,10 +41,10 @@ def __eq__(self, other):
         )
 
     @classmethod
-    def from_json(cls, payload: dict, client: "NucleusClient"):
+    def from_json(cls, payload: dict, connection: "Connection"):
         """Instantiates track object from schematized JSON dict payload."""
         return cls(
-            _client=client,
+            _connection=connection,
             reference_id=str(payload[REFERENCE_ID_KEY]),
             dataset_id=str(payload[DATASET_ID_KEY]),
             metadata=payload.get(METADATA_KEY, None),
@@ -79,7 +79,7 @@ def update(
             entire metadata object will be overwritten. Otherwise, only the keys in
             metadata will be overwritten.
         """
-        self._client.make_request(
+        self._connection.make_request(
             payload={
                 REFERENCE_ID_KEY: self.reference_id,
                 METADATA_KEY: metadata,
diff --git a/nucleus/validate/__init__.py b/nucleus/validate/__init__.py
index 6ad24442..33b0a7eb 100644
--- a/nucleus/validate/__init__.py
+++ b/nucleus/validate/__init__.py
@@ -7,7 +7,7 @@
 ]
 
 from .client import Validate
-from .constants import ThresholdComparison
+from .constants import EntityLevel, ThresholdComparison
 from .data_transfer_objects.eval_function import (
     EvalFunctionEntry,
     EvaluationCriterion,
diff --git a/nucleus/validate/client.py b/nucleus/validate/client.py
index 840e7847..fd84cf61 100644
--- a/nucleus/validate/client.py
+++ b/nucleus/validate/client.py
@@ -213,7 +213,7 @@ def create_external_eval_function(
 
         Args:
             name: unique name of evaluation function
-            level: level at which the eval function is run, defaults to "item"
+            level: level at which the eval function is run, defaults to EntityLevel.ITEM.
 
         Raises:
             - NucleusAPIError if the creation of the function fails on the server side
diff --git a/nucleus/validate/constants.py b/nucleus/validate/constants.py
index 61e90831..92ad12c1 100644
--- a/nucleus/validate/constants.py
+++ b/nucleus/validate/constants.py
@@ -23,7 +23,14 @@ class ThresholdComparison(str, Enum):
 
 
 class EntityLevel(str, Enum):
-    """Level for evaluation functions and unit tests."""
+    """
+    Data level at which evaluation functions produce outputs.
+    For instance, when comparing results across dataset items, use
+    `EntityLevel.ITEM`. For scenes, use `EntityLevel.SCENE`. Finally,
+    when comparing results between tracks within a single scene or a
+    holistic item dataset, use `EntityLevel.TRACK`.
+ """ + TRACK = "track" ITEM = "item" SCENE = "scene" diff --git a/nucleus/validate/data_transfer_objects/scenario_test_evaluations.py b/nucleus/validate/data_transfer_objects/scenario_test_evaluations.py index 87197851..46bf5e71 100644 --- a/nucleus/validate/data_transfer_objects/scenario_test_evaluations.py +++ b/nucleus/validate/data_transfer_objects/scenario_test_evaluations.py @@ -6,6 +6,7 @@ class EvaluationResult(ImmutableModel): + track_ref_id: Optional[str] = None item_ref_id: Optional[str] = None scene_ref_id: Optional[str] = None score: float = 0 @@ -15,16 +16,15 @@ class EvaluationResult(ImmutableModel): def is_item_or_scene_provided( cls, values ): # pylint: disable=no-self-argument - if ( - values.get("item_ref_id") is None - and values.get("scene_ref_id") is None - ) or ( - ( - values.get("item_ref_id") is not None - and values.get("scene_ref_id") is not None + ref_ids = [ + values.get("track_ref_id", None), + values.get("item_ref_id", None), + values.get("scene_ref_id", None), + ] + if len([ref_id for ref_id in ref_ids if ref_id is not None]) != 1: + raise ValueError( + "Must provide exactly one of track_ref_id, item_ref_id, or scene_ref_id" ) - ): - raise ValueError("Must provide either item_ref_id or scene_ref_id") return values @validator("score", "weight") diff --git a/nucleus/validate/scenario_test.py b/nucleus/validate/scenario_test.py index bcd9b6f1..41b4db30 100644 --- a/nucleus/validate/scenario_test.py +++ b/nucleus/validate/scenario_test.py @@ -8,9 +8,16 @@ from typing import List, Optional, Union from ..connection import Connection -from ..constants import DATASET_ITEMS_KEY, NAME_KEY, SCENES_KEY, SLICE_ID_KEY +from ..constants import ( + DATASET_ITEMS_KEY, + NAME_KEY, + SCENES_KEY, + SLICE_ID_KEY, + TRACKS_KEY, +) from ..dataset_item import DatasetItem from ..scene import Scene +from ..track import Track from .constants import ( EVAL_FUNCTION_ID_KEY, SCENARIO_TEST_ID_KEY, @@ -166,8 +173,8 @@ def get_eval_history(self) -> List[ScenarioTestEvaluation]: def get_items( self, level: EntityLevel = EntityLevel.ITEM - ) -> Union[List[DatasetItem], List[Scene]]: - """Gets items within a scenario test at a given level, returning a list of DatasetItem or Scene objects. + ) -> Union[List[Track], List[DatasetItem], List[Scene]]: + """Gets items within a scenario test at a given level, returning a list of Track, DatasetItem, or Scene objects. Args: level: :class:`EntityLevel` @@ -178,14 +185,22 @@ def get_items( response = self.connection.get( f"validate/scenario_test/{self.id}/items", ) + if level == EntityLevel.TRACK: + return [ + Track.from_json(track, connection=self.connection) + for track in response.get(TRACKS_KEY, []) + ] if level == EntityLevel.SCENE: return [ Scene.from_json(scene, skip_validate=True) - for scene in response[SCENES_KEY] + for scene in response.get(SCENES_KEY, []) ] - return [ - DatasetItem.from_json(item) for item in response[DATASET_ITEMS_KEY] - ] + if level == EntityLevel.ITEM: + return [ + DatasetItem.from_json(item) + for item in response.get(DATASET_ITEMS_KEY, []) + ] + raise ValueError(f"Invalid entity level: {level}") def set_baseline_model(self, model_id: str): """Sets a new baseline model for the ScenarioTest. In order to be eligible to be a baseline, @@ -222,23 +237,41 @@ def upload_external_evaluation_results( len(results) > 0 ), "Submitting evaluation requires at least one result." 
 
-        level = EntityLevel.ITEM
+        level: Optional[EntityLevel] = None
         metric_per_ref_id = {}
         weight_per_ref_id = {}
         aggregate_weighted_sum = 0.0
         aggregate_weight = 0.0
 
+        # Ensures results at only one EntityLevel are provided, otherwise raises a ValueError
+        def ensure_level_consistency_or_raise(
+            cur_level: Optional[EntityLevel], new_level: EntityLevel
+        ):
+            if level is not None and level != new_level:
+                raise ValueError(
+                    f"All evaluation results must only pertain to one level. Received {cur_level} then {new_level}"
+                )
+
         # aggregation based on https://en.wikipedia.org/wiki/Weighted_arithmetic_mean
         for r in results:
-            # Ensure results are uploaded ONLY for items or ONLY for scenes
+            # Ensure results are uploaded ONLY for ONE OF tracks, items, and scenes
+            if r.track_ref_id is not None:
+                ensure_level_consistency_or_raise(level, EntityLevel.TRACK)
+                level = EntityLevel.TRACK
+            if r.item_ref_id is not None:
+                ensure_level_consistency_or_raise(level, EntityLevel.ITEM)
+                level = EntityLevel.ITEM
             if r.scene_ref_id is not None:
+                ensure_level_consistency_or_raise(level, EntityLevel.SCENE)
                 level = EntityLevel.SCENE
-            if r.item_ref_id is not None and level == EntityLevel.SCENE:
-                raise ValueError(
-                    "All evaluation results must either pertain to a scene_ref_id or an item_ref_id, not both."
-                )
             ref_id = (
-                r.item_ref_id if level == EntityLevel.ITEM else r.scene_ref_id
+                r.track_ref_id
+                if level == EntityLevel.TRACK
+                else (
+                    r.item_ref_id
+                    if level == EntityLevel.ITEM
+                    else r.scene_ref_id
+                )
             )
 
             # Aggregate scores and weights
@@ -255,7 +288,7 @@ def upload_external_evaluation_results(
             "overall_metric": aggregate_weighted_sum / aggregate_weight,
             "model_id": model_id,
             "slice_id": self.slice_id,
-            "level": level.value,
+            "level": level.value if level else None,
         }
         response = self.connection.post(
             payload,
diff --git a/pyproject.toml b/pyproject.toml
index ec1f09bb..a81cb572 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,7 @@ exclude = '''
 
 [tool.poetry]
 name = "scale-nucleus"
-version = "0.14.29"
+version = "0.14.30"
 description = "The official Python client library for Nucleus, the Data Platform for AI"
 license = "MIT"
 authors = ["Scale AI Nucleus Team "]
diff --git a/tests/helpers.py b/tests/helpers.py
index fd3ca13c..7de8ef7e 100644
--- a/tests/helpers.py
+++ b/tests/helpers.py
@@ -20,6 +20,7 @@
 DATASET_WITH_EMBEDDINGS = "ds_c8jwdhy4y4f0078hzceg"
 NUCLEUS_PYTEST_USER_ID = "60ad648c85db770026e9bf77"
 
+EVAL_FUNCTION_NAME = "eval_fn"
 EVAL_FUNCTION_THRESHOLD = 0.5
 EVAL_FUNCTION_COMPARISON = ThresholdComparison.GREATER_THAN_EQUAL_TO
 
diff --git a/tests/test_track.py b/tests/test_track.py
index f336218d..0e23c0da 100644
--- a/tests/test_track.py
+++ b/tests/test_track.py
@@ -1,3 +1,4 @@
+import time
 from copy import deepcopy
 
 import pytest
@@ -69,7 +70,7 @@ def test_create_mp_with_tracks(CLIENT, dataset_scene):
     expected_track_reference_ids = [
         ann["track_reference_id"] for ann in TEST_SCENE_BOX_PREDS_WITH_TRACK
     ]
-    model_reference = "model_test_create_mp_with_tracks"
+    model_reference = "model_" + str(time.time())
     model = CLIENT.create_model(TEST_MODEL_NAME, model_reference)
 
     # Act
diff --git a/tests/validate/test_scenario_test.py b/tests/validate/test_scenario_test.py
index 3b8b6b47..61983b5b 100644
--- a/tests/validate/test_scenario_test.py
+++ b/tests/validate/test_scenario_test.py
@@ -1,11 +1,21 @@
+import time
+
 import pytest
 
+from nucleus.annotation import BoxAnnotation
 from nucleus.validate import CreateScenarioTestError
 from nucleus.validate.constants import EntityLevel
+from nucleus.validate.data_transfer_objects.scenario_test_evaluations import (
+    EvaluationResult,
+)
 from nucleus.validate.scenario_test import ScenarioTest
 from tests.helpers import (
     EVAL_FUNCTION_COMPARISON,
-    EVAL_FUNCTION_THRESHOLD,
+    EVAL_FUNCTION_NAME,
+    TEST_MODEL_NAME,
+    TEST_SCENE_BOX_ANNS_WITH_TRACK,
+    TEST_SCENE_BOX_PREDS_WITH_TRACK,
+    TEST_TRACK_REFERENCE_ID,
     get_uuid,
 )
 
@@ -54,44 +64,132 @@ def test_scenario_test_get_dataset_items(
     test_slice,
     slice_items,
 ):
+    # Arrange
     test_name = "scenario_test_" + get_uuid()  # use uuid to make unique
+    expected_items_locations = [item.image_location for item in slice_items]
 
+    # Act
     scenario_test = CLIENT.validate.create_scenario_test(
         name=test_name,
         slice_id=test_slice.id,
         evaluation_functions=[CLIENT.validate.eval_functions.bbox_iou()],
     )
-
-    expected_items_locations = [item.image_location for item in slice_items]
     actual_items_locations = [
         item.image_location for item in scenario_test.get_items()
     ]
+
+    # Assert
     assert set(actual_items_locations).issubset(expected_items_locations)
+
+    # Clean
     CLIENT.validate.delete_scenario_test(scenario_test.id)
 
 
 def test_scenario_test_get_scenes(
-    CLIENT, test_scene_slice, slice_scenes, annotations
+    CLIENT,
+    test_scene_slice,
+    slice_scenes,
 ):
+    # Arrange
     test_name = "scenario_test_" + get_uuid()  # use uuid to make unique
+    expected_scene_reference_ids = [
+        scene.reference_id for scene in slice_scenes
+    ]
 
+    # Act
     scenario_test = CLIENT.validate.create_scenario_test(
         name=test_name,
         slice_id=test_scene_slice.id,
         evaluation_functions=[CLIENT.validate.eval_functions.bbox_iou()],
     )
-
-    expected_scene_reference_ids = [
-        scene.reference_id for scene in slice_scenes
-    ]
     actual_scene_reference_ids = [
         scene.reference_id
         for scene in scenario_test.get_items(level=EntityLevel.SCENE)
     ]
+
+    # Assert
     assert set(actual_scene_reference_ids).issubset(
         expected_scene_reference_ids
     )
+
+    # Clean
+    CLIENT.validate.delete_scenario_test(scenario_test.id)
+
+
+def test_scenario_test_get_tracks(
+    CLIENT, populated_scene_dataset, test_scene_slice, annotations
+):
+    # Arrange
+    test_name = "scenario_test_" + get_uuid()
+    expected_track_reference_ids = list(
+        set(
+            [
+                ann["track_reference_id"]
+                for ann in TEST_SCENE_BOX_ANNS_WITH_TRACK
+            ]
+        )
+    )
+    annotations = [
+        BoxAnnotation.from_json(ann) for ann in TEST_SCENE_BOX_ANNS_WITH_TRACK
+    ]
+    model_reference = "model_" + str(time.time())
+    model = CLIENT.create_model(TEST_MODEL_NAME, model_reference)
+    populated_scene_dataset.upload_predictions(
+        model=model,
+        predictions=[
+            BoxAnnotation.from_json(ann)
+            for ann in TEST_SCENE_BOX_PREDS_WITH_TRACK
+        ],
+        update=False,
+        asynchronous=False,
+    )
+    populated_scene_dataset.annotate(
+        annotations=annotations,
+        update=False,
+        asynchronous=False,
+    )
+    try:
+        CLIENT.validate.create_external_eval_function(
+            name=EVAL_FUNCTION_NAME,
+            level=EntityLevel.TRACK,
+        )
+    except Exception:  # pylint: disable=W0703
+        # Ignore external eval function already created
+        pass
+    all_external_fns = CLIENT.validate.eval_functions.external_functions
+    eval_fn_config = all_external_fns[EVAL_FUNCTION_NAME]
+    scenario_test = CLIENT.validate.create_scenario_test(
+        name=test_name,
+        slice_id=test_scene_slice.id,
+        evaluation_functions=[eval_fn_config],
+    )
+    scenario_test.upload_external_evaluation_results(
+        eval_fn_config,
+        [
+            EvaluationResult(
+                track_ref_id=TEST_TRACK_REFERENCE_ID,
+                score=1,
+                weight=1,
+            ),
+        ],
+        model.id,
+    )
+
+    # Act
+    actual_track_reference_ids = [
+        track.reference_id
+        for track in scenario_test.get_items(level=EntityLevel.TRACK)
+    ]
+
+    # Assert
+    assert len(actual_track_reference_ids) == len(expected_track_reference_ids)
+    assert set(actual_track_reference_ids).issubset(
+        expected_track_reference_ids
+    )
+
+    # Clean
     CLIENT.validate.delete_scenario_test(scenario_test.id)
+    assert CLIENT.delete_model(model.id) == {}
 
 
 def test_no_criteria_raises_error(CLIENT, test_slice, annotations):
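
For reference, a minimal usage sketch of the track-level flow this diff enables, modeled on the new test_scenario_test_get_tracks test above. The API key, eval function name, slice id, model id, and track reference id are placeholders, not values from this PR; as in the test, creating an external eval function that already exists is simply ignored.

```python
from nucleus import NucleusClient
from nucleus.validate.constants import EntityLevel
from nucleus.validate.data_transfer_objects.scenario_test_evaluations import (
    EvaluationResult,
)

client = NucleusClient("YOUR_API_KEY")  # placeholder API key

# Register (or reuse) an external eval function that reports at the track level.
try:
    client.validate.create_external_eval_function(
        name="my_track_eval",  # placeholder name
        level=EntityLevel.TRACK,
    )
except Exception:
    pass  # already registered
eval_fn = client.validate.eval_functions.external_functions["my_track_eval"]

# Attach the function to a scenario test built on an existing slice.
scenario_test = client.validate.create_scenario_test(
    name="my_track_scenario_test",
    slice_id="slc_...",  # placeholder slice id
    evaluation_functions=[eval_fn],
)

# Upload one score per track; exactly one of track_ref_id, item_ref_id,
# or scene_ref_id may be set on each EvaluationResult.
scenario_test.upload_external_evaluation_results(
    eval_fn,
    [EvaluationResult(track_ref_id="track_0", score=0.8, weight=1)],
    "your_model_id",  # placeholder model id
)

# Tracks covered by the scenario test can now be fetched directly.
tracks = scenario_test.get_items(level=EntityLevel.TRACK)
```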