diff --git a/Makefile b/Makefile index 2e56b504..8355c03e 100644 --- a/Makefile +++ b/Makefile @@ -94,6 +94,7 @@ docs-comprehensive: apidocs cd docs && npm run build apidocs: + cd docs && npm install poetry run make html html: diff --git a/generated/docs/ImageQueriesApi.md b/generated/docs/ImageQueriesApi.md index 6426adf8..04299b52 100644 --- a/generated/docs/ImageQueriesApi.md +++ b/generated/docs/ImageQueriesApi.md @@ -208,6 +208,7 @@ with openapi_client.ApiClient(configuration) as api_client: human_review = "human_review_example" # str | If set to `DEFAULT`, use the regular escalation logic (i.e., send the image query for human review if the ML model is not confident). If set to `ALWAYS`, always send the image query for human review even if the ML model is confident. If set to `NEVER`, never send the image query for human review even if the ML model is not confident. (optional) patience_time = 3.14 # float | How long to wait for a confident response. (optional) want_async = "want_async_example" # str | If \"true\" then submitting an image query returns immediately without a result. The result will be computed asynchronously and can be retrieved later. (optional) + metadata = "metadata_example" # str | A dictionary of custom key/value metadata to associate with the image query (limited to 1KB). 
(optional) body = open('@path/to/image.jpeg', 'rb') # file_type | (optional) # example passing only required values which don't have defaults set @@ -220,7 +221,7 @@ with openapi_client.ApiClient(configuration) as api_client: # example passing only required values which don't have defaults set # and optional values try: - api_response = api_instance.submit_image_query(detector_id, human_review=human_review, patience_time=patience_time, want_async=want_async, body=body) + api_response = api_instance.submit_image_query(detector_id, human_review=human_review, patience_time=patience_time, want_async=want_async, metadata=metadata, body=body) pprint(api_response) except openapi_client.ApiException as e: print("Exception when calling ImageQueriesApi->submit_image_query: %s\n" % e) @@ -235,6 +236,7 @@ Name | Type | Description | Notes **human_review** | **str**| If set to `DEFAULT`, use the regular escalation logic (i.e., send the image query for human review if the ML model is not confident). If set to `ALWAYS`, always send the image query for human review even if the ML model is confident. If set to `NEVER`, never send the image query for human review even if the ML model is not confident. | [optional] **patience_time** | **float**| How long to wait for a confident response. | [optional] **want_async** | **str**| If \"true\" then submitting an image query returns immediately without a result. The result will be computed asynchronously and can be retrieved later. | [optional] + **metadata** | **str**| A dictionary of custom key/value metadata to associate with the image query (limited to 1KB). | [optional] **body** | **file_type**| | [optional] ### Return type diff --git a/generated/docs/ImageQuery.md b/generated/docs/ImageQuery.md index 759118e5..5db66277 100644 --- a/generated/docs/ImageQuery.md +++ b/generated/docs/ImageQuery.md @@ -12,6 +12,7 @@ Name | Type | Description | Notes **detector_id** | **str** | Which detector was used on this image query? 
| [readonly] **result_type** | **bool, date, datetime, dict, float, int, list, str, none_type** | What type of result are we returning? | [readonly] **result** | **bool, date, datetime, dict, float, int, list, str, none_type** | | [optional] [readonly] +**metadata** | **{str: (bool, date, datetime, dict, float, int, list, str, none_type)}, none_type** | A dictionary of custom key/value metadata to associate with the image query (limited to 1KB). | [optional] [readonly] **any string name** | **bool, date, datetime, dict, float, int, list, str, none_type** | any string name can be used but the value must be the correct type | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/generated/model.py b/generated/model.py index 6b62b9ae..24a7d303 100644 --- a/generated/model.py +++ b/generated/model.py @@ -1,12 +1,12 @@ # generated by datamodel-codegen: # filename: public-api.yaml -# timestamp: 2023-10-16T23:29:00+00:00 +# timestamp: 2023-11-09T05:00:29+00:00 from __future__ import annotations from datetime import datetime from enum import Enum -from typing import List, Optional +from typing import Any, Dict, List, Optional from pydantic import AnyUrl, BaseModel, Field, confloat, constr @@ -70,6 +70,10 @@ class ImageQuery(BaseModel): detector_id: str = Field(..., description="Which detector was used on this image query?") result_type: ResultTypeEnum = Field(..., description="What type of result are we returning?") result: Optional[ClassificationResult] = None + metadata: Optional[Dict[str, Any]] = Field( + None, + description="A dictionary of custom key/value metadata to associate with the image query (limited to 1KB).", + ) class PaginatedDetectorList(BaseModel): diff --git a/generated/openapi_client/api/image_queries_api.py b/generated/openapi_client/api/image_queries_api.py index eb2ff05b..91ba8f0e 100644 --- 
a/generated/openapi_client/api/image_queries_api.py +++ b/generated/openapi_client/api/image_queries_api.py @@ -134,6 +134,7 @@ def __init__(self, api_client=None): "human_review", "patience_time", "want_async", + "metadata", "body", ], "required": [ @@ -151,6 +152,7 @@ def __init__(self, api_client=None): "human_review": (str,), "patience_time": (float,), "want_async": (str,), + "metadata": (str,), "body": (file_type,), }, "attribute_map": { @@ -158,12 +160,14 @@ def __init__(self, api_client=None): "human_review": "human_review", "patience_time": "patience_time", "want_async": "want_async", + "metadata": "metadata", }, "location_map": { "detector_id": "query", "human_review": "query", "patience_time": "query", "want_async": "query", + "metadata": "query", "body": "body", }, "collection_format_map": {}, @@ -304,6 +308,7 @@ def submit_image_query(self, detector_id, **kwargs): human_review (str): If set to `DEFAULT`, use the regular escalation logic (i.e., send the image query for human review if the ML model is not confident). If set to `ALWAYS`, always send the image query for human review even if the ML model is confident. If set to `NEVER`, never send the image query for human review even if the ML model is not confident. . [optional] patience_time (float): How long to wait for a confident response.. [optional] want_async (str): If \"true\" then submitting an image query returns immediately without a result. The result will be computed asynchronously and can be retrieved later.. [optional] + metadata (str): A dictionary of custom key/value metadata to associate with the image query (limited to 1KB).. [optional] body (file_type): [optional] _return_http_data_only (bool): response data without head status code and headers. Default is True. 
diff --git a/generated/openapi_client/model/image_query.py b/generated/openapi_client/model/image_query.py index b1d5deec..71d4d7a8 100644 --- a/generated/openapi_client/model/image_query.py +++ b/generated/openapi_client/model/image_query.py @@ -138,6 +138,10 @@ def openapi_types(): str, none_type, ), # noqa: E501 + "metadata": ( + {str: (bool, date, datetime, dict, float, int, list, str, none_type)}, + none_type, + ), # noqa: E501 } @cached_property @@ -152,6 +156,7 @@ def discriminator(): "detector_id": "detector_id", # noqa: E501 "result_type": "result_type", # noqa: E501 "result": "result", # noqa: E501 + "metadata": "metadata", # noqa: E501 } read_only_vars = { @@ -162,6 +167,7 @@ def discriminator(): "detector_id", # noqa: E501 "result_type", # noqa: E501 "result", # noqa: E501 + "metadata", # noqa: E501 } _composed_schemas = {} @@ -211,6 +217,7 @@ def _from_openapi_data(cls, id, type, created_at, query, detector_id, result_typ through its discriminator because we passed in _visited_composed_classes = (Animal,) result (bool, date, datetime, dict, float, int, list, str, none_type): [optional] # noqa: E501 + metadata ({str: (bool, date, datetime, dict, float, int, list, str, none_type)}, none_type): A dictionary of custom key/value metadata to associate with the image query (limited to 1KB).. [optional] # noqa: E501 """ _check_type = kwargs.pop("_check_type", True) @@ -304,6 +311,7 @@ def __init__(self, *args, **kwargs): # noqa: E501 through its discriminator because we passed in _visited_composed_classes = (Animal,) result (bool, date, datetime, dict, float, int, list, str, none_type): [optional] # noqa: E501 + metadata ({str: (bool, date, datetime, dict, float, int, list, str, none_type)}, none_type): A dictionary of custom key/value metadata to associate with the image query (limited to 1KB).. 
[optional] # noqa: E501 """ _check_type = kwargs.pop("_check_type", True) diff --git a/pyproject.toml b/pyproject.toml index ebecead3..ac492e15 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ packages = [ {include = "**/*.py", from = "src"}, ] readme = "README.md" -version = "0.12.1" +version = "0.13.0" [tool.poetry.dependencies] # For certifi, use ">=" instead of "^" since it upgrades its "major version" every year, not really following semver diff --git a/spec/public-api.yaml b/spec/public-api.yaml index 8dd436ea..7e7c2cb9 100644 --- a/spec/public-api.yaml +++ b/spec/public-api.yaml @@ -153,6 +153,14 @@ paths: schema: type: string description: If "true" then submitting an image query returns immediately without a result. The result will be computed asynchronously and can be retrieved later. + - in: query + name: metadata + schema: + type: string + required: false + description: + A dictionary of custom key/value metadata to associate with the image + query (limited to 1KB). tags: - image-queries requestBody: @@ -339,6 +347,13 @@ components: allOf: - $ref: "#/components/schemas/ClassificationResult" readOnly: true + metadata: + type: object + readOnly: true + nullable: true + description: + A dictionary of custom key/value metadata to associate with the image + query (limited to 1KB). 
required: - created_at - detector_id diff --git a/src/groundlight/client.py b/src/groundlight/client.py index 081b2f71..d113a096 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -13,6 +13,7 @@ from groundlight.binary_labels import Label, convert_display_label_to_internal, convert_internal_label_to_display from groundlight.config import API_TOKEN_HELP_MESSAGE, API_TOKEN_VARIABLE_NAME +from groundlight.encodings import url_encode_dict from groundlight.images import ByteStreamWrapper, parse_supported_image_types from groundlight.internalapi import ( GroundlightApiClient, @@ -289,6 +290,7 @@ def submit_image_query( # noqa: PLR0913 # pylint: disable=too-many-arguments, t human_review: Optional[str] = None, want_async: bool = False, inspection_id: Optional[str] = None, + metadata: Union[dict, str, None] = None, ) -> ImageQuery: """ Evaluates an image with Groundlight. @@ -334,6 +336,11 @@ def submit_image_query( # noqa: PLR0913 # pylint: disable=too-many-arguments, t this is the ID of the inspection to associate with the image query. :type inspection_id: str + :param metadata: A dictionary or JSON string of custom key/value metadata to associate with + the image query (limited to 1KB). You can retrieve this metadata later by calling + `get_image_query()`. + :type metadata: dict or str + :return: ImageQuery :rtype: ImageQuery """ @@ -360,6 +367,12 @@ def submit_image_query( # noqa: PLR0913 # pylint: disable=too-many-arguments, t ) params["want_async"] = str(bool(want_async)) + if metadata is not None: + # Currently, our backend server puts the image in the body data of the API request, + # which means we need to put the metadata in the query string. To do that safely, we + # url- and base64-encode the metadata. 
+ params["metadata"] = url_encode_dict(metadata, name="metadata", size_limit_bytes=1024) + # If no inspection_id is provided, we submit the image query using image_queries_api (autogenerated via OpenAPI) # However, our autogenerated code does not currently support inspection_id, so if an inspection_id was # provided, we use the private API client instead. @@ -380,12 +393,13 @@ def submit_image_query( # noqa: PLR0913 # pylint: disable=too-many-arguments, t return self._fixup_image_query(image_query) - def ask_confident( + def ask_confident( # noqa: PLR0913 # pylint: disable=too-many-arguments self, detector: Union[Detector, str], image: Union[str, bytes, Image.Image, BytesIO, BufferedReader, np.ndarray], confidence_threshold: Optional[float] = None, wait: Optional[float] = None, + metadata: Union[dict, str, None] = None, ) -> ImageQuery: """ Evaluates an image with Groundlight waiting until an answer above the confidence threshold @@ -411,6 +425,11 @@ def ask_confident( :param wait: How long to wait (in seconds) for a confident answer. :type wait: float + :param metadata: A dictionary or JSON string of custom key/value metadata to associate with + the image query (limited to 1KB). You can retrieve this metadata later by calling + `get_image_query()`. + :type metadata: dict or str + :return: ImageQuery :rtype: ImageQuery """ @@ -421,6 +440,7 @@ def ask_confident( wait=wait, patience_time=wait, human_review=None, + metadata=metadata, ) def ask_ml( @@ -428,6 +448,7 @@ def ask_ml( detector: Union[Detector, str], image: Union[str, bytes, Image.Image, BytesIO, BufferedReader, np.ndarray], wait: Optional[float] = None, + metadata: Union[dict, str, None] = None, ) -> ImageQuery: """ Evaluates an image with Groundlight, getting the first answer Groundlight can provide. @@ -448,6 +469,11 @@ def ask_ml( :param wait: How long to wait (in seconds) for any answer. 
:type wait: float + :param metadata: A dictionary or JSON string of custom key/value metadata to associate with + the image query (limited to 1KB). You can retrieve this metadata later by calling + `get_image_query()`. + :type metadata: dict or str + :return: ImageQuery :rtype: ImageQuery """ @@ -455,6 +481,7 @@ def ask_ml( detector, image, wait=0, + metadata=metadata, ) if iq_is_answered(iq): return iq @@ -468,6 +495,7 @@ def ask_async( # noqa: PLR0913 # pylint: disable=too-many-arguments patience_time: Optional[float] = None, confidence_threshold: Optional[float] = None, human_review: Optional[str] = None, + metadata: Union[dict, str, None] = None, ) -> ImageQuery: """ Convenience method for submitting an `ImageQuery` asynchronously. This is equivalent to calling @@ -509,6 +537,11 @@ def ask_async( # noqa: PLR0913 # pylint: disable=too-many-arguments this is the ID of the inspection to associate with the image query. :type inspection_id: str + :param metadata: A dictionary or JSON string of custom key/value metadata to associate with + the image query (limited to 1KB). You can retrieve this metadata later by calling + `get_image_query()`. + :type metadata: dict or str + :return: ImageQuery :rtype: ImageQuery @@ -552,6 +585,7 @@ def ask_async( # noqa: PLR0913 # pylint: disable=too-many-arguments confidence_threshold=confidence_threshold, human_review=human_review, want_async=True, + metadata=metadata, ) def wait_for_confident_result( diff --git a/src/groundlight/encodings.py b/src/groundlight/encodings.py new file mode 100644 index 00000000..56d7a895 --- /dev/null +++ b/src/groundlight/encodings.py @@ -0,0 +1,45 @@ +import base64 +import json +import sys +from typing import Dict, Optional, Union + + +def url_encode_dict(maybe_dict: Union[Dict, str], name: str, size_limit_bytes: Optional[int] = None) -> str: + """Encode a dictionary as a URL-safe, base64-encoded JSON string. + + :param maybe_dict: The dictionary or JSON string to encode. 
+ :type maybe_dict: dict or str + + :param name: The name of the dictionary, for use in the error message. + :type name: str + + :param size_limit_bytes: The maximum size of the dictionary, in bytes. + If `None`, no size limit is enforced. + :type size_limit_bytes: int or None + + :raises TypeError: If `maybe_dict` is not a dictionary or JSON string. + :raises ValueError: If `maybe_dict` is too large. + + :return: The URL-safe, base64-encoded JSON string. + :rtype: str + """ + original_type = type(maybe_dict) + if isinstance(maybe_dict, str): + try: + # It's a little inefficient to parse the JSON string, just to re-encode it later. But it + # allows us to check that we get a valid dictionary, and we remove any whitespace. + maybe_dict = json.loads(maybe_dict) + except json.JSONDecodeError as e: + raise TypeError(f"`{name}` must be a dictionary or JSON string: got {original_type}") from e + + if not isinstance(maybe_dict, dict): + raise TypeError(f"`{name}` must be a dictionary or JSON string: got {original_type}") + + data_json = json.dumps(maybe_dict) + + if size_limit_bytes is not None: + size_bytes = len(data_json.encode("utf-8")) + if size_bytes > size_limit_bytes: + raise ValueError(f"`{name}` is too large: {size_bytes} bytes > {size_limit_bytes} bytes limit.") + + return base64.urlsafe_b64encode(data_json.encode("utf-8")).decode("utf-8") diff --git a/test/integration/test_groundlight.py index 06acd4bd..f8438650 100644 --- a/test/integration/test_groundlight.py +++ b/test/integration/test_groundlight.py @@ -1,9 +1,10 @@ # Optional star-imports are weird and not usually recommended ... 
# ruff: noqa: F403,F405 # pylint: disable=wildcard-import,unused-wildcard-import,redefined-outer-name,import-outside-toplevel +import json import time from datetime import datetime -from typing import Any +from typing import Any, Dict, Optional, Union import openapi_client import pytest @@ -62,6 +63,11 @@ def fixture_image_query_no(gl: Groundlight, detector: Detector) -> ImageQuery: return iq +@pytest.fixture(name="image") +def fixture_image() -> str: + return "test/assets/dog.jpeg" + + def test_create_detector(gl: Groundlight): name = f"Test {datetime.utcnow()}" # Need a unique name query = "Is there a dog?" @@ -246,6 +252,50 @@ def test_submit_image_query_with_human_review_param(gl: Groundlight, detector: D assert is_valid_display_result(_image_query.result) +@pytest.mark.parametrize("metadata", [None, {}, {"a": 1}, '{"a": 1}']) +def test_submit_image_query_with_metadata( + gl: Groundlight, detector: Detector, image: str, metadata: Union[Dict, str, None] +): + # We expect the returned value to be a dict + expected_metadata: Optional[Dict] = json.loads(metadata) if isinstance(metadata, str) else metadata + + iq = gl.submit_image_query(detector=detector.id, image=image, human_review="NEVER", metadata=metadata) + assert iq.metadata == expected_metadata + + # Test that we can retrieve the metadata from the server at a later time + retrieved_iq = gl.get_image_query(id=iq.id) + assert retrieved_iq.metadata == expected_metadata + + +def test_ask_methods_with_metadata(gl: Groundlight, detector: Detector, image: str): + metadata = {"a": 1} + iq = gl.ask_ml(detector=detector.id, image=image, metadata=metadata) + assert iq.metadata == metadata + + iq = gl.ask_async(detector=detector.id, image=image, human_review="NEVER", metadata=metadata) + assert iq.metadata == metadata + + # `ask_confident()` can make our unit tests take longer, so we don't include it here. 
+ # iq = gl.ask_confident(detector=detector.id, image=image, metadata=metadata) + # assert iq.metadata == metadata + + +@pytest.mark.parametrize("metadata", ["", "a", b'{"a": 1}']) +def test_submit_image_query_with_invalid_metadata(gl: Groundlight, detector: Detector, image: str, metadata: Any): + with pytest.raises(TypeError): + gl.submit_image_query(detector=detector.id, image=image, human_review="NEVER", metadata=metadata) + + +def test_submit_image_query_with_metadata_too_large(gl: Groundlight, detector: Detector, image: str): + with pytest.raises(ValueError): + gl.submit_image_query( + detector=detector.id, + image=image, + human_review="NEVER", + metadata={"a": "b" * 2000}, # More than 1KB + ) + + def test_submit_image_query_jpeg_bytes(gl: Groundlight, detector: Detector): jpeg = open("test/assets/dog.jpeg", "rb").read() _image_query = gl.submit_image_query(detector=detector.id, image=jpeg, human_review="NEVER")