From 1f3c33d186c0d75240bdcce5925f11c1ec5656d8 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Wed, 5 Nov 2025 13:34:30 -0800 Subject: [PATCH 01/47] in work --- .../_internal/low_level_wrappers/ingestion.py | 44 ++++++- .../sift_client/_internal/util/sift_stream.py | 30 +++++ python/lib/sift_client/resources/ingestion.py | 109 +++++++++++++++++- 3 files changed, 180 insertions(+), 3 deletions(-) create mode 100644 python/lib/sift_client/_internal/util/sift_stream.py diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index 8ebd9201a..1b3948f20 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -34,6 +34,7 @@ from sift_client.sift_types.ingestion import Flow, IngestionConfig, _to_rust_value from sift_client.transport import GrpcClient, WithGrpcClient from sift_client.util import cel_utils as cel +from sift_stream_bindings import RunSelectorPy, SiftStreamBuilderPy logger = logging.getLogger(__name__) @@ -43,8 +44,12 @@ from sift_stream_bindings import ( IngestionConfigFormPy, IngestWithConfigDataStreamRequestPy, - SiftStreamBuilderPy, TimeValuePy, + SiftStreamPy, + RecoveryStrategyPy, + DurationPy, + RunFormPy, + MetadataPy, ) @@ -471,3 +476,40 @@ def ingest_flow( self.stream_cache[flow.ingestion_config_id] = self._new_ingestion_thread( flow.ingestion_config_id, ingestion_config ) + + + + +class IngestionConfigStreamingLowLevelClient(LowLevelClientBase): + _sift_stream_instance: SiftStreamPy + async def __init__( + self, + api_key: str, + grpc_uri: str, + ingestion_config: IngestionConfigFormPy | None = None, + run_selector: str | RunFormPy | None = None, + asset_tags: list[str] | None = None, + asset_metadata: list[MetadataPy] | None = None, + recovery_strategy: RecoveryStrategyPy | None = None, + checkpoint_interval: DurationPy | None = None, + enable_tls: bool = True 
+ ): + super().__init__() + builder = SiftStreamBuilderPy( + uri = grpc_uri, + apikey = api_key, + ) + + builder.enable_tls = enable_tls + builder.ingestion_config = ingestion_config + builder.recovery_strategy = recovery_strategy + builder.checkpoint_interval = checkpoint_interval + builder.asset_tags = asset_tags + builder.asset_metadata = asset_metadata + + if isinstance(run_selector, str): + builder.run_id = run_selector + elif isinstance(run_selector, RunFormPy): + builder.run = run_selector + + self._sift_stream_instance = await builder.build() diff --git a/python/lib/sift_client/_internal/util/sift_stream.py b/python/lib/sift_client/_internal/util/sift_stream.py new file mode 100644 index 000000000..2009bbfe0 --- /dev/null +++ b/python/lib/sift_client/_internal/util/sift_stream.py @@ -0,0 +1,30 @@ +from sift_client.sift_types.run import Run, RunCreate, Tag +from sift_stream_bindings import MetadataPy, MetadataValuePy, RunFormPy, RunSelectorPy + +def to_runFormPy(create: RunCreate) -> RunFormPy: + + if create.client_key: + client_key = create.client_key + else: + client_key = create.name + + if create.tags: + tags = [tag.name if isinstance(tag, Tag) else tag for tag in create.tags] + else: + tags = None + + if create.metadata: + metadata = [] + for key, value in create.metadata.items(): + metadata.append(MetadataPy(key=key, value=MetadataValuePy(value))) + else: + metadata = None + + return RunFormPy( + name=create.name, + client_key=client_key, + description=create.description, + tags=tags, + metadata=metadata, + ) + diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index a04368c21..3d629e076 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -3,14 +3,17 @@ import logging from typing import TYPE_CHECKING, Any -from sift_client._internal.low_level_wrappers.ingestion import IngestionLowLevelClient +from 
sift_client._internal.low_level_wrappers.ingestion import IngestionConfigStreamingLowLevelClient, IngestionLowLevelClient from sift_client.resources._base import ResourceBase +from sift_client._internal.util.sift_stream import to_runFormPy if TYPE_CHECKING: from datetime import datetime from sift_client.client import SiftClient - from sift_client.sift_types.ingestion import Flow + from sift_client.sift_types.ingestion import Flow, IngestionConfig + from sift_client.sift_types.run import Run, RunCreate, Tag + from sift_stream_bindings import RunFormPy, RunSelectorPy logger = logging.getLogger(__name__) @@ -33,6 +36,66 @@ def __init__(self, sift_client: SiftClient): """ super().__init__(sift_client) self._low_level_client = IngestionLowLevelClient(grpc_client=self.client.grpc_client) + + + def create_ingestion_config_streaming_client( + self, + *, + ingestion_config, + run, + asset_tags, + asset_metadata, + recovery_strategy, + checkpoint_interval, + enable_tls: bool = True, + ) -> IngestionConfigStreamingClient: + return IngestionConfigStreamingClient( + self.client, + ingestion_config=ingestion_config, + run=run, + asset_tags=asset_tags, + asset_metadata=asset_metadata, + recovery_strategy=recovery_strategy, + checkpoint_interval=checkpoint_interval, + enable_tls=enable_tls + ) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + async def create_ingestion_config( self, @@ -104,3 +167,45 @@ def wait_for_ingestion_to_complete(self, timeout: float | None = None): """ logger.info("Waiting for ingestion to complete") self._low_level_client.wait_for_ingestion_to_complete(timeout) + + +class IngestionConfigStreamingClient(ResourceBase): + def __init__( + self, + sift_client: SiftClient, + *, + ingestion_config: IngestionConfig | None = None, + run: RunCreate | dict | str | Run | None = None, + asset_tags: list[str] | list[Tag] | None = None, + asset_metadata: dict[str, str | float | bool] | None = None, + recovery_strategy: IngestionRecoveryStrategy 
| None = None, + checkpoint_interval_seconds: int | None = None, + enable_tls: bool = True, + ): + super().__init__(sift_client) + + # Convert the various run varients to a RunSelectorPy + if isinstance(run, dict): + run_create = RunCreate.model_validate(run) + run_form = to_runFormPy(run_create) + run_selector = RunSelectorPy.by_form(run_form) + elif isinstance(run, Run): + run_selector = RunSelectorPy.by_id(run.id_) + elif isinstance(run, RunCreate): + run_form = to_runFormPy(run) + run_selector = RunSelectorPy.by_form(run_form) + elif isinstance(run, str): + run_selector = RunSelectorPy.by_id(run) + else: + run_selector = None + + + self._low_level_client = await IngestionConfigStreamingLowLevelClient( + ingestion_config, + run = run_selector, + asset_tags, + asset_metadata, + recovery_strategy=recovery_strategy, + checkpoint_interval=checkpoint_interval, + enable_tls=enable_tls, + ) From c1023e717afdf466c3908893f4ad6c187162e5cb Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Wed, 5 Nov 2025 14:36:40 -0800 Subject: [PATCH 02/47] in work 2 --- .../_internal/low_level_wrappers/ingestion.py | 112 +++++++-- python/lib/sift_client/resources/ingestion.py | 226 +++++++++--------- 2 files changed, 211 insertions(+), 127 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index 1b3948f20..e673285ba 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -32,15 +32,30 @@ LowLevelClientBase, ) from sift_client.sift_types.ingestion import Flow, IngestionConfig, _to_rust_value +from sift_client._internal.util.sift_stream import to_runFormPy from sift_client.transport import GrpcClient, WithGrpcClient from sift_client.util import cel_utils as cel -from sift_stream_bindings import RunSelectorPy, SiftStreamBuilderPy +from sift_stream_bindings import ( + DurationPy, + 
IngestionConfigFormPy, + MetadataPy, + RecoveryStrategyPy, + RetryPolicyPy, + RunSelectorPy, + MetadataValuePy, + SiftStreamBuilderPy, + FlowPy, + FlowConfigPy, + SiftStreamMetricsSnapshotPy, +) logger = logging.getLogger(__name__) if TYPE_CHECKING: from datetime import datetime + from sift_client.sift_types.run import RunCreate, Run, Tag + from sift_stream_bindings import ( IngestionConfigFormPy, IngestWithConfigDataStreamRequestPy, @@ -482,19 +497,59 @@ def ingest_flow( class IngestionConfigStreamingLowLevelClient(LowLevelClientBase): _sift_stream_instance: SiftStreamPy - async def __init__( - self, + + def __init__(self, sift_stream_instance: SiftStreamPy): + super().__init__() + self._sift_stream_instance = sift_stream_instance + + @classmethod + async def create_sift_stream_instance( + cls, api_key: str, grpc_uri: str, ingestion_config: IngestionConfigFormPy | None = None, - run_selector: str | RunFormPy | None = None, - asset_tags: list[str] | None = None, - asset_metadata: list[MetadataPy] | None = None, + run: RunCreate | dict | str | Run | None = None, + asset_tags: list[str] | list[Tag] | None = None, + asset_metadata: dict[str, str | float | bool] | None = None, recovery_strategy: RecoveryStrategyPy | None = None, - checkpoint_interval: DurationPy | None = None, - enable_tls: bool = True - ): - super().__init__() + checkpoint_interval_seconds: int | None = None, + enable_tls: bool = True, + ) -> IngestionConfigStreamingLowLevelClient: + # Convert the various run variants to a run or run_id + run_form: RunFormPy | None = None + run_id: str | None = None + + if isinstance(run, dict): + run_create = RunCreate.model_validate(run) + run_form = to_runFormPy(run_create) + elif isinstance(run, Run): + run_id = run.id_ + elif isinstance(run, RunCreate): + run_form = to_runFormPy(run) + elif isinstance(run, str): + run_id = run + + # Convert checkpoint_interval_seconds to DurationPy + checkpoint_interval: DurationPy | None = None + if checkpoint_interval_seconds 
is not None: + checkpoint_interval = DurationPy(secs=checkpoint_interval_seconds, nanos=0) + + # Convert asset_tags to list of strings + asset_tags_list: list[str] | None = None + if asset_tags is not None: + asset_tags_list = [ + tag.name if isinstance(tag, Tag) else tag for tag in asset_tags + ] + + # Convert asset_metadata dict to list of MetadataPy + asset_metadata_list: list[MetadataPy] | None = None + if asset_metadata is not None: + from sift_stream_bindings import MetadataPy + + asset_metadata_list = [ + MetadataPy(key=key, value=MetadataValuePy(value)) for key, value in asset_metadata.items() + ] + builder = SiftStreamBuilderPy( uri = grpc_uri, apikey = api_key, @@ -504,12 +559,35 @@ async def __init__( builder.ingestion_config = ingestion_config builder.recovery_strategy = recovery_strategy builder.checkpoint_interval = checkpoint_interval - builder.asset_tags = asset_tags - builder.asset_metadata = asset_metadata + builder.asset_tags = asset_tags_list + builder.asset_metadata = asset_metadata_list + builder.run = run_form + builder.run_id = run_id + + sift_stream_instance = await builder.build() + + return cls(sift_stream_instance) + + async def send(self, flow: FlowPy): + await self._sift_stream_instance.send(flow) + + async def send_requests(self, requests: list[IngestWithConfigDataStreamRequestPy]): + await self._sift_stream_instance.send_requests(requests) + + async def add_new_flows(self, flow_configs: list[FlowConfigPy]): + await self._sift_stream_instance.add_new_flows(flow_configs) + + async def attach_run(self, run_selector: RunSelectorPy): + await self._sift_stream_instance.attach_run(run_selector) + + def detach_run(self): + self._sift_stream_instance.detach_run() + + def get_run_id(self) -> str | None: + return self._sift_stream_instance.run() - if isinstance(run_selector, str): - builder.run_id = run_selector - elif isinstance(run_selector, RunFormPy): - builder.run = run_selector + async def finish(self): + await 
self._sift_stream_instance.finish() - self._sift_stream_instance = await builder.build() + def get_metrics_snapshot(self) -> SiftStreamMetricsSnapshotPy: + return self._sift_stream_instance.get_metrics_snapshot() \ No newline at end of file diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 3d629e076..170525ff8 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -3,17 +3,31 @@ import logging from typing import TYPE_CHECKING, Any -from sift_client._internal.low_level_wrappers.ingestion import IngestionConfigStreamingLowLevelClient, IngestionLowLevelClient +from sift_client._internal.low_level_wrappers.ingestion import ( + IngestionConfigStreamingLowLevelClient, + IngestionLowLevelClient, +) from sift_client.resources._base import ResourceBase from sift_client._internal.util.sift_stream import to_runFormPy +from sift_client.sift_types.ingestion import IngestionConfig +from sift_client.sift_types.run import Run, RunCreate, Tag +from sift_stream_bindings import ( + DurationPy, + IngestionConfigFormPy, + MetadataPy, + RecoveryStrategyPy, + RetryPolicyPy, + RunSelectorPy, + MetadataValuePy, + IngestWithConfigDataStreamRequestPy, + SiftStreamMetricsSnapshotPy, +) if TYPE_CHECKING: from datetime import datetime from sift_client.client import SiftClient - from sift_client.sift_types.ingestion import Flow, IngestionConfig - from sift_client.sift_types.run import Run, RunCreate, Tag - from sift_stream_bindings import RunFormPy, RunSelectorPy + from sift_client.sift_types.ingestion import Flow logger = logging.getLogger(__name__) @@ -38,65 +52,42 @@ def __init__(self, sift_client: SiftClient): self._low_level_client = IngestionLowLevelClient(grpc_client=self.client.grpc_client) - def create_ingestion_config_streaming_client( + async def create_ingestion_config_streaming_client( self, *, - ingestion_config, - run, - asset_tags, - asset_metadata, - 
recovery_strategy, - checkpoint_interval, + ingestion_config: IngestionConfig | None = None, + run: RunCreate | dict | str | Run | None = None, + asset_tags: list[str] | list[Tag] | None = None, + asset_metadata: dict[str, str | float | bool] | None = None, + recovery_strategy: RecoveryStrategyPy | None = None, + checkpoint_interval_seconds: int | None = None, enable_tls: bool = True, ) -> IngestionConfigStreamingClient: - return IngestionConfigStreamingClient( + """Create an IngestionConfigStreamingClient. + + Args: + ingestion_config: The ingestion config. + run: The run to associate with ingestion. Can be a Run, RunCreate, dict, or run ID string. + asset_tags: Tags to associate with the asset. + asset_metadata: Metadata to associate with the asset. + recovery_strategy: The recovery strategy to use for ingestion. + checkpoint_interval_seconds: The checkpoint interval in seconds. + enable_tls: Whether to enable TLS for the connection. + + Returns: + An initialized IngestionConfigStreamingClient. + """ + return await IngestionConfigStreamingClient.create( self.client, ingestion_config=ingestion_config, run=run, asset_tags=asset_tags, asset_metadata=asset_metadata, recovery_strategy=recovery_strategy, - checkpoint_interval=checkpoint_interval, - enable_tls=enable_tls + checkpoint_interval_seconds=checkpoint_interval_seconds, + enable_tls=enable_tls, ) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - async def create_ingestion_config( self, *, @@ -104,9 +95,8 @@ async def create_ingestion_config( run_id: str | None = None, flows: list[Flow], client_key: str | None = None, - organization_id: str | None = None, ) -> str: - """Create an ingestion config. + """Create an ingestion config. This is provided for direct use of the ingestion config API, and not the preferred way to create ingestion configs for streaming through SiftClient. Args: asset_name: The name of the asset for this ingestion config. 
@@ -138,74 +128,90 @@ async def create_ingestion_config( return ingestion_config_id - def ingest( - self, - *, - flow: Flow, - timestamp: datetime, - channel_values: dict[str, Any], - ): - """Ingest data for a flow. - - Args: - flow: The flow to ingest data for. - timestamp: The timestamp of the data. - channel_values: Dictionary mapping channel names to their values. - """ - self._low_level_client.ingest_flow( - flow=flow, - timestamp=timestamp, - channel_values=channel_values, - ) - def wait_for_ingestion_to_complete(self, timeout: float | None = None): - """Wait for all ingestion to complete. - - Args: - run_id: The id of the run to wait for. - timeout: The timeout in seconds to wait for ingestion to complete. If None, will wait forever. - """ - logger.info("Waiting for ingestion to complete") - self._low_level_client.wait_for_ingestion_to_complete(timeout) +class IngestionConfigStreamingClient(ResourceBase): + """A client for streaming ingestion with an ingestion config. + This client provides a high-level interface for streaming ingestion using + an ingestion config. It handles conversion of user-friendly types to the + low-level Rust bindings. + """ + def __init__(self, sift_client: SiftClient, low_level_client: IngestionConfigStreamingLowLevelClient): + """Initialize an IngestionConfigStreamingClient. 
Users should not initialize this class directly, but rather use the create classmethod.""" + super().__init__(sift_client) + self._low_level_client = low_level_client -class IngestionConfigStreamingClient(ResourceBase): - def __init__( - self, + @classmethod + async def create( + cls, sift_client: SiftClient, *, - ingestion_config: IngestionConfig | None = None, + ingestion_config: IngestionConfigFormPy | None = None, run: RunCreate | dict | str | Run | None = None, asset_tags: list[str] | list[Tag] | None = None, asset_metadata: dict[str, str | float | bool] | None = None, - recovery_strategy: IngestionRecoveryStrategy | None = None, + recovery_strategy: RecoveryStrategyPy | None = None, checkpoint_interval_seconds: int | None = None, enable_tls: bool = True, - ): - super().__init__(sift_client) + ) -> IngestionConfigStreamingClient: + """Create an IngestionConfigStreamingClient. + + Args: + sift_client: The Sift client to use. + ingestion_config: The ingestion config (IngestionConfig or IngestionConfigFormPy). + If IngestionConfig is provided, you must also provide flows separately. + run: The run to associate with ingestion. Can be a Run, RunCreate, dict, or run ID string. + asset_tags: Tags to associate with the asset. + asset_metadata: Metadata to associate with the asset. + recovery_strategy: The recovery strategy to use for ingestion. + checkpoint_interval_seconds: The checkpoint interval in seconds. + enable_tls: Whether to enable TLS for the connection. 
- # Convert the various run varients to a RunSelectorPy - if isinstance(run, dict): - run_create = RunCreate.model_validate(run) - run_form = to_runFormPy(run_create) - run_selector = RunSelectorPy.by_form(run_form) - elif isinstance(run, Run): - run_selector = RunSelectorPy.by_id(run.id_) - elif isinstance(run, RunCreate): - run_form = to_runFormPy(run) - run_selector = RunSelectorPy.by_form(run_form) - elif isinstance(run, str): - run_selector = RunSelectorPy.by_id(run) - else: - run_selector = None - - - self._low_level_client = await IngestionConfigStreamingLowLevelClient( - ingestion_config, - run = run_selector, - asset_tags, - asset_metadata, + Returns: + An initialized IngestionConfigStreamingClient. + """ + instance = cls.__new__(cls) + instance._sift_client = sift_client + + # Get API key and gRPC URI from the client + grpc_config = sift_client.grpc_client._config + api_key = grpc_config.api_key + grpc_uri = grpc_config.uri + + low_level_client = await IngestionConfigStreamingLowLevelClient.create_sift_stream_instance( + api_key=api_key, + grpc_uri=grpc_uri, + ingestion_config=ingestion_config, + run=run, + asset_tags=asset_tags, + asset_metadata=asset_metadata, recovery_strategy=recovery_strategy, - checkpoint_interval=checkpoint_interval, + checkpoint_interval_seconds=checkpoint_interval_seconds, enable_tls=enable_tls, ) + + return cls(sift_client, low_level_client) + + async def send(self, flow: Flow): + pass + + async def send_requests(self, requests: list[IngestWithConfigDataStreamRequestPy]): + pass + + async def add_new_flows(self, flow_configs: list[FlowConfigPy]): + pass + + async def attach_run(self, run: RunFormPy): + pass + + def detach_run(self): + pass + + def get_run_id(self) -> str | None: + pass + + async def finish(self): + pass + + def get_metrics_snapshot(self) -> SiftStreamMetricsSnapshotPy: + pass \ No newline at end of file From 579f7654ac859382619f3f9cd404a28bad7b60e6 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Wed, 5 
Nov 2025 16:44:41 -0800 Subject: [PATCH 03/47] completed layout --- .../_internal/low_level_wrappers/ingestion.py | 48 ++-------- python/lib/sift_client/resources/ingestion.py | 93 +++++++++++++++---- .../lib/sift_client/sift_types/ingestion.py | 34 ++----- 3 files changed, 90 insertions(+), 85 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index e673285ba..4eb9f1e44 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -508,48 +508,14 @@ async def create_sift_stream_instance( api_key: str, grpc_uri: str, ingestion_config: IngestionConfigFormPy | None = None, - run: RunCreate | dict | str | Run | None = None, - asset_tags: list[str] | list[Tag] | None = None, - asset_metadata: dict[str, str | float | bool] | None = None, + run_form: RunFormPy | None = None, + run_id: str | None = None, + asset_tags: list[str] | None = None, + asset_metadata: list[MetadataPy] | None = None, recovery_strategy: RecoveryStrategyPy | None = None, - checkpoint_interval_seconds: int | None = None, + checkpoint_interval: DurationPy | None = None, enable_tls: bool = True, ) -> IngestionConfigStreamingLowLevelClient: - # Convert the various run variants to a run or run_id - run_form: RunFormPy | None = None - run_id: str | None = None - - if isinstance(run, dict): - run_create = RunCreate.model_validate(run) - run_form = to_runFormPy(run_create) - elif isinstance(run, Run): - run_id = run.id_ - elif isinstance(run, RunCreate): - run_form = to_runFormPy(run) - elif isinstance(run, str): - run_id = run - - # Convert checkpoint_interval_seconds to DurationPy - checkpoint_interval: DurationPy | None = None - if checkpoint_interval_seconds is not None: - checkpoint_interval = DurationPy(secs=checkpoint_interval_seconds, nanos=0) - - # Convert asset_tags to list of strings - asset_tags_list: 
list[str] | None = None - if asset_tags is not None: - asset_tags_list = [ - tag.name if isinstance(tag, Tag) else tag for tag in asset_tags - ] - - # Convert asset_metadata dict to list of MetadataPy - asset_metadata_list: list[MetadataPy] | None = None - if asset_metadata is not None: - from sift_stream_bindings import MetadataPy - - asset_metadata_list = [ - MetadataPy(key=key, value=MetadataValuePy(value)) for key, value in asset_metadata.items() - ] - builder = SiftStreamBuilderPy( uri = grpc_uri, apikey = api_key, @@ -559,8 +525,8 @@ async def create_sift_stream_instance( builder.ingestion_config = ingestion_config builder.recovery_strategy = recovery_strategy builder.checkpoint_interval = checkpoint_interval - builder.asset_tags = asset_tags_list - builder.asset_metadata = asset_metadata_list + builder.asset_tags = asset_tags + builder.asset_metadata = asset_metadata builder.run = run_form builder.run_id = run_id diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 170525ff8..5b2fff2e1 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -9,10 +9,11 @@ ) from sift_client.resources._base import ResourceBase from sift_client._internal.util.sift_stream import to_runFormPy -from sift_client.sift_types.ingestion import IngestionConfig +from sift_client.sift_types.ingestion import IngestionConfig, FlowConfig from sift_client.sift_types.run import Run, RunCreate, Tag from sift_stream_bindings import ( DurationPy, + FlowPy, IngestionConfigFormPy, MetadataPy, RecoveryStrategyPy, @@ -21,6 +22,7 @@ MetadataValuePy, IngestWithConfigDataStreamRequestPy, SiftStreamMetricsSnapshotPy, + RunFormPy, ) if TYPE_CHECKING: @@ -147,7 +149,7 @@ async def create( sift_client: SiftClient, *, ingestion_config: IngestionConfigFormPy | None = None, - run: RunCreate | dict | str | Run | None = None, + run: RunCreate | dict | str | Run | RunFormPy | None = None, 
asset_tags: list[str] | list[Tag] | None = None, asset_metadata: dict[str, str | float | bool] | None = None, recovery_strategy: RecoveryStrategyPy | None = None, @@ -178,40 +180,93 @@ async def create( api_key = grpc_config.api_key grpc_uri = grpc_config.uri + # Convert the run variants to a run or run_id + run_form: RunFormPy | None = None + run_id: str | None = None + if isinstance(run, RunFormPy): + run_form = run + elif isinstance(run, str): + run_id = run + elif isinstance(run, dict): + run_create = RunCreate.model_validate(run) + run_form = to_runFormPy(run_create) + elif isinstance(run, Run): + run_id = run._id_or_error + elif isinstance(run, RunCreate): + run_form = to_runFormPy(run) + + # Convert asset_tags to list of strings + asset_tags_list: list[str] | None = None + if asset_tags is not None: + asset_tags_list = [ + tag.name if isinstance(tag, Tag) else tag for tag in asset_tags + ] + + # Convert asset_metadata dict to list of MetadataPy + asset_metadata_list: list[MetadataPy] | None = None + if asset_metadata is not None: + from sift_stream_bindings import MetadataPy + + asset_metadata_list = [ + MetadataPy(key=key, value=MetadataValuePy(value)) for key, value in asset_metadata.items() + ] + + # Convert checkpoint_interval_seconds to DurationPy + checkpoint_interval: DurationPy | None = None + if checkpoint_interval_seconds is not None: + checkpoint_interval = DurationPy(secs=checkpoint_interval_seconds, nanos=0) + low_level_client = await IngestionConfigStreamingLowLevelClient.create_sift_stream_instance( api_key=api_key, grpc_uri=grpc_uri, ingestion_config=ingestion_config, - run=run, - asset_tags=asset_tags, - asset_metadata=asset_metadata, + run_form=run_form, + run_id=run_id, + asset_tags=asset_tags_list, + asset_metadata=asset_metadata_list, recovery_strategy=recovery_strategy, - checkpoint_interval_seconds=checkpoint_interval_seconds, + checkpoint_interval=checkpoint_interval, enable_tls=enable_tls, ) return cls(sift_client, low_level_client) 
- async def send(self, flow: Flow): - pass + async def send(self, flow: FlowPy): + flow_py = flow._to_rust_config() + await self._low_level_client.send(flow_py) async def send_requests(self, requests: list[IngestWithConfigDataStreamRequestPy]): - pass - - async def add_new_flows(self, flow_configs: list[FlowConfigPy]): - pass - - async def attach_run(self, run: RunFormPy): - pass + await self._low_level_client.send_requests(requests) + + async def add_new_flows(self, flow_configs: list[FlowConfig]): + flow_configs_py = [flow_config._to_rust_config() for flow_config in flow_configs] + await self._low_level_client.add_new_flows(flow_configs_py) + + async def attach_run(self, run: RunCreate | dict | str | Run | RunFormPy): + if isinstance(run, RunFormPy): + run_selector_py = RunSelectorPy.by_form(run) + elif isinstance(run, dict): + run_create = RunCreate.model_validate(run) + run_form_py = to_runFormPy(run_create) + run_selector_py = RunSelectorPy.by_form(run_form_py) + elif isinstance(run, Run): + run_selector_py = RunSelectorPy.by_id(run.id_) + elif isinstance(run, RunCreate): + run_form_py = to_runFormPy(run) + run_selector_py = RunSelectorPy.by_form(run_form_py) + elif isinstance(run, str): + run_selector_py = RunSelectorPy.by_id(run) + + await self._low_level_client.attach_run(run_selector_py) def detach_run(self): - pass + self._low_level_client.detach_run() def get_run_id(self) -> str | None: - pass + return self._low_level_client.get_run_id() async def finish(self): - pass + await self._low_level_client.finish() def get_metrics_snapshot(self) -> SiftStreamMetricsSnapshotPy: - pass \ No newline at end of file + return self._low_level_client.get_metrics_snapshot() \ No newline at end of file diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index 2f90e94f5..eb4e9c694 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -10,7 +10,7 @@ 
ChannelConfig as ChannelConfigProto, ) from sift.ingestion_configs.v2.ingestion_configs_pb2 import ( - FlowConfig, + FlowConfig as FlowConfigProto, ) from sift.ingestion_configs.v2.ingestion_configs_pb2 import ( IngestionConfig as IngestionConfigProto, @@ -27,6 +27,7 @@ ChannelDataTypePy, FlowConfigPy, IngestWithConfigDataChannelValuePy, + IngestionConfigFormPy, ) from sift_client.client import SiftClient @@ -150,10 +151,10 @@ def _to_config_proto(self) -> ChannelConfigProto: ) -class Flow(BaseType[FlowConfig, "Flow"]): +class FlowConfig(BaseType[FlowConfigProto, "FlowConfig"]): """Model representing a data flow for ingestion. - A Flow represents a collection of channels that are ingested together. + A FlowConfig represents a collection of channels that are ingested together. """ model_config = ConfigDict(frozen=False) @@ -163,7 +164,7 @@ class Flow(BaseType[FlowConfig, "Flow"]): run_id: str | None = None @classmethod - def _from_proto(cls, proto: FlowConfig, sift_client: SiftClient | None = None) -> Flow: + def _from_proto(cls, proto: FlowConfigProto, sift_client: SiftClient | None = None) -> FlowConfig: return cls( proto=proto, name=proto.name, @@ -171,8 +172,8 @@ def _from_proto(cls, proto: FlowConfig, sift_client: SiftClient | None = None) - _client=sift_client, ) - def _to_proto(self) -> FlowConfig: - return FlowConfig( + def _to_proto(self) -> FlowConfigProto: + return FlowConfigProto( name=self.name, channels=[channel._to_config_proto() for channel in self.channels], ) @@ -182,7 +183,7 @@ def _to_rust_config(self) -> FlowConfigPy: return FlowConfigPy( name=self.name, - channels=[_channel_to_rust_config(channel) for channel in self.channels], + channels=[_channel_config_to_rust_config(channel) for channel in self.channels], ) def add_channel(self, channel: ChannelConfig): @@ -198,27 +199,10 @@ def add_channel(self, channel: ChannelConfig): raise ValueError("Cannot add a channel to a flow after creation") self.channels.append(channel) - def ingest(self, *, 
timestamp: datetime, channel_values: dict[str, Any]): - """Ingest data for this Flow. - - Args: - timestamp: The timestamp of the data. - channel_values: Dictionary mapping Channel names to their values. - - Raises: - ValueError: If the ingestion config ID is not set. - """ - if self.ingestion_config_id is None: - raise ValueError("Ingestion config ID is not set.") - self.client.async_.ingestion.ingest( - flow=self, - timestamp=timestamp, - channel_values=channel_values, - ) # Converter functions. -def _channel_to_rust_config(channel: ChannelConfig) -> ChannelConfigPy: +def _channel_config_to_rust_config(channel: ChannelConfig) -> ChannelConfigPy: from sift_stream_bindings import ( ChannelBitFieldElementPy, ChannelConfigPy, From fc6fae2dc39a1ca6ba50744b71ad2b4671899a40 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Wed, 5 Nov 2025 17:45:49 -0800 Subject: [PATCH 04/47] docs and linting --- .../_internal/low_level_wrappers/ingestion.py | 34 +++-- .../sift_client/_internal/util/sift_stream.py | 6 +- python/lib/sift_client/resources/ingestion.py | 119 +++++++++++++++--- .../lib/sift_client/sift_types/ingestion.py | 2 - 4 files changed, 118 insertions(+), 43 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index 4eb9f1e44..35a69d81f 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -27,47 +27,43 @@ from sift.ingestion_configs.v2.ingestion_configs_pb2_grpc import ( IngestionConfigServiceStub, ) - -from sift_client._internal.low_level_wrappers.base import ( - LowLevelClientBase, -) -from sift_client.sift_types.ingestion import Flow, IngestionConfig, _to_rust_value -from sift_client._internal.util.sift_stream import to_runFormPy -from sift_client.transport import GrpcClient, WithGrpcClient -from sift_client.util import cel_utils as cel from 
sift_stream_bindings import ( DurationPy, + FlowConfigPy, + FlowPy, IngestionConfigFormPy, MetadataPy, RecoveryStrategyPy, - RetryPolicyPy, RunSelectorPy, - MetadataValuePy, SiftStreamBuilderPy, - FlowPy, - FlowConfigPy, SiftStreamMetricsSnapshotPy, ) +from sift_client._internal.low_level_wrappers.base import ( + LowLevelClientBase, +) +from sift_client.sift_types.ingestion import Flow, IngestionConfig, _to_rust_value +from sift_client.transport import GrpcClient, WithGrpcClient +from sift_client.util import cel_utils as cel + logger = logging.getLogger(__name__) if TYPE_CHECKING: from datetime import datetime - from sift_client.sift_types.run import RunCreate, Run, Tag - from sift_stream_bindings import ( + DurationPy, IngestionConfigFormPy, IngestWithConfigDataStreamRequestPy, - TimeValuePy, - SiftStreamPy, + MetadataPy, RecoveryStrategyPy, - DurationPy, RunFormPy, - MetadataPy, + SiftStreamPy, + TimeValuePy, ) + def to_rust_py_timestamp(time: datetime) -> TimeValuePy: """Convert a Python datetime to a Rust TimeValuePy. 
@@ -556,4 +552,4 @@ async def finish(self): await self._sift_stream_instance.finish() def get_metrics_snapshot(self) -> SiftStreamMetricsSnapshotPy: - return self._sift_stream_instance.get_metrics_snapshot() \ No newline at end of file + return self._sift_stream_instance.get_metrics_snapshot() diff --git a/python/lib/sift_client/_internal/util/sift_stream.py b/python/lib/sift_client/_internal/util/sift_stream.py index 2009bbfe0..3208dc524 100644 --- a/python/lib/sift_client/_internal/util/sift_stream.py +++ b/python/lib/sift_client/_internal/util/sift_stream.py @@ -1,5 +1,7 @@ -from sift_client.sift_types.run import Run, RunCreate, Tag -from sift_stream_bindings import MetadataPy, MetadataValuePy, RunFormPy, RunSelectorPy +from sift_stream_bindings import MetadataPy, MetadataValuePy, RunFormPy + +from sift_client.sift_types.run import RunCreate, Tag + def to_runFormPy(create: RunCreate) -> RunFormPy: diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 5b2fff2e1..4110febc0 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -1,32 +1,31 @@ from __future__ import annotations import logging -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING -from sift_client._internal.low_level_wrappers.ingestion import ( - IngestionConfigStreamingLowLevelClient, - IngestionLowLevelClient, -) -from sift_client.resources._base import ResourceBase -from sift_client._internal.util.sift_stream import to_runFormPy -from sift_client.sift_types.ingestion import IngestionConfig, FlowConfig -from sift_client.sift_types.run import Run, RunCreate, Tag from sift_stream_bindings import ( DurationPy, FlowPy, IngestionConfigFormPy, + IngestWithConfigDataStreamRequestPy, MetadataPy, + MetadataValuePy, RecoveryStrategyPy, - RetryPolicyPy, + RunFormPy, RunSelectorPy, - MetadataValuePy, - IngestWithConfigDataStreamRequestPy, SiftStreamMetricsSnapshotPy, - 
RunFormPy, ) +from sift_client._internal.low_level_wrappers.ingestion import ( + IngestionConfigStreamingLowLevelClient, + IngestionLowLevelClient, +) +from sift_client._internal.util.sift_stream import to_runFormPy +from sift_client.resources._base import ResourceBase +from sift_client.sift_types.ingestion import FlowConfig, IngestionConfig +from sift_client.sift_types.run import Run, RunCreate, Tag + if TYPE_CHECKING: - from datetime import datetime from sift_client.client import SiftClient from sift_client.sift_types.ingestion import Flow @@ -134,9 +133,11 @@ async def create_ingestion_config( class IngestionConfigStreamingClient(ResourceBase): """A client for streaming ingestion with an ingestion config. - This client provides a high-level interface for streaming ingestion using - an ingestion config. It handles conversion of user-friendly types to the - low-level Rust bindings. + This client provides a high-level interface for streaming data to Sift using + an ingestion config. Under the hood, this client uses the Rust powered SiftStream library to provide + a high-performance, low-latency, and reliable streaming interface to Sift. + + This client should be initialized using the create classmethod, and not directly. Once streaming has ended, the client should be shutdown using the finish method. """ def __init__(self, sift_client: SiftClient, low_level_client: IngestionConfigStreamingLowLevelClient): """Initialize an IngestionConfigStreamingClient. Users should not initialize this class directly, but rather use the create classmethod.""" @@ -160,8 +161,7 @@ async def create( Args: sift_client: The Sift client to use. - ingestion_config: The ingestion config (IngestionConfig or IngestionConfigFormPy). - If IngestionConfig is provided, you must also provide flows separately. + ingestion_config: The ingestion config to use for streaming. run: The run to associate with ingestion. Can be a Run, RunCreate, dict, or run ID string. 
asset_tags: Tags to associate with the asset. asset_metadata: Metadata to associate with the asset. @@ -232,17 +232,68 @@ async def create( return cls(sift_client, low_level_client) async def send(self, flow: FlowPy): + """Send telemetry to Sift in the form of a Flow. + + This is the entry-point to send actual telemetry to Sift. If a message is sent that + doesn't match any flows that the stream knows about locally, the message will still be + transmitted and a warning log emitted. If you are certain that the message corresponds + to an unregistered flow then `add_new_flows` should be called first to register the flow + before calling `send`; otherwise you should monitor the Sift DLQ either in the Sift UI + or Sift API to ensure successful transmission. + + When sending messages, if backups are enabled, first the message is sent to the backup system. This system is + used to backup data to disk until the data is confirmed received by Sift. If streaming + encounters errors, the backed up data will be re-ingested ensuring all data is received + by Sift. + + If the backup system has fallen behind and the backup queue/channel is full, it will still + proceed to sending the message to Sift. This ensures data is sent to Sift even if the + backup system is lagging. + + Args: + flow: The flow to send to Sift. + """ flow_py = flow._to_rust_config() await self._low_level_client.send(flow_py) async def send_requests(self, requests: list[IngestWithConfigDataStreamRequestPy]): + """Send data in a manner identical to the raw gRPC service for ingestion-config based streaming. + + This method offers a way to send data that matches the raw gRPC service interface. You are + expected to handle channel value ordering as well as empty values correctly. + + Important: + Most users should prefer to use `send`. This method primarily exists to make it easier + for existing integrations to utilize sift-stream. + + Args: + requests: List of ingestion requests to send to Sift. 
+ """ await self._low_level_client.send_requests(requests) async def add_new_flows(self, flow_configs: list[FlowConfig]): + """Modify the existing ingestion config by adding new flows that weren't accounted for during initialization. + + This allows you to dynamically add new flow configurations to the ingestion config after + the stream has been initialized. The new flows will be registered with Sift and can then + be used in subsequent `send` calls. + + Args: + flow_configs: List of flow configurations to add to the ingestion config. + """ flow_configs_py = [flow_config._to_rust_config() for flow_config in flow_configs] await self._low_level_client.add_new_flows(flow_configs_py) async def attach_run(self, run: RunCreate | dict | str | Run | RunFormPy): + """Attach a run to the stream. + + Any data provided through `send` after this function returns will be associated with + the run. The run can be specified as a Run object, RunCreate object, dict, run ID string, + or RunFormPy object. + + Args: + run: The run to attach. Can be a Run, RunCreate, dict, run ID string, or RunFormPy. + """ if isinstance(run, RunFormPy): run_selector_py = RunSelectorPy.by_form(run) elif isinstance(run, dict): @@ -260,13 +311,41 @@ async def attach_run(self, run: RunCreate | dict | str | Run | RunFormPy): await self._low_level_client.attach_run(run_selector_py) def detach_run(self): + """Detach the run, if any, associated with the stream. + + Any data provided through `send` after this function is called will not be associated + with a run. + """ self._low_level_client.detach_run() def get_run_id(self) -> str | None: + """Retrieve the ID of the attached run, if one exists. + + Returns: + The run ID if a run is attached, None otherwise. + """ return self._low_level_client.get_run_id() async def finish(self): + """Conclude the stream and return when Sift has sent its final response. 
+ + It is important that this method be called in order to obtain the final checkpoint + acknowledgement from Sift, otherwise some tail-end data may fail to send. This method + will gracefully shut down the streaming system and ensure all data has been properly + sent to Sift. + """ await self._low_level_client.finish() def get_metrics_snapshot(self) -> SiftStreamMetricsSnapshotPy: - return self._low_level_client.get_metrics_snapshot() \ No newline at end of file + """Retrieve a snapshot of the current metrics for this stream. + + NOTE: The returned metrics snapshot is currently an unstable feature and may change at any time. + + Metrics are recorded related to the performance and operational status of the stream. + Snapshots are taken at any time this method is called. Metrics are internally updated + atomically, and calls to get metric snapshots are non-blocking to stream operation. + + Returns: + A snapshot of the current stream metrics. + """ + return self._low_level_client.get_metrics_snapshot() diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index eb4e9c694..7a5d793ba 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -20,14 +20,12 @@ from sift_client.sift_types.channel import ChannelBitFieldElement, ChannelDataType if TYPE_CHECKING: - from datetime import datetime from sift_stream_bindings import ( ChannelConfigPy, ChannelDataTypePy, FlowConfigPy, IngestWithConfigDataChannelValuePy, - IngestionConfigFormPy, ) from sift_client.client import SiftClient From 25a952148495fb8bb2caeaf5ed39d186949e9ac7 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Wed, 5 Nov 2025 18:00:23 -0800 Subject: [PATCH 05/47] clean up --- .../_internal/low_level_wrappers/ingestion.py | 2 +- .../sift_client/_internal/util/sift_stream.py | 32 ------------------- python/lib/sift_client/resources/ingestion.py | 17 +++++----- 
python/lib/sift_client/sift_types/run.py | 28 ++++++++++++++++ 4 files changed, 37 insertions(+), 42 deletions(-) delete mode 100644 python/lib/sift_client/_internal/util/sift_stream.py diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index 35a69d81f..f997bd31c 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -503,7 +503,7 @@ async def create_sift_stream_instance( cls, api_key: str, grpc_uri: str, - ingestion_config: IngestionConfigFormPy | None = None, + ingestion_config: IngestionConfigFormPy, run_form: RunFormPy | None = None, run_id: str | None = None, asset_tags: list[str] | None = None, diff --git a/python/lib/sift_client/_internal/util/sift_stream.py b/python/lib/sift_client/_internal/util/sift_stream.py deleted file mode 100644 index 3208dc524..000000000 --- a/python/lib/sift_client/_internal/util/sift_stream.py +++ /dev/null @@ -1,32 +0,0 @@ -from sift_stream_bindings import MetadataPy, MetadataValuePy, RunFormPy - -from sift_client.sift_types.run import RunCreate, Tag - - -def to_runFormPy(create: RunCreate) -> RunFormPy: - - if create.client_key: - client_key = create.client_key - else: - client_key = create.name - - if create.tags: - tags = [tag.name if isinstance(tag, Tag) else tag for tag in create.tags] - else: - tags = None - - if create.metadata: - metadata = [] - for key, value in create.metadata.items(): - metadata.append(MetadataPy(key=key, value=MetadataValuePy(value))) - else: - metadata = None - - return RunFormPy( - name=create.name, - client_key=client_key, - description=create.description, - tags=tags, - metadata=metadata, - ) - diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 4110febc0..215634d43 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ 
b/python/lib/sift_client/resources/ingestion.py @@ -20,15 +20,14 @@ IngestionConfigStreamingLowLevelClient, IngestionLowLevelClient, ) -from sift_client._internal.util.sift_stream import to_runFormPy from sift_client.resources._base import ResourceBase -from sift_client.sift_types.ingestion import FlowConfig, IngestionConfig +from sift_client.sift_types.ingestion import IngestionConfig from sift_client.sift_types.run import Run, RunCreate, Tag if TYPE_CHECKING: from sift_client.client import SiftClient - from sift_client.sift_types.ingestion import Flow + from sift_client.sift_types.ingestion import FlowConfig, IngestionConfig logger = logging.getLogger(__name__) @@ -94,7 +93,7 @@ async def create_ingestion_config( *, asset_name: str, run_id: str | None = None, - flows: list[Flow], + flows: list[FlowConfig], client_key: str | None = None, ) -> str: """Create an ingestion config. This is provided for direct use of the ingestion config API, and not the preferred way to create ingestion configs for streaming through SiftClient. 
@@ -148,8 +147,8 @@ def __init__(self, sift_client: SiftClient, low_level_client: IngestionConfigStr async def create( cls, sift_client: SiftClient, + ingestion_config: IngestionConfigFormPy, *, - ingestion_config: IngestionConfigFormPy | None = None, run: RunCreate | dict | str | Run | RunFormPy | None = None, asset_tags: list[str] | list[Tag] | None = None, asset_metadata: dict[str, str | float | bool] | None = None, @@ -189,11 +188,11 @@ async def create( run_id = run elif isinstance(run, dict): run_create = RunCreate.model_validate(run) - run_form = to_runFormPy(run_create) + run_form = run_create._to_rust_form() elif isinstance(run, Run): run_id = run._id_or_error elif isinstance(run, RunCreate): - run_form = to_runFormPy(run) + run_form = run._to_rust_form() # Convert asset_tags to list of strings asset_tags_list: list[str] | None = None @@ -298,12 +297,12 @@ async def attach_run(self, run: RunCreate | dict | str | Run | RunFormPy): run_selector_py = RunSelectorPy.by_form(run) elif isinstance(run, dict): run_create = RunCreate.model_validate(run) - run_form_py = to_runFormPy(run_create) + run_form_py = run_create._to_rust_form() run_selector_py = RunSelectorPy.by_form(run_form_py) elif isinstance(run, Run): run_selector_py = RunSelectorPy.by_id(run.id_) elif isinstance(run, RunCreate): - run_form_py = to_runFormPy(run) + run_form_py = run._to_rust_form() run_selector_py = RunSelectorPy.by_form(run_form_py) elif isinstance(run, str): run_selector_py = RunSelectorPy.by_id(run) diff --git a/python/lib/sift_client/sift_types/run.py b/python/lib/sift_client/sift_types/run.py index a8242dc40..beced607a 100644 --- a/python/lib/sift_client/sift_types/run.py +++ b/python/lib/sift_client/sift_types/run.py @@ -6,6 +6,7 @@ from pydantic import model_validator from sift.runs.v2.runs_pb2 import CreateRunRequest as CreateRunRequestProto from sift.runs.v2.runs_pb2 import Run as RunProto +from sift_stream_bindings import MetadataPy, MetadataValuePy, RunFormPy from 
sift_client.sift_types._base import ( BaseType, @@ -171,6 +172,33 @@ def _get_proto_class(self) -> type[CreateRunRequestProto]: return CreateRunRequestProto + def _to_rust_form(self) -> RunFormPy: + if self.client_key: + client_key = self.client_key + else: + client_key = self.name + + if self.tags: + tags = [tag.name if isinstance(tag, Tag) else tag for tag in self.tags] + else: + tags = None + + if self.metadata: + metadata = [] + for key, value in self.metadata.items(): + metadata.append(MetadataPy(key=key, value=MetadataValuePy(value))) + else: + metadata = None + + return RunFormPy( + name=self.name, + client_key=client_key, + description=self.description, + tags=tags, + metadata=metadata, + ) + + class RunUpdate(RunBase, ModelUpdate[RunProto]): """Update model for Run.""" From 54106314907b20a9f6722d947d1ab9200bacb448 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Wed, 5 Nov 2025 18:59:04 -0800 Subject: [PATCH 06/47] in work tracing --- .../_internal/low_level_wrappers/ingestion.py | 21 ++++ python/lib/sift_client/resources/__init__.py | 3 +- python/lib/sift_client/resources/ingestion.py | 96 +++++++++++++++++++ 3 files changed, 119 insertions(+), 1 deletion(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index f997bd31c..fcf0d80d4 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -37,6 +37,9 @@ RunSelectorPy, SiftStreamBuilderPy, SiftStreamMetricsSnapshotPy, + init_tracing, + init_tracing_with_file, + is_tracing_initialized, ) from sift_client._internal.low_level_wrappers.base import ( @@ -45,6 +48,7 @@ from sift_client.sift_types.ingestion import Flow, IngestionConfig, _to_rust_value from sift_client.transport import GrpcClient, WithGrpcClient from sift_client.util import cel_utils as cel +from sift_client.resources.ingestion import TracingConfig 
logger = logging.getLogger(__name__) @@ -511,7 +515,24 @@ async def create_sift_stream_instance( recovery_strategy: RecoveryStrategyPy | None = None, checkpoint_interval: DurationPy | None = None, enable_tls: bool = True, + tracing_config: TracingConfig | None = None, ) -> IngestionConfigStreamingLowLevelClient: + if not is_tracing_initialized(): + if tracing_config is None: + tracing_config = TracingConfig.console_only() + + if tracing_config.log_dir is not None: + # Use file logging + init_tracing_with_file( + tracing_config.level, + tracing_config.log_dir, + tracing_config.filename_prefix or "sift_stream_bindings.log", + tracing_config.max_log_files or 7, + ) + else: + # Use stdout/stderr only + init_tracing(tracing_config.level) + builder = SiftStreamBuilderPy( uri = grpc_uri, apikey = api_key, diff --git a/python/lib/sift_client/resources/__init__.py b/python/lib/sift_client/resources/__init__.py index 968fabdb3..7b2eacc29 100644 --- a/python/lib/sift_client/resources/__init__.py +++ b/python/lib/sift_client/resources/__init__.py @@ -153,7 +153,7 @@ async def main(): from sift_client.resources.assets import AssetsAPIAsync from sift_client.resources.calculated_channels import CalculatedChannelsAPIAsync from sift_client.resources.channels import ChannelsAPIAsync -from sift_client.resources.ingestion import IngestionAPIAsync +from sift_client.resources.ingestion import IngestionAPIAsync, TracingConfig from sift_client.resources.ping import PingAPIAsync from sift_client.resources.reports import ReportsAPIAsync from sift_client.resources.rules import RulesAPIAsync @@ -194,4 +194,5 @@ async def main(): "TagsAPIAsync", "TestResultsAPI", "TestResultsAPIAsync", + "TracingConfig", ] diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 215634d43..4b98caf90 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -32,6 +32,90 @@ logger = 
logging.getLogger(__name__) +class TracingConfig: + """Configuration for tracing in SiftStream. + + This class provides factory methods to create tracing configurations for use + with IngestionConfigStreamingClient. Tracing will only be initialized once per process. + """ + + def __init__( + self, + is_enabled: bool = True, + level: str = "info", + log_dir: str | None = None, + filename_prefix: str | None = None, + max_log_files: int | None = None, + ): + """Initialize a TracingConfig. + + Args: + is_enabled: Whether tracing is enabled. Defaults to True. + level: Logging level as string - one of "trace", "debug", "info", "warn", "error". + Defaults to "info". + log_dir: Directory path for log files. Required if using file logging. + Defaults to "./logs" when using with_file. + filename_prefix: Prefix for log filenames. Required if using file logging. + Defaults to "sift_stream_bindings.log" when using with_file. + max_log_files: Maximum number of log files to keep. Required if using file logging. + Defaults to 7 when using with_file. + """ + self.is_enabled = is_enabled + self.level = level + self.log_dir = log_dir + self.filename_prefix = filename_prefix + self.max_log_files = max_log_files + + @classmethod + def disabled(cls) -> TracingConfig: + """Create a configuration that disables tracing. + + Returns: + A TracingConfig with tracing disabled. + """ + return cls(is_enabled=False) + + @classmethod + def console_only(cls, level: str = "info") -> TracingConfig: + """Create a configuration that enables tracing to stdout/stderr only. + + Args: + level: Logging level as string - one of "trace", "debug", "info", "warn", "error". + Defaults to "info". + + Returns: + A TracingConfig with tracing enabled (outputs to stdout/stderr only). 
+ """ + return cls(level=level) + + @classmethod + def with_file( + cls, + level: str = "info", + log_dir: str = "./logs", + filename_prefix: str = "sift_stream_bindings.log", + max_log_files: int = 7, + ) -> TracingConfig: + """Create a configuration that enables tracing to both stdout and rolling log files. + + Args: + level: Logging level as string - one of "trace", "debug", "info", "warn", "error". + Defaults to "info". + log_dir: Directory path for log files. Defaults to "./logs". + filename_prefix: Prefix for log filenames. Defaults to "sift_stream_bindings.log". + max_log_files: Maximum number of log files to keep. Defaults to 7. + + Returns: + A TracingConfig with tracing enabled for both stdout and file output. + """ + return cls( + level=level, + log_dir=log_dir, + filename_prefix=filename_prefix, + max_log_files=max_log_files, + ) + + class IngestionAPIAsync(ResourceBase): """High-level API for interacting with ingestion services. @@ -62,6 +146,7 @@ async def create_ingestion_config_streaming_client( recovery_strategy: RecoveryStrategyPy | None = None, checkpoint_interval_seconds: int | None = None, enable_tls: bool = True, + tracing_config: TracingConfig | None = None, ) -> IngestionConfigStreamingClient: """Create an IngestionConfigStreamingClient. @@ -73,6 +158,10 @@ async def create_ingestion_config_streaming_client( recovery_strategy: The recovery strategy to use for ingestion. checkpoint_interval_seconds: The checkpoint interval in seconds. enable_tls: Whether to enable TLS for the connection. + tracing_config: Configuration for SiftStream tracing. Use TracingConfig.console_only() + to enable tracing to stdout only, or TracingConfig.with_file() to enable + tracing to both stdout and rolling log files. Defaults to None (tracing will be + initialized with default settings if not already initialized). Returns: An initialized IngestionConfigStreamingClient. 
@@ -86,6 +175,7 @@ async def create_ingestion_config_streaming_client( recovery_strategy=recovery_strategy, checkpoint_interval_seconds=checkpoint_interval_seconds, enable_tls=enable_tls, + tracing_config=tracing_config, ) async def create_ingestion_config( @@ -155,6 +245,7 @@ async def create( recovery_strategy: RecoveryStrategyPy | None = None, checkpoint_interval_seconds: int | None = None, enable_tls: bool = True, + tracing_config: TracingConfig | None = None, ) -> IngestionConfigStreamingClient: """Create an IngestionConfigStreamingClient. @@ -167,6 +258,10 @@ async def create( recovery_strategy: The recovery strategy to use for ingestion. checkpoint_interval_seconds: The checkpoint interval in seconds. enable_tls: Whether to enable TLS for the connection. + tracing_config: Configuration for SiftStream tracing. Use TracingConfig.console_only() + to enable tracing to stdout only, or TracingConfig.with_file() to enable + tracing to both stdout and rolling log files. Defaults to None (tracing will be + initialized with default settings if not already initialized). Returns: An initialized IngestionConfigStreamingClient. 
@@ -226,6 +321,7 @@ async def create( recovery_strategy=recovery_strategy, checkpoint_interval=checkpoint_interval, enable_tls=enable_tls, + tracing_config=tracing_config, ) return cls(sift_client, low_level_client) From bc47eae4ed1fe44e14b20df74a399af0c7468a97 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Wed, 5 Nov 2025 19:09:48 -0800 Subject: [PATCH 07/47] fixes and rip out old ingestion method --- .../_internal/low_level_wrappers/ingestion.py | 276 +----------------- python/lib/sift_client/sift_types/__init__.py | 4 +- 2 files changed, 11 insertions(+), 269 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index fcf0d80d4..3a931bd73 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -8,15 +8,10 @@ from __future__ import annotations -import asyncio -import atexit import hashlib import logging -import threading -import time from collections import namedtuple -from queue import Queue -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, cast from sift.ingestion_configs.v2.ingestion_configs_pb2 import ( GetIngestionConfigRequest, @@ -45,10 +40,9 @@ from sift_client._internal.low_level_wrappers.base import ( LowLevelClientBase, ) -from sift_client.sift_types.ingestion import Flow, IngestionConfig, _to_rust_value +from sift_client.sift_types.ingestion import FlowConfig, IngestionConfig from sift_client.transport import GrpcClient, WithGrpcClient from sift_client.util import cel_utils as cel -from sift_client.resources.ingestion import TracingConfig logger = logging.getLogger(__name__) @@ -66,6 +60,7 @@ TimeValuePy, ) + from sift_client.resources.ingestion import TracingConfig def to_rust_py_timestamp(time: datetime) -> TimeValuePy: @@ -85,106 +80,6 @@ def to_rust_py_timestamp(time: datetime) -> TimeValuePy: return 
TimeValuePy.from_timestamp(secs, nsecs) -class IngestionThread(threading.Thread): - """Manages ingestion for a single ingestion config.""" - - IDLE_LOOP_PERIOD = 0.1 # Time of intervals loop will sleep while waiting for data. - SIFT_STREAM_FINISH_TIMEOUT = 0.06 # Measured ~0.05s to finish stream. - CLEANUP_TIMEOUT = IDLE_LOOP_PERIOD + SIFT_STREAM_FINISH_TIMEOUT - - def __init__( - self, - sift_stream_builder: SiftStreamBuilderPy, - data_queue: Queue, - ingestion_config: IngestionConfigFormPy, - no_data_timeout: int = 1, - metric_interval: float = 0.5, - ): - """Initialize the IngestionThread. - - Args: - sift_stream_builder: The sift stream builder to build a new stream. - data_queue: The queue to put IngestWithConfigDataStreamRequestPy requests into for ingestion. - ingestion_config: The ingestion config to use for ingestion. - no_data_timeout: The number of (whole number) seconds to wait for data before stopping the thread (Saves minorly on startup resources. Ingesting new data will always restart the thread if it is stopped). - metric_interval: Time (seconds) to wait between logging metrics. - """ - super().__init__(daemon=True) - self.data_queue = data_queue - self._stop_event = threading.Event() - self.sift_stream_builder = sift_stream_builder - self.ingestion_config = ingestion_config - self.no_data_timeout = no_data_timeout - self.metric_interval = metric_interval - self.initialized = False - - def stop(self): - self._stop_event.set() - # Give a brief chance to finish the stream (should take < 50ms). 
- time.sleep(self.CLEANUP_TIMEOUT) - self.task.cancel() - - async def await_stream_build(self): - while not self.initialized: - await asyncio.sleep(0.01) - - async def main(self): - logger.debug("Ingestion thread started") - self.sift_stream_builder.ingestion_config = self.ingestion_config - sift_stream = await self.sift_stream_builder.build() - time_of_last_metric = time.time() - time_of_last_data = time.time() - count = 0 - self.initialized = True - try: - while True: - while not self.data_queue.empty(): - if self._stop_event.is_set(): - # Being forced to stop. Try to finish the stream. - logger.info( - f"Ingestion thread received stop signal. Exiting. Sent {count} requests. {self.data_queue.qsize()} requests remaining." - ) - await sift_stream.finish() - return - time_of_last_metric = time.time() - item = self.data_queue.get() - sift_stream = await sift_stream.send_requests(item) - count += 1 - time_since_last_metric = time.time() - time_of_last_metric - if time_since_last_metric > self.metric_interval: - logger.debug( - f"Ingestion thread sent {count} requests, remaining: {self.data_queue.qsize()}" - ) - time_of_last_metric = time.time() - - # Queue empty, check if we should stop. - time_since_last_data = time.time() - time_of_last_data - if self._stop_event.is_set() or time_since_last_data > self.no_data_timeout: - logger.debug( - f"No more requests. Stopping. Sent {count} requests. {self.data_queue.qsize()} requests remaining." - ) - await sift_stream.finish() - return - else: - await asyncio.sleep(self.IDLE_LOOP_PERIOD) - - except asyncio.CancelledError: - # It's possible the thread was joined while sleeping waiting for data. Only note error if we have data left. - if self.data_queue.qsize() > 0: - logger.error( - f"Ingestion thread cancelled without finishing stream. {self.data_queue.qsize()} requests were not sent." 
- ) - - async def _run(self): - self.task = asyncio.create_task(self.main()) - await self.task - - def run(self): - """This thread will handle sending data to Sift.""" - # Even thought this is a thread, we need to run this async task to await send_requests otherwise we get sift_stream consumed errors. - asyncio.run(self._run()) - - class IngestionLowLevelClient(LowLevelClientBase, WithGrpcClient): """Low-level client for the IngestionAPI. @@ -204,76 +99,15 @@ def __init__(self, grpc_client: GrpcClient): grpc_client: The gRPC client to use for making API calls. """ super().__init__(grpc_client=grpc_client) - self._sift_stream_builder = None # Lazy-initialized - self.stream_cache = {} - atexit.register(self.cleanup, timeout=0.1) - def _ensure_sift_stream_bindings(self): - """Ensure sift_stream_bindings is available and initialize the stream builder. - Raises: - ImportError: If sift_stream_bindings is not installed. - """ - if self._sift_stream_builder is not None: - return - - try: - from sift_stream_bindings import ( - RecoveryStrategyPy, - RetryPolicyPy, - SiftStreamBuilderPy, - ) - except ImportError as e: - raise ImportError( - "The 'sift-stream' package is required for ingestion operations. " - "Please install it with:` `pip install sift-stack-py[sift-stream]`" - ) from e - - # Rust GRPC client expects URI to have http(s):// prefix. - uri = self._grpc_client._config.uri - if not uri.startswith("http"): - uri = f"https://{uri}" if self._grpc_client._config.use_ssl else f"http://{uri}" - self._sift_stream_builder = SiftStreamBuilderPy( - uri=uri, - apikey=self._grpc_client._config.api_key, - ) - self._sift_stream_builder.enable_tls = self._grpc_client._config.use_ssl - # FD-177: Expose configuration for recovery strategy. 
- self._sift_stream_builder.recovery_strategy = RecoveryStrategyPy.retry_only( - RetryPolicyPy.default() - ) - - @property - def sift_stream_builder(self) -> SiftStreamBuilderPy: - """Get the sift stream builder, initializing it if necessary.""" - self._ensure_sift_stream_bindings() - assert self._sift_stream_builder is not None - return self._sift_stream_builder - - def cleanup(self, timeout: float | None = None): - """Cleanup the ingestion threads. - - Args: - timeout: The timeout in seconds to wait for ingestion to complete. If None, will wait forever. - """ - for cache_entry in self.stream_cache.values(): - data_queue, ingestion_config, thread = cache_entry - # "None" value on the queue signals its loop to terminate. - if thread: - thread.join(timeout=timeout) - if thread.is_alive(): - logger.error( - f"Ingestion thread did not finish after {timeout} seconds. Forcing stop." - ) - thread.stop() - - async def get_ingestion_config_flows(self, ingestion_config_id: str) -> list[Flow]: + async def get_ingestion_config_flows(self, ingestion_config_id: str) -> list[FlowConfig]: """Get the flows for an ingestion config.""" res = await self._grpc_client.get_stub(IngestionConfigServiceStub).GetIngestionConfig( GetIngestionConfigRequest(ingestion_config_id=ingestion_config_id) ) res = cast("ListIngestionConfigFlowsResponse", res) - return [Flow._from_proto(flow) for flow in res.flows] + return [FlowConfig._from_proto(flow) for flow in res.flows] async def list_ingestion_configs(self, filter_query: str) -> list[IngestionConfig]: """List ingestion configs.""" @@ -295,39 +129,7 @@ async def get_ingestion_config_id_from_client_key(self, client_key: str) -> str ) return ingestion_configs[0].id_ - def _new_ingestion_thread( - self, - ingestion_config_id: str, - ingestion_config: IngestionConfigFormPy | None = None, - ): - """Start a new ingestion thread. 
- This allows ingestion to happen in the background regardless of if the user is using the sync or async client - and without them having to set up threading themselves. We are using a thread vs asyncio since our - sync wrapper will block on incomlete tasks. - - Args: - ingestion_config_id: The id of the ingestion config for the flows this stream will ingest. Used to cache the stream. - ingestion_config: The ingestion config to use for ingestion. - """ - - self._ensure_sift_stream_bindings() - data_queue: Queue[list[IngestWithConfigDataStreamRequestPy]] = Queue() - existing = self.stream_cache.get(ingestion_config_id) - if existing: - existing_data_queue, existing_ingestion_config, existing_thread = existing - if existing_thread.is_alive(): - return existing_thread - else: - ingestion_config = existing_ingestion_config - # Re-use existing queue since ingest_flow has already put data on it. - data_queue = existing_data_queue - assert ingestion_config is not None # Appease mypy. - thread = IngestionThread(self.sift_stream_builder, data_queue, ingestion_config) - thread.start() - - return self.CacheEntry(data_queue, ingestion_config, thread) - - def _hash_flows(self, asset_name: str, flows: list[Flow]) -> str: + def _hash_flows(self, asset_name: str, flows: list[FlowConfig]) -> str: """Generate a client key that should be unique but deterministic for the given asset and flow configuration.""" # TODO: Taken from sift_py/ingestion/config/telemetry.py. Confirm intent from Marc. m = hashlib.sha256() @@ -359,7 +161,7 @@ async def create_ingestion_config( self, *, asset_name: str, - flows: list[Flow], + flows: list[FlowConfig], client_key: str | None = None, ) -> str: """Create an ingestion config. @@ -430,68 +232,6 @@ async def create_ingestion_config( raise ValueError("No ingestion config id found") return ingestion_config_id - def wait_for_ingestion_to_complete(self, timeout: float | None = None): - """Blocks until all ingestion to complete. 
- - Args: - timeout: The timeout in seconds to wait for ingestion to complete. If None, will wait forever. - """ - logger.debug("Waiting for ingestion to complete") - self.cleanup(timeout) - - def ingest_flow( - self, - *, - flow: Flow, - timestamp: datetime, - channel_values: dict[str, Any], - organization_id: str | None = None, - ): - """Ingest a flow. This is a synchronous call that queues an ingestion request that will be processed asynchronously on a background thread. - - Args: - flow: The flow to ingest. - timestamp: The timestamp of the flow. - channel_values: The channel values to ingest. - organization_id: The organization id to use for ingestion. Only relevant if the user is part of several organizations. - """ - from sift_stream_bindings import IngestWithConfigDataStreamRequestPy - - self._ensure_sift_stream_bindings() - - if not flow.ingestion_config_id: - raise ValueError( - "Flow has no ingestion config id -- have you created an ingestion config for this flow?" - ) - cache_entry = self.stream_cache.get(flow.ingestion_config_id) - if not cache_entry: - raise ValueError( - f"Ingestion config {flow.ingestion_config_id} not found. Have you created an ingestion config for this flow?" 
- ) - rust_channel_values = [] - # Iterate through all expected channels for flow and convert to ingestion types (missing channels use a special empty type) - for channel in flow.channels: - val = channel_values.get(channel.name) - rust_channel_values.append(_to_rust_value(channel, val)) - req = IngestWithConfigDataStreamRequestPy( - ingestion_config_id=flow.ingestion_config_id, - run_id=flow.run_id or "", - flow=flow.name, - timestamp=to_rust_py_timestamp(timestamp), - channel_values=rust_channel_values, - end_stream_on_validation_error=False, - organization_id=organization_id or "", # This will be filled in by the server - ) - data_queue, ingestion_config, thread = cache_entry - assert data_queue is not None - # Put data on queue before potentially starting a new thread so it doesn't initially sleep waiting for data. - data_queue.put([req]) - if not (thread and thread.is_alive()): - # We previously had a thread for this ingestion config but it finished ingestion so create a new one. - self.stream_cache[flow.ingestion_config_id] = self._new_ingestion_thread( - flow.ingestion_config_id, ingestion_config - ) - @@ -517,6 +257,8 @@ async def create_sift_stream_instance( enable_tls: bool = True, tracing_config: TracingConfig | None = None, ) -> IngestionConfigStreamingLowLevelClient: + from sift_client.resources.ingestion import TracingConfig + if not is_tracing_initialized(): if tracing_config is None: tracing_config = TracingConfig.console_only() diff --git a/python/lib/sift_client/sift_types/__init__.py b/python/lib/sift_client/sift_types/__init__.py index e5318dca7..54b2ac2af 100644 --- a/python/lib/sift_client/sift_types/__init__.py +++ b/python/lib/sift_client/sift_types/__init__.py @@ -141,7 +141,7 @@ ChannelDataType, ChannelReference, ) -from sift_client.sift_types.ingestion import ChannelConfig, Flow, IngestionConfig +from sift_client.sift_types.ingestion import ChannelConfig, FlowConfig, IngestionConfig from sift_client.sift_types.report import Report, 
ReportRuleStatus, ReportRuleSummary, ReportUpdate from sift_client.sift_types.rule import ( Rule, @@ -178,7 +178,7 @@ "ChannelConfig", "ChannelDataType", "ChannelReference", - "Flow", + "FlowConfig", "IngestionConfig", "Report", "ReportRuleStatus", From 94ce997633a512475817371847d8409ef90da140 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Wed, 5 Nov 2025 19:31:05 -0800 Subject: [PATCH 08/47] working ingestion --- .../lib/sift_client/_internal/low_level_wrappers/ingestion.py | 2 +- python/lib/sift_client/resources/ingestion.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index 3a931bd73..e45f14d2e 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -285,7 +285,7 @@ async def create_sift_stream_instance( builder.recovery_strategy = recovery_strategy builder.checkpoint_interval = checkpoint_interval builder.asset_tags = asset_tags - builder.asset_metadata = asset_metadata + builder.metadata = asset_metadata builder.run = run_form builder.run_id = run_id diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 4b98caf90..ca40bc73c 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -348,8 +348,7 @@ async def send(self, flow: FlowPy): Args: flow: The flow to send to Sift. """ - flow_py = flow._to_rust_config() - await self._low_level_client.send(flow_py) + await self._low_level_client.send(flow) async def send_requests(self, requests: list[IngestWithConfigDataStreamRequestPy]): """Send data in a manner identical to the raw gRPC service for ingestion-config based streaming. 
From 5086aac899b21d8aa87b9f9ddc6800482383b3db Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Thu, 6 Nov 2025 14:13:12 -0800 Subject: [PATCH 09/47] add recovery config --- python/lib/sift_client/resources/ingestion.py | 121 ++++++++++++++++-- python/lib/sift_client/sift_types/__init__.py | 10 +- .../lib/sift_client/sift_types/ingestion.py | 35 ++++- 3 files changed, 155 insertions(+), 11 deletions(-) diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index ca40bc73c..f11e0e0c9 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING from sift_stream_bindings import ( + DiskBackupPolicyPy, DurationPy, FlowPy, IngestionConfigFormPy, @@ -11,6 +12,7 @@ MetadataPy, MetadataValuePy, RecoveryStrategyPy, + RetryPolicyPy, RunFormPy, RunSelectorPy, SiftStreamMetricsSnapshotPy, @@ -21,13 +23,13 @@ IngestionLowLevelClient, ) from sift_client.resources._base import ResourceBase -from sift_client.sift_types.ingestion import IngestionConfig +from sift_client.sift_types.ingestion import IngestionConfig, IngestionConfigCreate from sift_client.sift_types.run import Run, RunCreate, Tag if TYPE_CHECKING: from sift_client.client import SiftClient - from sift_client.sift_types.ingestion import FlowConfig, IngestionConfig + from sift_client.sift_types.ingestion import FlowConfig logger = logging.getLogger(__name__) @@ -116,6 +118,87 @@ def with_file( ) +class RecoveryStrategyConfig: + """Configuration for the SiftStream recovery strategy. + + This class provides a Python-friendly interface for configuring the recovery strategy used in SiftStream. + Recovery strategies determine how SiftStream handles failures and retries when ingesting data. + + Recovery strategies control: + - How frequently to retry a failed connection to Sift. 
+ - Whether to use per checkpoint backups to allow re-ingestion of data to Sift after a streaming failure. + - Settings to control the number and size of backup files, and whether to retain backups after verification of successful ingestion into sift. + + Most users should use one of the factory methods: + - `retry_only()` - Only attempt to reconnect to Sift after a connection failure. Any data which failed to be ingested will be lost. + - More performant, but with no guarantee of data ingestion. + - `retry_with_backups()` - Ingestion is checkpointed. If an ingestion issue occurs during a checkpoint, that data will be re-ingested into Sift + asynchronously along with incoming live data. Backup files are generated and by default, cleared after a successful checkpoint or re-ingestion. + """ + + def __init__(self, recovery_strategy_py: RecoveryStrategyPy | None): + """Initialize a RecoveryStrategyConfig. + + Args: + recovery_strategy_py: The underlying RecoveryStrategyPy instance. + If None, uses the default retry-only strategy. + + Note: + Most users should use the factory methods (`retry_only()` or `retry_with_backups()`) + instead of calling this constructor directly. + """ + self._recovery_strategy_py = recovery_strategy_py or RecoveryStrategyPy.default() + + def to_rust_config(self) -> RecoveryStrategyPy: + """Convert to RecoveryStrategyPy for use with the ingestion client. + + Returns: + A RecoveryStrategyPy instance that can be passed to the ingestion client. + """ + return self._recovery_strategy_py + + @classmethod + def retry_only(cls, retry_policy: RetryPolicyPy | None = None) -> RecoveryStrategyConfig: + """Create a recovery strategy that only retries connection failures. + + Args: + retry_policy: Retry policy configuration specifying retry attempts, backoff timing, etc. + If None, uses the default retry policy (5 attempts, 50ms initial backoff, + 5s max backoff, multiplier of 5). + + Returns: + A RecoveryStrategyConfig configured for retry-only strategy. 
+ """ + retry_policy_py = retry_policy or RetryPolicyPy.default() + + recovery_strategy_py = RecoveryStrategyPy.retry_only(retry_policy_py) + return cls(recovery_strategy_py = recovery_strategy_py) + + @classmethod + def retry_with_backups(cls, retry_policy: RetryPolicyPy | None = None, disk_backup_policy: DiskBackupPolicyPy | None = None) -> RecoveryStrategyConfig: + """Create a recovery strategy with retries re-ingestion using disk based backups. + + Args: + retry_policy: Retry policy configuration specifying retry attempts, backoff timing, etc. + If None, uses the default retry policy (5 attempts, 50ms initial backoff, + 5s max backoff, multiplier of 5). + disk_backup_policy: Disk backup policy configuration specifying backup directory, + file size limits, etc. If None, uses the default disk backup policy. + + Returns: + A RecoveryStrategyConfig configured for retry with disk backups. + """ + retry_policy_py = retry_policy or RetryPolicyPy.default() + disk_backup_policy_py = disk_backup_policy or DiskBackupPolicyPy.default() + + recovery_strategy_py = RecoveryStrategyPy.retry_with_backups( + retry_policy = retry_policy_py, + disk_backup_policy = disk_backup_policy_py, + ) + return cls(recovery_strategy_py = recovery_strategy_py) + + + class IngestionAPIAsync(ResourceBase): """High-level API for interacting with ingestion services. 
@@ -138,12 +221,12 @@ def __init__(self, sift_client: SiftClient): async def create_ingestion_config_streaming_client( self, + ingestion_config: IngestionConfig | IngestionConfigCreate | IngestionConfigFormPy, *, - ingestion_config: IngestionConfig | None = None, run: RunCreate | dict | str | Run | None = None, asset_tags: list[str] | list[Tag] | None = None, asset_metadata: dict[str, str | float | bool] | None = None, - recovery_strategy: RecoveryStrategyPy | None = None, + recovery_strategy: RecoveryStrategyConfig | RecoveryStrategyPy | None = None, checkpoint_interval_seconds: int | None = None, enable_tls: bool = True, tracing_config: TracingConfig | None = None, @@ -151,7 +234,7 @@ async def create_ingestion_config_streaming_client( """Create an IngestionConfigStreamingClient. Args: - ingestion_config: The ingestion config. + ingestion_config: The ingestion config. Can be a IngestionConfig or IngestionConfigFormPy. run: The run to associate with ingestion. Can be a Run, RunCreate, dict, or run ID string. asset_tags: Tags to associate with the asset. asset_metadata: Metadata to associate with the asset. 
@@ -237,12 +320,12 @@ def __init__(self, sift_client: SiftClient, low_level_client: IngestionConfigStr async def create( cls, sift_client: SiftClient, - ingestion_config: IngestionConfigFormPy, + ingestion_config: IngestionConfig | IngestionConfigCreate | IngestionConfigFormPy, *, run: RunCreate | dict | str | Run | RunFormPy | None = None, asset_tags: list[str] | list[Tag] | None = None, asset_metadata: dict[str, str | float | bool] | None = None, - recovery_strategy: RecoveryStrategyPy | None = None, + recovery_strategy: RecoveryStrategyConfig | RecoveryStrategyPy | None = None, checkpoint_interval_seconds: int | None = None, enable_tls: bool = True, tracing_config: TracingConfig | None = None, @@ -274,6 +357,26 @@ async def create( api_key = grpc_config.api_key grpc_uri = grpc_config.uri + # Convert the ingestion_config variants to a IngestionConfigFormPy + if isinstance(ingestion_config, IngestionConfig): + # SiftStream will retrieve the existing config from the client_key + asset_name = sift_client.assets.get(asset_id=ingestion_config.asset_id) + ingestion_config_form = IngestionConfigFormPy( + asset_name = asset_name, + client_key = ingestion_config.client_key, + flows = [], + ) + elif isinstance(ingestion_config, IngestionConfigCreate): + ingestion_config_form = ingestion_config.to_rust_form() + else: + ingestion_config_form = ingestion_config + + # Convert the recovery strategy variants + if isinstance(recovery_strategy, RecoveryStrategyConfig): + recovery_strategy_py = recovery_strategy.to_rust_config() + else: + recovery_strategy_py = recovery_strategy + # Convert the run variants to a run or run_id run_form: RunFormPy | None = None run_id: str | None = None @@ -313,12 +416,12 @@ async def create( low_level_client = await IngestionConfigStreamingLowLevelClient.create_sift_stream_instance( api_key=api_key, grpc_uri=grpc_uri, - ingestion_config=ingestion_config, + ingestion_config=ingestion_config_form, run_form=run_form, run_id=run_id, 
asset_tags=asset_tags_list, asset_metadata=asset_metadata_list, - recovery_strategy=recovery_strategy, + recovery_strategy=recovery_strategy_py, checkpoint_interval=checkpoint_interval, enable_tls=enable_tls, tracing_config=tracing_config, diff --git a/python/lib/sift_client/sift_types/__init__.py b/python/lib/sift_client/sift_types/__init__.py index 54b2ac2af..74c375347 100644 --- a/python/lib/sift_client/sift_types/__init__.py +++ b/python/lib/sift_client/sift_types/__init__.py @@ -141,7 +141,13 @@ ChannelDataType, ChannelReference, ) -from sift_client.sift_types.ingestion import ChannelConfig, FlowConfig, IngestionConfig +from sift_client.sift_types.ingestion import ( + ChannelConfig, + FlowConfig, + IngestionConfig, + RecoveryStrategyConfig, + RetryPolicyConfig, +) from sift_client.sift_types.report import Report, ReportRuleStatus, ReportRuleSummary, ReportUpdate from sift_client.sift_types.rule import ( Rule, @@ -180,10 +186,12 @@ "ChannelReference", "FlowConfig", "IngestionConfig", + "RecoveryStrategyConfig", "Report", "ReportRuleStatus", "ReportRuleSummary", "ReportUpdate", + "RetryPolicyConfig", "Rule", "RuleAction", "RuleActionType", diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index 7a5d793ba..943290851 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import math from typing import TYPE_CHECKING, Any @@ -9,16 +10,25 @@ from sift.ingestion_configs.v2.ingestion_configs_pb2 import ( ChannelConfig as ChannelConfigProto, ) +from sift.ingestion_configs.v2.ingestion_configs_pb2 import ( + CreateIngestionConfigRequest as CreateIngestionConfigRequestProto, +) from sift.ingestion_configs.v2.ingestion_configs_pb2 import ( FlowConfig as FlowConfigProto, ) from sift.ingestion_configs.v2.ingestion_configs_pb2 import ( IngestionConfig as IngestionConfigProto, ) +from 
sift_stream_bindings import IngestionConfigFormPy -from sift_client.sift_types._base import BaseType +from sift_client.sift_types._base import ( + BaseType, + ModelCreate, +) from sift_client.sift_types.channel import ChannelBitFieldElement, ChannelDataType +logger = logging.getLogger(__name__) + if TYPE_CHECKING: from sift_stream_bindings import ( @@ -51,6 +61,29 @@ def _from_proto( ) +class IngestionConfigCreate(ModelCreate[CreateIngestionConfigRequestProto]): + """Create model for IngestionConfig.""" + + asset_name: str + flows: list[FlowConfig] = None + organization_id: str | None = None + client_key: str | None = None + + def _get_proto_class(self) -> type[CreateIngestionConfigRequestProto]: + return CreateIngestionConfigRequestProto + + def _to_rust_form(self) -> IngestionConfigFormPy: + if self.organization_id: + logger.warning("OrgId is ignored when passing an IngestionConfigCreate to the ingestion client") + + return IngestionConfigFormPy( + asset_name = self.asset_name, + flows = [flow_config._to_rust_config() for flow_config in self.flows], + client_key = self.client_key or self.asset_name # Default to using asset_name as the client_key + ) + + + class ChannelConfig(BaseType[ChannelConfigProto, "ChannelConfig"]): """Channel configuration model for ingestion purposes. 
From 5dacf683d74727e81192f280bead455287be43d6 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Thu, 6 Nov 2025 15:07:16 -0800 Subject: [PATCH 10/47] channel value --- python/lib/sift_client/resources/ingestion.py | 7 +- .../lib/sift_client/sift_types/ingestion.py | 94 ++++++++++++++++++- 2 files changed, 97 insertions(+), 4 deletions(-) diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index f11e0e0c9..78673551c 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -23,7 +23,7 @@ IngestionLowLevelClient, ) from sift_client.resources._base import ResourceBase -from sift_client.sift_types.ingestion import IngestionConfig, IngestionConfigCreate +from sift_client.sift_types.ingestion import Flow, IngestionConfig, IngestionConfigCreate from sift_client.sift_types.run import Run, RunCreate, Tag if TYPE_CHECKING: @@ -429,7 +429,7 @@ async def create( return cls(sift_client, low_level_client) - async def send(self, flow: FlowPy): + async def send(self, flow: Flow | FlowPy): """Send telemetry to Sift in the form of a Flow. This is the entry-point to send actual telemetry to Sift. If a message is sent that @@ -546,3 +546,6 @@ def get_metrics_snapshot(self) -> SiftStreamMetricsSnapshotPy: A snapshot of the current stream metrics. 
""" return self._low_level_client.get_metrics_snapshot() + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.finish() diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index 943290851..5b8563bd3 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -7,6 +7,9 @@ from google.protobuf.empty_pb2 import Empty from pydantic import ConfigDict, model_validator from sift.ingest.v1.ingest_pb2 import IngestWithConfigDataChannelValue +from sift.ingest.v1.ingest_pb2 import ( + IngestWithConfigDataStreamRequest as IngestWithConfigDataStreamRequestProto, +) from sift.ingestion_configs.v2.ingestion_configs_pb2 import ( ChannelConfig as ChannelConfigProto, ) @@ -21,6 +24,7 @@ ) from sift_stream_bindings import IngestionConfigFormPy +from sift_client._internal.low_level_wrappers.ingestion import to_rust_py_timestamp from sift_client.sift_types._base import ( BaseType, ModelCreate, @@ -30,11 +34,13 @@ logger = logging.getLogger(__name__) if TYPE_CHECKING: + from datetime import datetime from sift_stream_bindings import ( ChannelConfigPy, ChannelDataTypePy, FlowConfigPy, + FlowPy, IngestWithConfigDataChannelValuePy, ) @@ -181,11 +187,28 @@ def _to_config_proto(self) -> ChannelConfigProto: ], ) + def as_channel_value(self, value: Any) -> ChannelValue: + """Create a ChannelValue from a value using this channel's configuration. + + Args: + value: The value to wrap in a ChannelValue. The type should match + this channel's data_type. + + Returns: + A ChannelValue instance with this channel's name and data type, + containing the provided value. + """ + return ChannelValue( + name=self.name, + ty=self.data_type, + value=value, + ) + class FlowConfig(BaseType[FlowConfigProto, "FlowConfig"]): - """Model representing a data flow for ingestion. + """Model representing a data flow config for ingestion. 
- A FlowConfig represents a collection of channels that are ingested together. + A FlowConfig represents the configuration of a collection of channels that are ingested together. """ model_config = ConfigDict(frozen=False) @@ -231,6 +254,73 @@ def add_channel(self, channel: ChannelConfig): self.channels.append(channel) +class Flow(BaseType[IngestWithConfigDataStreamRequestProto, "Flow"]): + """Model representing a data flow for ingestion. + + A Flow represents a collection of channels that are ingested together. + + A representation of the IngestWithConfigDataStreamRequest proto + """ + + ingestion_config_id: str | None + flow: str + timestamp: datetime + channel_values: list[ChannelValue] + run_id: str | None + end_stream_on_validation_error: bool | None + organization_id: str | None + + def _to_rust_form(self) -> FlowPy: + from sift_stream_bindings import FlowPy + + return FlowPy( + flow_name = self.flow, + timestamp = to_rust_py_timestamp(self.timestamp), + values = [channel_value.to_rust_form() for channel_value in self.channel_values] + ) + + +class ChannelValue: + """Model representing a channel value for ingestion. + + A ChannelValue represents the data of a channel to be ingested. 
+ """ + + name: str + ty: ChannelDataType + value: Any + + def _to_rust_form(self): + from sift_stream_bindings import ChannelValuePy, ValuePy + + if self.ty == ChannelDataType.BIT_FIELD: + value_py = ValuePy.BitField(self.value) + elif self.ty == ChannelDataType.ENUM: + value_py = ValuePy.Enum(self.value) + elif self.ty == ChannelDataType.BOOL: + value_py = ValuePy.Bool(self.value) + elif self.ty == ChannelDataType.FLOAT: + value_py = ValuePy.Float(self.value) + elif self.ty == ChannelDataType.DOUBLE: + value_py = ValuePy.Double(self.value) + elif self.ty == ChannelDataType.INT_32: + value_py = ValuePy.Int32(self.value) + elif self.ty == ChannelDataType.INT_64: + value_py = ValuePy.Int64(self.value) + elif self.ty == ChannelDataType.UINT_32: + value_py = ValuePy.Uint32(self.value) + elif self.ty == ChannelDataType.UINT_64: + value_py = ValuePy.Uint64(self.value) + elif self.ty == ChannelDataType.STRING: + value_py = ValuePy.String(self.value) + else: + raise ValueError(f"Invalid data type: {self.ty}") + + return ChannelValuePy( + name = self.name, + value = value_py, + ) + # Converter functions. 
def _channel_config_to_rust_config(channel: ChannelConfig) -> ChannelConfigPy: From ae8610d67baf72bc20f45565963034d72b863653 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Thu, 6 Nov 2025 18:06:48 -0800 Subject: [PATCH 11/47] various fixes --- python/lib/sift_client/resources/ingestion.py | 11 ++++-- python/lib/sift_client/sift_types/__init__.py | 8 ++--- .../lib/sift_client/sift_types/ingestion.py | 36 ++++++++++++++----- 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 78673551c..964ecda37 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -367,7 +367,7 @@ async def create( flows = [], ) elif isinstance(ingestion_config, IngestionConfigCreate): - ingestion_config_form = ingestion_config.to_rust_form() + ingestion_config_form = ingestion_config._to_rust_form() else: ingestion_config_form = ingestion_config @@ -451,7 +451,11 @@ async def send(self, flow: Flow | FlowPy): Args: flow: The flow to send to Sift. """ - await self._low_level_client.send(flow) + if isinstance(flow, Flow): + flow_py = flow._to_rust_form() + else: + flow_py = flow + await self._low_level_client.send(flow_py) async def send_requests(self, requests: list[IngestWithConfigDataStreamRequestPy]): """Send data in a manner identical to the raw gRPC service for ingestion-config based streaming. 
@@ -547,5 +551,8 @@ def get_metrics_snapshot(self) -> SiftStreamMetricsSnapshotPy: """ return self._low_level_client.get_metrics_snapshot() + async def __aenter__(self): + return self + async def __aexit__(self, exc_type, exc_val, exc_tb): await self.finish() diff --git a/python/lib/sift_client/sift_types/__init__.py b/python/lib/sift_client/sift_types/__init__.py index 74c375347..31fb95d8c 100644 --- a/python/lib/sift_client/sift_types/__init__.py +++ b/python/lib/sift_client/sift_types/__init__.py @@ -144,9 +144,9 @@ from sift_client.sift_types.ingestion import ( ChannelConfig, FlowConfig, + Flow, IngestionConfig, - RecoveryStrategyConfig, - RetryPolicyConfig, + IngestionConfigCreate, ) from sift_client.sift_types.report import Report, ReportRuleStatus, ReportRuleSummary, ReportUpdate from sift_client.sift_types.rule import ( @@ -184,14 +184,14 @@ "ChannelConfig", "ChannelDataType", "ChannelReference", + "Flow", "FlowConfig", "IngestionConfig", - "RecoveryStrategyConfig", + "IngestionConfigCreate", "Report", "ReportRuleStatus", "ReportRuleSummary", "ReportUpdate", - "RetryPolicyConfig", "Rule", "RuleAction", "RuleActionType", diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index 5b8563bd3..0eab4a624 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -3,9 +3,10 @@ import logging import math from typing import TYPE_CHECKING, Any +from datetime import datetime, timezone from google.protobuf.empty_pb2 import Empty -from pydantic import ConfigDict, model_validator +from pydantic import BaseModel, ConfigDict, model_validator from sift.ingest.v1.ingest_pb2 import IngestWithConfigDataChannelValue from sift.ingest.v1.ingest_pb2 import ( IngestWithConfigDataStreamRequest as IngestWithConfigDataStreamRequestProto, @@ -18,13 +19,13 @@ ) from sift.ingestion_configs.v2.ingestion_configs_pb2 import ( FlowConfig as FlowConfigProto, + 
CreateIngestionConfigFlowsRequest as FlowConfigsCreateProto, ) from sift.ingestion_configs.v2.ingestion_configs_pb2 import ( IngestionConfig as IngestionConfigProto, ) from sift_stream_bindings import IngestionConfigFormPy -from sift_client._internal.low_level_wrappers.ingestion import to_rust_py_timestamp from sift_client.sift_types._base import ( BaseType, ModelCreate, @@ -262,16 +263,33 @@ class Flow(BaseType[IngestWithConfigDataStreamRequestProto, "Flow"]): A representation of the IngestWithConfigDataStreamRequest proto """ - ingestion_config_id: str | None + model_config = ConfigDict(frozen=False) + ingestion_config_id: str | None = None flow: str timestamp: datetime channel_values: list[ChannelValue] - run_id: str | None - end_stream_on_validation_error: bool | None - organization_id: str | None + run_id: str | None = None + end_stream_on_validation_error: bool | None = None + organization_id: str | None = None + + @classmethod + def _from_proto( + cls, proto: IngestWithConfigDataStreamRequestProto, sift_client: SiftClient | None = None + ) -> IngestionConfig: + return cls( + proto=proto, + ingestion_config_id = proto.ingestion_config_id, + flow = proto.flow, + timestamp = proto.timestamp.ToDatetime(tzinfo=timezone.utc), + channel_values = proto.channel_values, + run_id = proto.run_id, + end_stream_on_validation_error = proto.end_stream_on_validation_error, + organization_id = proto.organization_id, + ) def _to_rust_form(self) -> FlowPy: from sift_stream_bindings import FlowPy + from sift_client._internal.low_level_wrappers.ingestion import to_rust_py_timestamp return FlowPy( flow_name = self.flow, @@ -280,17 +298,19 @@ def _to_rust_form(self) -> FlowPy: ) -class ChannelValue: +class ChannelValue(BaseModel): """Model representing a channel value for ingestion. A ChannelValue represents the data of a channel to be ingested. 
""" + model_config = ConfigDict(frozen=False) name: str ty: ChannelDataType value: Any - def _to_rust_form(self): + def to_rust_form(self): + """Convert this ChannelValue to its Rust form for ingestion.""" from sift_stream_bindings import ChannelValuePy, ValuePy if self.ty == ChannelDataType.BIT_FIELD: From a44b81e7b00c111cb6ff6ff878ea37407a7d3728 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Fri, 7 Nov 2025 11:30:15 -0800 Subject: [PATCH 12/47] Keep bindings as optional dep --- .../_internal/low_level_wrappers/ingestion.py | 26 +++++------ python/lib/sift_client/resources/ingestion.py | 44 ++++++++++++------- python/lib/sift_client/sift_types/__init__.py | 2 +- .../lib/sift_client/sift_types/ingestion.py | 8 ++-- python/lib/sift_client/sift_types/run.py | 5 ++- 5 files changed, 51 insertions(+), 34 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index e45f14d2e..e3aec6429 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -22,20 +22,6 @@ from sift.ingestion_configs.v2.ingestion_configs_pb2_grpc import ( IngestionConfigServiceStub, ) -from sift_stream_bindings import ( - DurationPy, - FlowConfigPy, - FlowPy, - IngestionConfigFormPy, - MetadataPy, - RecoveryStrategyPy, - RunSelectorPy, - SiftStreamBuilderPy, - SiftStreamMetricsSnapshotPy, - init_tracing, - init_tracing_with_file, - is_tracing_initialized, -) from sift_client._internal.low_level_wrappers.base import ( LowLevelClientBase, @@ -51,11 +37,16 @@ from sift_stream_bindings import ( DurationPy, + FlowConfigPy, + FlowPy, IngestionConfigFormPy, IngestWithConfigDataStreamRequestPy, MetadataPy, RecoveryStrategyPy, RunFormPy, + RunSelectorPy, + SiftStreamBuilderPy, + SiftStreamMetricsSnapshotPy, SiftStreamPy, TimeValuePy, ) @@ -257,6 +248,13 @@ async def create_sift_stream_instance( enable_tls: 
bool = True, tracing_config: TracingConfig | None = None, ) -> IngestionConfigStreamingLowLevelClient: + from sift_stream_bindings import ( + SiftStreamBuilderPy, + init_tracing, + init_tracing_with_file, + is_tracing_initialized, + ) + from sift_client.resources.ingestion import TracingConfig if not is_tracing_initialized(): diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 964ecda37..34f29dad0 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -3,21 +3,6 @@ import logging from typing import TYPE_CHECKING -from sift_stream_bindings import ( - DiskBackupPolicyPy, - DurationPy, - FlowPy, - IngestionConfigFormPy, - IngestWithConfigDataStreamRequestPy, - MetadataPy, - MetadataValuePy, - RecoveryStrategyPy, - RetryPolicyPy, - RunFormPy, - RunSelectorPy, - SiftStreamMetricsSnapshotPy, -) - from sift_client._internal.low_level_wrappers.ingestion import ( IngestionConfigStreamingLowLevelClient, IngestionLowLevelClient, @@ -27,6 +12,18 @@ from sift_client.sift_types.run import Run, RunCreate, Tag if TYPE_CHECKING: + from sift_stream_bindings import ( + DiskBackupPolicyPy, + DurationPy, + FlowPy, + IngestionConfigFormPy, + IngestWithConfigDataStreamRequestPy, + MetadataPy, + RecoveryStrategyPy, + RetryPolicyPy, + RunFormPy, + SiftStreamMetricsSnapshotPy, + ) from sift_client.client import SiftClient from sift_client.sift_types.ingestion import FlowConfig @@ -147,6 +144,8 @@ def __init__(self, recovery_strategy_py: RecoveryStrategyPy | None): Most users should use the factory methods (`retry_only()` or `retry_with_backups()`) instead of calling this constructor directly. 
""" + from sift_stream_bindings import RecoveryStrategyPy + self._recovery_strategy_py = recovery_strategy_py or RecoveryStrategyPy.default() def to_rust_config(self) -> RecoveryStrategyPy: @@ -169,6 +168,8 @@ def retry_only(cls, retry_policy: RetryPolicyPy | None = None) -> RecoveryStrate Returns: A RecoveryStrategyConfig configured for retry-only strategy. """ + from sift_stream_bindings import RecoveryStrategyPy, RetryPolicyPy + retry_policy_py = retry_policy or RetryPolicyPy.default() recovery_strategy_py = RecoveryStrategyPy.retry_only(retry_policy_py) @@ -188,6 +189,8 @@ def retry_with_backups(cls, retry_policy: RetryPolicyPy | None = None, disk_back Returns: A RecoveryStrategyConfig configured for retry with disk backups. """ + from sift_stream_bindings import DiskBackupPolicyPy, RecoveryStrategyPy, RetryPolicyPy + retry_policy_py = retry_policy or RetryPolicyPy.default() disk_backup_policy_py = disk_backup_policy or DiskBackupPolicyPy.default() @@ -349,6 +352,13 @@ async def create( Returns: An initialized IngestionConfigStreamingClient. """ + from sift_stream_bindings import ( + DurationPy, + IngestionConfigFormPy, + MetadataValuePy, + RunFormPy, + ) + instance = cls.__new__(cls) instance._sift_client = sift_client @@ -411,6 +421,8 @@ async def create( # Convert checkpoint_interval_seconds to DurationPy checkpoint_interval: DurationPy | None = None if checkpoint_interval_seconds is not None: + from sift_stream_bindings import DurationPy + checkpoint_interval = DurationPy(secs=checkpoint_interval_seconds, nanos=0) low_level_client = await IngestionConfigStreamingLowLevelClient.create_sift_stream_instance( @@ -495,6 +507,8 @@ async def attach_run(self, run: RunCreate | dict | str | Run | RunFormPy): Args: run: The run to attach. Can be a Run, RunCreate, dict, run ID string, or RunFormPy. 
""" + from sift_stream_bindings import RunFormPy, RunSelectorPy + if isinstance(run, RunFormPy): run_selector_py = RunSelectorPy.by_form(run) elif isinstance(run, dict): diff --git a/python/lib/sift_client/sift_types/__init__.py b/python/lib/sift_client/sift_types/__init__.py index 31fb95d8c..b55717c60 100644 --- a/python/lib/sift_client/sift_types/__init__.py +++ b/python/lib/sift_client/sift_types/__init__.py @@ -143,8 +143,8 @@ ) from sift_client.sift_types.ingestion import ( ChannelConfig, - FlowConfig, Flow, + FlowConfig, IngestionConfig, IngestionConfigCreate, ) diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index 0eab4a624..b1a47cc9a 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -2,8 +2,8 @@ import logging import math -from typing import TYPE_CHECKING, Any from datetime import datetime, timezone +from typing import TYPE_CHECKING, Any from google.protobuf.empty_pb2 import Empty from pydantic import BaseModel, ConfigDict, model_validator @@ -19,12 +19,10 @@ ) from sift.ingestion_configs.v2.ingestion_configs_pb2 import ( FlowConfig as FlowConfigProto, - CreateIngestionConfigFlowsRequest as FlowConfigsCreateProto, ) from sift.ingestion_configs.v2.ingestion_configs_pb2 import ( IngestionConfig as IngestionConfigProto, ) -from sift_stream_bindings import IngestionConfigFormPy from sift_client.sift_types._base import ( BaseType, @@ -42,6 +40,7 @@ ChannelDataTypePy, FlowConfigPy, FlowPy, + IngestionConfigFormPy, IngestWithConfigDataChannelValuePy, ) @@ -80,6 +79,8 @@ def _get_proto_class(self) -> type[CreateIngestionConfigRequestProto]: return CreateIngestionConfigRequestProto def _to_rust_form(self) -> IngestionConfigFormPy: + from sift_stream_bindings import IngestionConfigFormPy + if self.organization_id: logger.warning("OrgId is ignored when passing an IngestionConfigCreate to the ingestion client") @@ -289,6 +290,7 @@ def 
_from_proto( def _to_rust_form(self) -> FlowPy: from sift_stream_bindings import FlowPy + from sift_client._internal.low_level_wrappers.ingestion import to_rust_py_timestamp return FlowPy( diff --git a/python/lib/sift_client/sift_types/run.py b/python/lib/sift_client/sift_types/run.py index beced607a..6d3fc911e 100644 --- a/python/lib/sift_client/sift_types/run.py +++ b/python/lib/sift_client/sift_types/run.py @@ -6,7 +6,6 @@ from pydantic import model_validator from sift.runs.v2.runs_pb2 import CreateRunRequest as CreateRunRequestProto from sift.runs.v2.runs_pb2 import Run as RunProto -from sift_stream_bindings import MetadataPy, MetadataValuePy, RunFormPy from sift_client.sift_types._base import ( BaseType, @@ -19,6 +18,8 @@ from sift_client.util.metadata import metadata_dict_to_proto, metadata_proto_to_dict if TYPE_CHECKING: + from sift_stream_bindings import RunFormPy + from sift_client.client import SiftClient from sift_client.sift_types.asset import Asset @@ -173,6 +174,8 @@ def _get_proto_class(self) -> type[CreateRunRequestProto]: def _to_rust_form(self) -> RunFormPy: + from sift_stream_bindings import MetadataPy, MetadataValuePy, RunFormPy + if self.client_key: client_key = self.client_key else: From f0b25827d30fab2d0c88fed6dd802d1c41b95023 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Fri, 7 Nov 2025 13:24:21 -0800 Subject: [PATCH 13/47] add qol functions and cleanup --- .../_internal/low_level_wrappers/ingestion.py | 9 +++- python/lib/sift_client/resources/ingestion.py | 26 +++++++-- python/lib/sift_client/sift_types/channel.py | 36 +++++++++++++ .../lib/sift_client/sift_types/ingestion.py | 53 +++++++++++++++++-- 4 files changed, 114 insertions(+), 10 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index e3aec6429..f3ee302b6 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ 
b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -228,10 +228,12 @@ async def create_ingestion_config( class IngestionConfigStreamingLowLevelClient(LowLevelClientBase): _sift_stream_instance: SiftStreamPy + _known_flows: dict[str, FlowConfig] - def __init__(self, sift_stream_instance: SiftStreamPy): + def __init__(self, sift_stream_instance: SiftStreamPy, known_flows: dict[str, FlowConfig]): super().__init__() self._sift_stream_instance = sift_stream_instance + self._known_flows = known_flows @classmethod async def create_sift_stream_instance( @@ -289,7 +291,9 @@ async def create_sift_stream_instance( sift_stream_instance = await builder.build() - return cls(sift_stream_instance) + known_flows = {flow.name: FlowConfig._from_rust_config(flow) for flow in ingestion_config.flows} + + return cls(sift_stream_instance, known_flows) async def send(self, flow: FlowPy): await self._sift_stream_instance.send(flow) @@ -298,6 +302,7 @@ async def send_requests(self, requests: list[IngestWithConfigDataStreamRequestPy await self._sift_stream_instance.send_requests(requests) async def add_new_flows(self, flow_configs: list[FlowConfigPy]): + self._known_flows.update({flow_config.name: FlowConfig._from_rust_config(flow_config) for flow_config in flow_configs}) await self._sift_stream_instance.add_new_flows(flow_configs) async def attach_run(self, run_selector: RunSelectorPy): diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 34f29dad0..2cb3ae349 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -314,6 +314,7 @@ class IngestionConfigStreamingClient(ResourceBase): This client should be initialized using the create classmethod, and not directly. Once streaming has ended, the client should be shutdown using the finish method. 
""" + def __init__(self, sift_client: SiftClient, low_level_client: IngestionConfigStreamingLowLevelClient): """Initialize an IngestionConfigStreamingClient. Users should not initialize this class directly, but rather use the create classmethod.""" super().__init__(sift_client) @@ -441,7 +442,7 @@ async def create( return cls(sift_client, low_level_client) - async def send(self, flow: Flow | FlowPy): + async def send(self, *, flow: Flow | FlowPy): """Send telemetry to Sift in the form of a Flow. This is the entry-point to send actual telemetry to Sift. If a message is sent that @@ -469,7 +470,7 @@ async def send(self, flow: Flow | FlowPy): flow_py = flow await self._low_level_client.send(flow_py) - async def send_requests(self, requests: list[IngestWithConfigDataStreamRequestPy]): + async def send_requests(self, *, requests: list[IngestWithConfigDataStreamRequestPy]): """Send data in a manner identical to the raw gRPC service for ingestion-config based streaming. This method offers a way to send data that matches the raw gRPC service interface. You are @@ -484,7 +485,7 @@ async def send_requests(self, requests: list[IngestWithConfigDataStreamRequestPy """ await self._low_level_client.send_requests(requests) - async def add_new_flows(self, flow_configs: list[FlowConfig]): + async def add_new_flows(self, *, flow_configs: list[FlowConfig]): """Modify the existing ingestion config by adding new flows that weren't accounted for during initialization. This allows you to dynamically add new flow configurations to the ingestion config after @@ -497,7 +498,7 @@ async def add_new_flows(self, flow_configs: list[FlowConfig]): flow_configs_py = [flow_config._to_rust_config() for flow_config in flow_configs] await self._low_level_client.add_new_flows(flow_configs_py) - async def attach_run(self, run: RunCreate | dict | str | Run | RunFormPy): + async def attach_run(self, *, run: RunCreate | dict | str | Run | RunFormPy): """Attach a run to the stream. 
Any data provided through `send` after this function returns will be associated with @@ -565,6 +566,23 @@ def get_metrics_snapshot(self) -> SiftStreamMetricsSnapshotPy: """ return self._low_level_client.get_metrics_snapshot() + def get_flow_config(self, *, flow_name: str) -> FlowConfig: + """Retrieve a flow configuration by name. + + Args: + flow_name: The name of the flow configuration to retrieve. + + Returns: + The FlowConfig associated with the given flow name. + + Raises: + KeyError: If the flow name is not found in the known flows. + """ + flow_config = self._low_level_client._known_flows.get(flow_name) + if flow_config is None: + raise KeyError(f"FlowConfig {flow_name} is unknown to the ingestion client") + return flow_config + async def __aenter__(self): return self diff --git a/python/lib/sift_client/sift_types/channel.py b/python/lib/sift_client/sift_types/channel.py index 2c18dbb83..69452d87a 100644 --- a/python/lib/sift_client/sift_types/channel.py +++ b/python/lib/sift_client/sift_types/channel.py @@ -28,6 +28,8 @@ from sift_client.sift_types._base import BaseType if TYPE_CHECKING: + from sift_stream_bindings import ChannelBitFieldElementPy, ChannelDataTypePy + from sift_client.client import SiftClient from sift_client.sift_types.asset import Asset from sift_client.sift_types.run import Run @@ -110,6 +112,32 @@ def from_str(raw: str) -> ChannelDataType | None: return None raise Exception(f"Unknown channel data type: {raw}") + @staticmethod + def _from_rust_type(channel_data_type_py: ChannelDataTypePy) -> ChannelDataType: + # Use enum name for comparison to avoid PyO3 enum comparison issues + # Extract the enum name from the string representation + enum_str = str(channel_data_type_py) + enum_name = enum_str.split('.')[-1] if '.' 
in enum_str else enum_str + + mapping = { + "Double": ChannelDataType.DOUBLE, + "String": ChannelDataType.STRING, + "Enum": ChannelDataType.ENUM, + "BitField": ChannelDataType.BIT_FIELD, + "Bool": ChannelDataType.BOOL, + "Float": ChannelDataType.FLOAT, + "Int32": ChannelDataType.INT_32, + "Uint32": ChannelDataType.UINT_32, + "Int64": ChannelDataType.INT_64, + "Uint64": ChannelDataType.UINT_64, + "Bytes": ChannelDataType.BYTES, + } + + if enum_name in mapping: + return mapping[enum_name] + else: + raise ValueError(f"Unknown channel data type: {channel_data_type_py}") + @staticmethod def proto_data_class(data_type: ChannelDataType): """Return the appropriate protobuf class for the given channel data type. @@ -194,6 +222,14 @@ def _from_proto(cls, message: ChannelBitFieldElementPb) -> ChannelBitFieldElemen bit_count=message.bit_count, ) + @classmethod + def _from_rust_type(cls, bit_field_element_py: ChannelBitFieldElementPy) -> ChannelBitFieldElement: + return ChannelBitFieldElement( + name = bit_field_element_py.name, + index = bit_field_element_py.index, + bit_count = bit_field_element_py.bit_count + ) + def _to_proto(self) -> ChannelBitFieldElementPb: return ChannelBitFieldElementPb( name=self.name, diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index b1a47cc9a..da02d6829 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any from google.protobuf.empty_pb2 import Empty -from pydantic import BaseModel, ConfigDict, model_validator +from pydantic import BaseModel, ConfigDict, Field, model_validator from sift.ingest.v1.ingest_pb2 import IngestWithConfigDataChannelValue from sift.ingest.v1.ingest_pb2 import ( IngestWithConfigDataStreamRequest as IngestWithConfigDataStreamRequestProto, @@ -33,8 +33,6 @@ logger = logging.getLogger(__name__) if TYPE_CHECKING: - from datetime import datetime - from 
sift_stream_bindings import ( ChannelConfigPy, ChannelDataTypePy, @@ -92,6 +90,7 @@ def _to_rust_form(self) -> IngestionConfigFormPy: + class ChannelConfig(BaseType[ChannelConfigProto, "ChannelConfig"]): """Channel configuration model for ingestion purposes. @@ -161,6 +160,17 @@ def from_channel(cls, channel: Channel) -> ChannelConfig: enum_types=channel.enum_types, ) + @classmethod + def _from_rust_config(cls, channel_config_py: ChannelConfigPy) -> ChannelConfig: + return ChannelConfig( + name = channel_config_py.name, + description = channel_config_py.description or None, + unit = channel_config_py.unit or None, + data_type = ChannelDataType._from_rust_type(channel_config_py.data_type), + bit_field_elements = [ChannelBitFieldElement._from_rust_type(bfe) for bfe in channel_config_py.bit_field_elements], + enum_types = {enum.name: enum.key for enum in channel_config_py.enum_types}, + ) + def _to_config_proto(self) -> ChannelConfigProto: """Convert to ChannelConfigProto for ingestion.""" from sift.common.type.v1.channel_bit_field_element_pb2 import ( @@ -228,6 +238,13 @@ def _from_proto(cls, proto: FlowConfigProto, sift_client: SiftClient | None = No _client=sift_client, ) + @classmethod + def _from_rust_config(cls, flow_config_py: FlowConfigPy) -> FlowConfig: + return FlowConfig( + name = flow_config_py.name, + channels = [ChannelConfig._from_rust_config(channel) for channel in flow_config_py.channels] + ) + def _to_proto(self) -> FlowConfigProto: return FlowConfigProto( name=self.name, @@ -255,6 +272,34 @@ def add_channel(self, channel: ChannelConfig): raise ValueError("Cannot add a channel to a flow after creation") self.channels.append(channel) + def as_flow(self, *, timestamp: datetime | None, values: dict[str, Any]) -> Flow: + """Create a Flow from this FlowConfig with the provided values. + + Args: + timestamp: The timestamp for the flow. If None, uses the current UTC time. + values: A dictionary mapping channel names to their values. 
Only channels + present in this dictionary will be included in the resulting Flow. + + Returns: + A Flow object with channel values created from the provided values dictionary. + """ + found_values = {} + channel_values = [] + for channel in self.channels: + if channel.name in values: + channel_values.append(channel.as_channel_value(values[channel.name])) + found_values[channel.name] = None + + missing_values = values.keys() - found_values.keys() + if missing_values: + raise ValueError(f"Provided channel values which do not exist in the flow config: {missing_values}") + + return Flow( + flow = self.name, + timestamp = timestamp or datetime.now(timezone.utc), + channel_values = channel_values + ) + class Flow(BaseType[IngestWithConfigDataStreamRequestProto, "Flow"]): """Model representing a data flow for ingestion. @@ -267,7 +312,7 @@ class Flow(BaseType[IngestWithConfigDataStreamRequestProto, "Flow"]): model_config = ConfigDict(frozen=False) ingestion_config_id: str | None = None flow: str - timestamp: datetime + timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) channel_values: list[ChannelValue] run_id: str | None = None end_stream_on_validation_error: bool | None = None From 6ba20da28435c8de52ac3e393fcfac9d501510fd Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Fri, 7 Nov 2025 13:26:44 -0800 Subject: [PATCH 14/47] ruff fmt --- .../_internal/low_level_wrappers/ingestion.py | 18 +++-- python/lib/sift_client/resources/ingestion.py | 35 ++++----- python/lib/sift_client/sift_types/channel.py | 12 ++-- .../lib/sift_client/sift_types/ingestion.py | 72 +++++++++++-------- python/lib/sift_client/sift_types/run.py | 1 - 5 files changed, 78 insertions(+), 60 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index f3ee302b6..3d7c63ea6 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ 
b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -91,7 +91,6 @@ def __init__(self, grpc_client: GrpcClient): """ super().__init__(grpc_client=grpc_client) - async def get_ingestion_config_flows(self, ingestion_config_id: str) -> list[FlowConfig]: """Get the flows for an ingestion config.""" res = await self._grpc_client.get_stub(IngestionConfigServiceStub).GetIngestionConfig( @@ -224,8 +223,6 @@ async def create_ingestion_config( return ingestion_config_id - - class IngestionConfigStreamingLowLevelClient(LowLevelClientBase): _sift_stream_instance: SiftStreamPy _known_flows: dict[str, FlowConfig] @@ -276,8 +273,8 @@ async def create_sift_stream_instance( init_tracing(tracing_config.level) builder = SiftStreamBuilderPy( - uri = grpc_uri, - apikey = api_key, + uri=grpc_uri, + apikey=api_key, ) builder.enable_tls = enable_tls @@ -291,7 +288,9 @@ async def create_sift_stream_instance( sift_stream_instance = await builder.build() - known_flows = {flow.name: FlowConfig._from_rust_config(flow) for flow in ingestion_config.flows} + known_flows = { + flow.name: FlowConfig._from_rust_config(flow) for flow in ingestion_config.flows + } return cls(sift_stream_instance, known_flows) @@ -302,7 +301,12 @@ async def send_requests(self, requests: list[IngestWithConfigDataStreamRequestPy await self._sift_stream_instance.send_requests(requests) async def add_new_flows(self, flow_configs: list[FlowConfigPy]): - self._known_flows.update({flow_config.name: FlowConfig._from_rust_config(flow_config) for flow_config in flow_configs}) + self._known_flows.update( + { + flow_config.name: FlowConfig._from_rust_config(flow_config) + for flow_config in flow_configs + } + ) await self._sift_stream_instance.add_new_flows(flow_configs) async def attach_run(self, run_selector: RunSelectorPy): diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 2cb3ae349..f1ad4fd5a 100644 --- 
a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -173,10 +173,14 @@ def retry_only(cls, retry_policy: RetryPolicyPy | None = None) -> RecoveryStrate retry_policy_py = retry_policy or RetryPolicyPy.default() recovery_strategy_py = RecoveryStrategyPy.retry_only(retry_policy_py) - return cls(recovery_strategy_py = recovery_strategy_py) + return cls(recovery_strategy_py=recovery_strategy_py) @classmethod - def retry_with_backups(cls, retry_policy: RetryPolicyPy | None = None, disk_backup_policy: DiskBackupPolicyPy | None = None) -> RecoveryStrategyConfig: + def retry_with_backups( + cls, + retry_policy: RetryPolicyPy | None = None, + disk_backup_policy: DiskBackupPolicyPy | None = None, + ) -> RecoveryStrategyConfig: """Create a recovery strategy with retries re-ingestion using disk based backups. Args: @@ -195,11 +199,10 @@ def retry_with_backups(cls, retry_policy: RetryPolicyPy | None = None, disk_back disk_backup_policy_py = disk_backup_policy or DiskBackupPolicyPy.default() recovery_strategy_py = RecoveryStrategyPy.retry_with_backups( - retry_policy = retry_policy_py, - disk_backup_policy = disk_backup_policy_py, + retry_policy=retry_policy_py, + disk_backup_policy=disk_backup_policy_py, ) - return cls(recovery_strategy_py = recovery_strategy_py) - + return cls(recovery_strategy_py=recovery_strategy_py) class IngestionAPIAsync(ResourceBase): @@ -221,7 +224,6 @@ def __init__(self, sift_client: SiftClient): super().__init__(sift_client) self._low_level_client = IngestionLowLevelClient(grpc_client=self.client.grpc_client) - async def create_ingestion_config_streaming_client( self, ingestion_config: IngestionConfig | IngestionConfigCreate | IngestionConfigFormPy, @@ -315,7 +317,9 @@ class IngestionConfigStreamingClient(ResourceBase): This client should be initialized using the create classmethod, and not directly. Once streaming has ended, the client should be shutdown using the finish method. 
""" - def __init__(self, sift_client: SiftClient, low_level_client: IngestionConfigStreamingLowLevelClient): + def __init__( + self, sift_client: SiftClient, low_level_client: IngestionConfigStreamingLowLevelClient + ): """Initialize an IngestionConfigStreamingClient. Users should not initialize this class directly, but rather use the create classmethod.""" super().__init__(sift_client) self._low_level_client = low_level_client @@ -373,9 +377,9 @@ async def create( # SiftStream will retrieve the existing config from the client_key asset_name = sift_client.assets.get(asset_id=ingestion_config.asset_id) ingestion_config_form = IngestionConfigFormPy( - asset_name = asset_name, - client_key = ingestion_config.client_key, - flows = [], + asset_name=asset_name, + client_key=ingestion_config.client_key, + flows=[], ) elif isinstance(ingestion_config, IngestionConfigCreate): ingestion_config_form = ingestion_config._to_rust_form() @@ -406,17 +410,16 @@ async def create( # Convert asset_tags to list of strings asset_tags_list: list[str] | None = None if asset_tags is not None: - asset_tags_list = [ - tag.name if isinstance(tag, Tag) else tag for tag in asset_tags - ] + asset_tags_list = [tag.name if isinstance(tag, Tag) else tag for tag in asset_tags] - # Convert asset_metadata dict to list of MetadataPy + # Convert asset_metadata dict to list of MetadataPy asset_metadata_list: list[MetadataPy] | None = None if asset_metadata is not None: from sift_stream_bindings import MetadataPy asset_metadata_list = [ - MetadataPy(key=key, value=MetadataValuePy(value)) for key, value in asset_metadata.items() + MetadataPy(key=key, value=MetadataValuePy(value)) + for key, value in asset_metadata.items() ] # Convert checkpoint_interval_seconds to DurationPy diff --git a/python/lib/sift_client/sift_types/channel.py b/python/lib/sift_client/sift_types/channel.py index 69452d87a..69ba4b8ed 100644 --- a/python/lib/sift_client/sift_types/channel.py +++ 
b/python/lib/sift_client/sift_types/channel.py @@ -117,7 +117,7 @@ def _from_rust_type(channel_data_type_py: ChannelDataTypePy) -> ChannelDataType: # Use enum name for comparison to avoid PyO3 enum comparison issues # Extract the enum name from the string representation enum_str = str(channel_data_type_py) - enum_name = enum_str.split('.')[-1] if '.' in enum_str else enum_str + enum_name = enum_str.split(".")[-1] if "." in enum_str else enum_str mapping = { "Double": ChannelDataType.DOUBLE, @@ -223,11 +223,13 @@ def _from_proto(cls, message: ChannelBitFieldElementPb) -> ChannelBitFieldElemen ) @classmethod - def _from_rust_type(cls, bit_field_element_py: ChannelBitFieldElementPy) -> ChannelBitFieldElement: + def _from_rust_type( + cls, bit_field_element_py: ChannelBitFieldElementPy + ) -> ChannelBitFieldElement: return ChannelBitFieldElement( - name = bit_field_element_py.name, - index = bit_field_element_py.index, - bit_count = bit_field_element_py.bit_count + name=bit_field_element_py.name, + index=bit_field_element_py.index, + bit_count=bit_field_element_py.bit_count, ) def _to_proto(self) -> ChannelBitFieldElementPb: diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index da02d6829..0bd584ef4 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -80,17 +80,18 @@ def _to_rust_form(self) -> IngestionConfigFormPy: from sift_stream_bindings import IngestionConfigFormPy if self.organization_id: - logger.warning("OrgId is ignored when passing an IngestionConfigCreate to the ingestion client") + logger.warning( + "OrgId is ignored when passing an IngestionConfigCreate to the ingestion client" + ) return IngestionConfigFormPy( - asset_name = self.asset_name, - flows = [flow_config._to_rust_config() for flow_config in self.flows], - client_key = self.client_key or self.asset_name # Default to using asset_name as the client_key + 
asset_name=self.asset_name, + flows=[flow_config._to_rust_config() for flow_config in self.flows], + client_key=self.client_key + or self.asset_name, # Default to using asset_name as the client_key ) - - class ChannelConfig(BaseType[ChannelConfigProto, "ChannelConfig"]): """Channel configuration model for ingestion purposes. @@ -163,12 +164,15 @@ def from_channel(cls, channel: Channel) -> ChannelConfig: @classmethod def _from_rust_config(cls, channel_config_py: ChannelConfigPy) -> ChannelConfig: return ChannelConfig( - name = channel_config_py.name, - description = channel_config_py.description or None, - unit = channel_config_py.unit or None, - data_type = ChannelDataType._from_rust_type(channel_config_py.data_type), - bit_field_elements = [ChannelBitFieldElement._from_rust_type(bfe) for bfe in channel_config_py.bit_field_elements], - enum_types = {enum.name: enum.key for enum in channel_config_py.enum_types}, + name=channel_config_py.name, + description=channel_config_py.description or None, + unit=channel_config_py.unit or None, + data_type=ChannelDataType._from_rust_type(channel_config_py.data_type), + bit_field_elements=[ + ChannelBitFieldElement._from_rust_type(bfe) + for bfe in channel_config_py.bit_field_elements + ], + enum_types={enum.name: enum.key for enum in channel_config_py.enum_types}, ) def _to_config_proto(self) -> ChannelConfigProto: @@ -230,7 +234,9 @@ class FlowConfig(BaseType[FlowConfigProto, "FlowConfig"]): run_id: str | None = None @classmethod - def _from_proto(cls, proto: FlowConfigProto, sift_client: SiftClient | None = None) -> FlowConfig: + def _from_proto( + cls, proto: FlowConfigProto, sift_client: SiftClient | None = None + ) -> FlowConfig: return cls( proto=proto, name=proto.name, @@ -241,8 +247,10 @@ def _from_proto(cls, proto: FlowConfigProto, sift_client: SiftClient | None = No @classmethod def _from_rust_config(cls, flow_config_py: FlowConfigPy) -> FlowConfig: return FlowConfig( - name = flow_config_py.name, - channels = 
[ChannelConfig._from_rust_config(channel) for channel in flow_config_py.channels] + name=flow_config_py.name, + channels=[ + ChannelConfig._from_rust_config(channel) for channel in flow_config_py.channels + ], ) def _to_proto(self) -> FlowConfigProto: @@ -292,12 +300,14 @@ def as_flow(self, *, timestamp: datetime | None, values: dict[str, Any]) -> Flow missing_values = values.keys() - found_values.keys() if missing_values: - raise ValueError(f"Provided channel values which do not exist in the flow config: {missing_values}") + raise ValueError( + f"Provided channel values which do not exist in the flow config: {missing_values}" + ) return Flow( - flow = self.name, - timestamp = timestamp or datetime.now(timezone.utc), - channel_values = channel_values + flow=self.name, + timestamp=timestamp or datetime.now(timezone.utc), + channel_values=channel_values, ) @@ -324,13 +334,13 @@ def _from_proto( ) -> IngestionConfig: return cls( proto=proto, - ingestion_config_id = proto.ingestion_config_id, - flow = proto.flow, - timestamp = proto.timestamp.ToDatetime(tzinfo=timezone.utc), - channel_values = proto.channel_values, - run_id = proto.run_id, - end_stream_on_validation_error = proto.end_stream_on_validation_error, - organization_id = proto.organization_id, + ingestion_config_id=proto.ingestion_config_id, + flow=proto.flow, + timestamp=proto.timestamp.ToDatetime(tzinfo=timezone.utc), + channel_values=proto.channel_values, + run_id=proto.run_id, + end_stream_on_validation_error=proto.end_stream_on_validation_error, + organization_id=proto.organization_id, ) def _to_rust_form(self) -> FlowPy: @@ -339,9 +349,9 @@ def _to_rust_form(self) -> FlowPy: from sift_client._internal.low_level_wrappers.ingestion import to_rust_py_timestamp return FlowPy( - flow_name = self.flow, - timestamp = to_rust_py_timestamp(self.timestamp), - values = [channel_value.to_rust_form() for channel_value in self.channel_values] + flow_name=self.flow, + timestamp=to_rust_py_timestamp(self.timestamp), + 
values=[channel_value.to_rust_form() for channel_value in self.channel_values], ) @@ -384,8 +394,8 @@ def to_rust_form(self): raise ValueError(f"Invalid data type: {self.ty}") return ChannelValuePy( - name = self.name, - value = value_py, + name=self.name, + value=value_py, ) diff --git a/python/lib/sift_client/sift_types/run.py b/python/lib/sift_client/sift_types/run.py index 6d3fc911e..5668fa9fc 100644 --- a/python/lib/sift_client/sift_types/run.py +++ b/python/lib/sift_client/sift_types/run.py @@ -172,7 +172,6 @@ class RunCreate(RunBase, ModelCreate[CreateRunRequestProto]): def _get_proto_class(self) -> type[CreateRunRequestProto]: return CreateRunRequestProto - def _to_rust_form(self) -> RunFormPy: from sift_stream_bindings import MetadataPy, MetadataValuePy, RunFormPy From beba8c3b696243d99421f34a2422680268d34fd4 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Sat, 8 Nov 2025 10:16:50 -0800 Subject: [PATCH 15/47] add tests and cleanup --- .../_internal/low_level_wrappers/ingestion.py | 76 ------------------- .../_tests/resources/test_ingestion.py | 53 +++++++++++++ .../_tests/sift_types/test_ingestion.py | 70 ++++++++++++++++- python/lib/sift_client/resources/ingestion.py | 49 ++---------- .../lib/sift_client/sift_types/ingestion.py | 10 +-- 5 files changed, 133 insertions(+), 125 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index 3d7c63ea6..cc18ac7d2 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -147,82 +147,6 @@ def _hash_flows(self, asset_name: str, flows: list[FlowConfig]) -> str: return m.hexdigest() - async def create_ingestion_config( - self, - *, - asset_name: str, - flows: list[FlowConfig], - client_key: str | None = None, - ) -> str: - """Create an ingestion config. - - Args: - asset_name: The name of the asset to ingest to. 
- flows: The flows to ingest. - client_key: The client key to use for ingestion. If not provided, a new one will be generated. - organization_id: The organization id to use for ingestion. Only needed if the user is part of several organizations. - - Returns: - The id of the new or found ingestion config. - """ - from sift_stream_bindings import IngestionConfigFormPy - - self._ensure_sift_stream_bindings() - - ingestion_config_id = None - if client_key: - logger.debug(f"Getting ingestion config id for client key {client_key}") - ingestion_config_id = await self.get_ingestion_config_id_from_client_key(client_key) - if ingestion_config_id: - # Perform validation that the flows are valid for the ingestion config. - existing_flows = await self.get_ingestion_config_flows(ingestion_config_id) - for flow in flows: - if flow.name in {existing_flow.name for existing_flow in existing_flows}: - raise ValueError( - f"Flow {flow.name} already exists for ingestion client {client_key}" - ) - else: - client_key = self._hash_flows(asset_name, flows) - try: - logger.debug(f"Getting ingestion config id from generated client key {client_key}") - ingestion_config_id = await self.get_ingestion_config_id_from_client_key(client_key) - except ValueError: - logger.debug( - f"No ingestion config found for client key {client_key}. Creating new one." - ) - pass - - data_queue, ingestion_config, thread = ( - self.stream_cache.get(ingestion_config_id, (None, None, None)) - if ingestion_config_id - else (None, None, None) - ) - if not (thread and thread.is_alive()): - ingestion_config = IngestionConfigFormPy( - asset_name=asset_name, - flows=[flow._to_rust_config() for flow in flows], - client_key=client_key, - ) - - cache_entry = self._new_ingestion_thread(ingestion_config_id or "", ingestion_config) - if not ingestion_config_id: - # No ingestion config ID exists for client key but stream builder in ingestion thread should create it. 
- await cache_entry.thread.await_stream_build() - ingestion_config_id = await self.get_ingestion_config_id_from_client_key(client_key) - assert ingestion_config_id is not None, ( - "No ingestion config id found after building new stream. Likely server error." - ) - logger.debug(f"Built new stream for ingestion config {ingestion_config_id}") - self.stream_cache[ingestion_config_id] = cache_entry - - for flow in flows: - flow.ingestion_config_id = ingestion_config_id - - if not ingestion_config_id: - raise ValueError("No ingestion config id found") - return ingestion_config_id - - class IngestionConfigStreamingLowLevelClient(LowLevelClientBase): _sift_stream_instance: SiftStreamPy _known_flows: dict[str, FlowConfig] diff --git a/python/lib/sift_client/_tests/resources/test_ingestion.py b/python/lib/sift_client/_tests/resources/test_ingestion.py index d64b91ea6..6f2fbd51a 100644 --- a/python/lib/sift_client/_tests/resources/test_ingestion.py +++ b/python/lib/sift_client/_tests/resources/test_ingestion.py @@ -442,3 +442,56 @@ async def test_resume_ingestion_after_wait(self, sift_client, test_run): flow.ingest(timestamp=timestamp2, channel_values={"test-channel": 2.0}) sift_client.async_.ingestion.wait_for_ingestion_to_complete(timeout=2) + + class TestIngestionConfigStreamingClient: + """Tests for IngestionConfigStreamingClient methods.""" + + @pytest.mark.asyncio + async def test_get_flow_config_retrieves_known_flow(self, sift_client, test_run): + """Test that get_flow_config retrieves a known flow configuration.""" + from sift_client.sift_types.ingestion import FlowConfig, IngestionConfigCreate + + flow_config = FlowConfig( + name="test-flow-config", + channels=[ + ChannelConfig(name="test-channel", data_type=ChannelDataType.DOUBLE), + ], + ) + + ingestion_config = IngestionConfigCreate( + asset_name=ASSET_NAME, + flows=[flow_config], + ) + + async with await sift_client.async_.ingestion.create_ingestion_config_streaming_client( + ingestion_config=ingestion_config, + 
run=test_run, + ) as client: + retrieved_flow = client.get_flow_config(flow_name="test-flow-config") + assert retrieved_flow.name == "test-flow-config" + assert len(retrieved_flow.channels) == 1 + assert retrieved_flow.channels[0].name == "test-channel" + + @pytest.mark.asyncio + async def test_get_flow_config_raises_on_unknown_flow(self, sift_client, test_run): + """Test that get_flow_config raises KeyError for unknown flow.""" + from sift_client.sift_types.ingestion import FlowConfig, IngestionConfigCreate + + flow_config = FlowConfig( + name="test-flow-config", + channels=[ + ChannelConfig(name="test-channel", data_type=ChannelDataType.DOUBLE), + ], + ) + + ingestion_config = IngestionConfigCreate( + asset_name=ASSET_NAME, + flows=[flow_config], + ) + + async with await sift_client.async_.ingestion.create_ingestion_config_streaming_client( + ingestion_config=ingestion_config, + run=test_run, + ) as client: + with pytest.raises(KeyError, match="FlowConfig unknown-flow is unknown"): + client.get_flow_config(flow_name="unknown-flow") diff --git a/python/lib/sift_client/_tests/sift_types/test_ingestion.py b/python/lib/sift_client/_tests/sift_types/test_ingestion.py index 6b29abafe..1990a77fa 100644 --- a/python/lib/sift_client/_tests/sift_types/test_ingestion.py +++ b/python/lib/sift_client/_tests/sift_types/test_ingestion.py @@ -6,7 +6,12 @@ import pytest from sift_client.sift_types.channel import ChannelBitFieldElement, ChannelDataType -from sift_client.sift_types.ingestion import ChannelConfig, Flow, IngestionConfig +from sift_client.sift_types.ingestion import ( + ChannelConfig, + Flow, + FlowConfig, + IngestionConfig, +) class TestChannelConfig: @@ -163,6 +168,69 @@ def test_ingest_raises_without_config_id(self, mock_client): flow.ingest(timestamp=timestamp, channel_values=channel_values) +class TestFlowConfig: + """Unit tests for FlowConfig model.""" + + def test_as_flow_creates_flow_with_values(self): + """Test that as_flow creates a Flow with correct channel 
values.""" + flow_config = FlowConfig( + name="test_flow", + channels=[ + ChannelConfig(name="channel1", data_type=ChannelDataType.DOUBLE), + ChannelConfig(name="channel2", data_type=ChannelDataType.INT_64), + ], + ) + + timestamp = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc) + values = {"channel1": 42.5, "channel2": 100} + + flow = flow_config.as_flow(timestamp=timestamp, values=values) + + assert flow.flow == "test_flow" + assert flow.timestamp == timestamp + assert len(flow.channel_values) == 2 + assert flow.channel_values[0].name == "channel1" + assert flow.channel_values[0].value == 42.5 + assert flow.channel_values[1].name == "channel2" + assert flow.channel_values[1].value == 100 + + def test_as_flow_raises_on_unknown_channel(self): + """Test that as_flow raises ValueError for unknown channel values.""" + flow_config = FlowConfig( + name="test_flow", + channels=[ChannelConfig(name="channel1", data_type=ChannelDataType.DOUBLE)], + ) + + timestamp = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc) + values = {"channel1": 42.5, "unknown_channel": 100} + + with pytest.raises( + ValueError, + match="Provided channel values which do not exist in the flow config", + ): + flow_config.as_flow(timestamp=timestamp, values=values) + + def test_as_flow_only_includes_provided_channels(self): + """Test that as_flow only includes channels with provided values.""" + flow_config = FlowConfig( + name="test_flow", + channels=[ + ChannelConfig(name="channel1", data_type=ChannelDataType.DOUBLE), + ChannelConfig(name="channel2", data_type=ChannelDataType.FLOAT), + ChannelConfig(name="channel3", data_type=ChannelDataType.INT_64), + ], + ) + + timestamp = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc) + values = {"channel1": 42.5, "channel3": 100} + + flow = flow_config.as_flow(timestamp=timestamp, values=values) + + assert len(flow.channel_values) == 2 + assert flow.channel_values[0].name == "channel1" + assert flow.channel_values[1].name == "channel3" + + class 
TestIngestionConfig: """Unit tests for IngestionConfig model.""" diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index f1ad4fd5a..7ebe65ddb 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -266,46 +266,6 @@ async def create_ingestion_config_streaming_client( tracing_config=tracing_config, ) - async def create_ingestion_config( - self, - *, - asset_name: str, - run_id: str | None = None, - flows: list[FlowConfig], - client_key: str | None = None, - ) -> str: - """Create an ingestion config. This is provided for direct use of the ingestion config API, and not the preferred way to create ingestion configs for streaming through SiftClient. - - Args: - asset_name: The name of the asset for this ingestion config. - run_id: Optionally provide a run ID to create a run for the given asset. - flows: List of flow configurations. - client_key: Optional client key for identifying this config. - organization_id: The organization ID. - - Returns: - The ingestion config ID. - - Raises: - ValueError: If asset_name is not provided or flows is empty. - """ - if not asset_name: - raise ValueError("asset_name must be provided") - if not flows: - raise ValueError("flows must not be empty") - - ingestion_config_id = await self._low_level_client.create_ingestion_config( - asset_name=asset_name, - flows=flows, - client_key=client_key, - ) - for flow in flows: - flow._apply_client_to_instance(self.client) - if run_id: - flow.run_id = run_id - - return ingestion_config_id - class IngestionConfigStreamingClient(ResourceBase): """A client for streaming ingestion with an ingestion config. 
@@ -375,9 +335,9 @@ async def create( # Convert the ingestion_config variants to a IngestionConfigFormPy if isinstance(ingestion_config, IngestionConfig): # SiftStream will retrieve the existing config from the client_key - asset_name = sift_client.assets.get(asset_id=ingestion_config.asset_id) + asset = sift_client.assets.get(asset_id=ingestion_config.asset_id) ingestion_config_form = IngestionConfigFormPy( - asset_name=asset_name, + asset_name=asset.name, client_key=ingestion_config.client_key, flows=[], ) @@ -387,9 +347,10 @@ async def create( ingestion_config_form = ingestion_config # Convert the recovery strategy variants + recovery_strategy_py: RecoveryStrategyPy | None = None if isinstance(recovery_strategy, RecoveryStrategyConfig): recovery_strategy_py = recovery_strategy.to_rust_config() - else: + elif isinstance(recovery_strategy, RecoveryStrategyPy): recovery_strategy_py = recovery_strategy # Convert the run variants to a run or run_id @@ -520,6 +481,8 @@ async def attach_run(self, *, run: RunCreate | dict | str | Run | RunFormPy): run_form_py = run_create._to_rust_form() run_selector_py = RunSelectorPy.by_form(run_form_py) elif isinstance(run, Run): + if run.id_ is None: + raise ValueError("The Run object must contain a run_id") run_selector_py = RunSelectorPy.by_id(run.id_) elif isinstance(run, RunCreate): run_form_py = run._to_rust_form() diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index 0bd584ef4..0edc20655 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -69,7 +69,7 @@ class IngestionConfigCreate(ModelCreate[CreateIngestionConfigRequestProto]): """Create model for IngestionConfig.""" asset_name: str - flows: list[FlowConfig] = None + flows: list[FlowConfig] | None = None organization_id: str | None = None client_key: str | None = None @@ -86,7 +86,7 @@ def _to_rust_form(self) -> IngestionConfigFormPy: return 
IngestionConfigFormPy( asset_name=self.asset_name, - flows=[flow_config._to_rust_config() for flow_config in self.flows], + flows=[flow_config._to_rust_config() for flow_config in self.flows] if self.flows else [], client_key=self.client_key or self.asset_name, # Default to using asset_name as the client_key ) @@ -280,7 +280,7 @@ def add_channel(self, channel: ChannelConfig): raise ValueError("Cannot add a channel to a flow after creation") self.channels.append(channel) - def as_flow(self, *, timestamp: datetime | None, values: dict[str, Any]) -> Flow: + def as_flow(self, *, timestamp: datetime | None = None, values: dict[str, Any]) -> Flow: """Create a Flow from this FlowConfig with the provided values. Args: @@ -291,7 +291,7 @@ def as_flow(self, *, timestamp: datetime | None, values: dict[str, Any]) -> Flow Returns: A Flow object with channel values created from the provided values dictionary. """ - found_values = {} + found_values: dict[str, None] = {} channel_values = [] for channel in self.channels: if channel.name in values: @@ -331,7 +331,7 @@ class Flow(BaseType[IngestWithConfigDataStreamRequestProto, "Flow"]): @classmethod def _from_proto( cls, proto: IngestWithConfigDataStreamRequestProto, sift_client: SiftClient | None = None - ) -> IngestionConfig: + ) -> Flow: return cls( proto=proto, ingestion_config_id=proto.ingestion_config_id, From 0cb7e4aa704f95ce063a8950b202edd7db3bad64 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Sat, 8 Nov 2025 10:22:06 -0800 Subject: [PATCH 16/47] update pyproject --- python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 5ca088b3d..cc1047c39 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -71,7 +71,7 @@ docs = ["mkdocs", openssl = ["pyOpenSSL<24.0.0", "types-pyOpenSSL<24.0.0", "cffi~=1.14"] tdms = ["npTDMS~=1.9"] rosbags = ["rosbags~=0.0"] -sift-stream = ["sift-stream-bindings>=0.1.2"] +sift-stream = 
["sift-stream-bindings>=0.2.0-rc"] hdf5 = ["h5py~=3.11", "polars~=1.8"] # Ensure any new user build extras are added to .github/workflows/python_build.yaml From eb74a04ac349ff6490e27d6643b04b0b2c446631 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Sat, 8 Nov 2025 10:25:23 -0800 Subject: [PATCH 17/47] type fix --- python/lib/sift_client/resources/ingestion.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 7ebe65ddb..68a069bad 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -321,6 +321,7 @@ async def create( DurationPy, IngestionConfigFormPy, MetadataValuePy, + RecoveryStrategyPy, RunFormPy, ) From 329872d9b825be97d5c50bc2fb340b059017583a Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 13:08:46 -0800 Subject: [PATCH 18/47] change remaining "to_rust..." functions to private --- .../sift_client/_internal/low_level_wrappers/ingestion.py | 2 +- python/lib/sift_client/resources/ingestion.py | 4 ++-- python/lib/sift_client/sift_types/ingestion.py | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index cc18ac7d2..85428be0f 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -54,7 +54,7 @@ from sift_client.resources.ingestion import TracingConfig -def to_rust_py_timestamp(time: datetime) -> TimeValuePy: +def _to_rust_py_timestamp(time: datetime) -> TimeValuePy: """Convert a Python datetime to a Rust TimeValuePy. 
Args: diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 68a069bad..e71653251 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -148,7 +148,7 @@ def __init__(self, recovery_strategy_py: RecoveryStrategyPy | None): self._recovery_strategy_py = recovery_strategy_py or RecoveryStrategyPy.default() - def to_rust_config(self) -> RecoveryStrategyPy: + def _to_rust_config(self) -> RecoveryStrategyPy: """Convert to RecoveryStrategyPy for use with the ingestion client. Returns: @@ -350,7 +350,7 @@ async def create( # Convert the recovery strategy variants recovery_strategy_py: RecoveryStrategyPy | None = None if isinstance(recovery_strategy, RecoveryStrategyConfig): - recovery_strategy_py = recovery_strategy.to_rust_config() + recovery_strategy_py = recovery_strategy._to_rust_config() elif isinstance(recovery_strategy, RecoveryStrategyPy): recovery_strategy_py = recovery_strategy diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index 0edc20655..d100184a2 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -346,12 +346,12 @@ def _from_proto( def _to_rust_form(self) -> FlowPy: from sift_stream_bindings import FlowPy - from sift_client._internal.low_level_wrappers.ingestion import to_rust_py_timestamp + from sift_client._internal.low_level_wrappers.ingestion import _to_rust_py_timestamp return FlowPy( flow_name=self.flow, - timestamp=to_rust_py_timestamp(self.timestamp), - values=[channel_value.to_rust_form() for channel_value in self.channel_values], + timestamp=_to_rust_py_timestamp(self.timestamp), + values=[channel_value._to_rust_form() for channel_value in self.channel_values], ) @@ -366,7 +366,7 @@ class ChannelValue(BaseModel): ty: ChannelDataType value: Any - def to_rust_form(self): + def _to_rust_form(self): """Convert 
this ChannelValue to its Rust form for ingestion.""" from sift_stream_bindings import ChannelValuePy, ValuePy From b96580f7561539c4b07fb836678fa94381b38d17 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 13:27:16 -0800 Subject: [PATCH 19/47] Add comments and clean up imports --- .../_internal/low_level_wrappers/ingestion.py | 3 +++ python/lib/sift_client/resources/ingestion.py | 10 ++++++---- python/lib/sift_client/sift_types/ingestion.py | 8 ++++++++ python/lib/sift_client/sift_types/run.py | 1 + 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index 85428be0f..6024b698d 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -63,6 +63,7 @@ def _to_rust_py_timestamp(time: datetime) -> TimeValuePy: Returns: A TimeValuePy representation """ + # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import TimeValuePy ts = time.timestamp() @@ -171,6 +172,7 @@ async def create_sift_stream_instance( enable_tls: bool = True, tracing_config: TracingConfig | None = None, ) -> IngestionConfigStreamingLowLevelClient: + # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import ( SiftStreamBuilderPy, init_tracing, @@ -186,6 +188,7 @@ async def create_sift_stream_instance( if tracing_config.log_dir is not None: # Use file logging + # If no max_log_files provided, default to 7 (1 week of logs) init_tracing_with_file( tracing_config.level, tracing_config.log_dir, diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index e71653251..00ec4f579 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ 
b/python/lib/sift_client/resources/ingestion.py @@ -144,6 +144,7 @@ def __init__(self, recovery_strategy_py: RecoveryStrategyPy | None): Most users should use the factory methods (`retry_only()` or `retry_with_backups()`) instead of calling this constructor directly. """ + # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import RecoveryStrategyPy self._recovery_strategy_py = recovery_strategy_py or RecoveryStrategyPy.default() @@ -168,6 +169,7 @@ def retry_only(cls, retry_policy: RetryPolicyPy | None = None) -> RecoveryStrate Returns: A RecoveryStrategyConfig configured for retry-only strategy. """ + # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import RecoveryStrategyPy, RetryPolicyPy retry_policy_py = retry_policy or RetryPolicyPy.default() @@ -193,6 +195,7 @@ def retry_with_backups( Returns: A RecoveryStrategyConfig configured for retry with disk backups. """ + # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import DiskBackupPolicyPy, RecoveryStrategyPy, RetryPolicyPy retry_policy_py = retry_policy or RetryPolicyPy.default() @@ -317,9 +320,11 @@ async def create( Returns: An initialized IngestionConfigStreamingClient. 
""" + # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import ( DurationPy, IngestionConfigFormPy, + MetadataPy, MetadataValuePy, RecoveryStrategyPy, RunFormPy, @@ -377,8 +382,6 @@ async def create( # Convert asset_metadata dict to list of MetadataPy asset_metadata_list: list[MetadataPy] | None = None if asset_metadata is not None: - from sift_stream_bindings import MetadataPy - asset_metadata_list = [ MetadataPy(key=key, value=MetadataValuePy(value)) for key, value in asset_metadata.items() @@ -387,8 +390,6 @@ async def create( # Convert checkpoint_interval_seconds to DurationPy checkpoint_interval: DurationPy | None = None if checkpoint_interval_seconds is not None: - from sift_stream_bindings import DurationPy - checkpoint_interval = DurationPy(secs=checkpoint_interval_seconds, nanos=0) low_level_client = await IngestionConfigStreamingLowLevelClient.create_sift_stream_instance( @@ -473,6 +474,7 @@ async def attach_run(self, *, run: RunCreate | dict | str | Run | RunFormPy): Args: run: The run to attach. Can be a Run, RunCreate, dict, run ID string, or RunFormPy. 
""" + # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import RunFormPy, RunSelectorPy if isinstance(run, RunFormPy): diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index d100184a2..fee68a905 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -77,6 +77,7 @@ def _get_proto_class(self) -> type[CreateIngestionConfigRequestProto]: return CreateIngestionConfigRequestProto def _to_rust_form(self) -> IngestionConfigFormPy: + # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import IngestionConfigFormPy if self.organization_id: @@ -260,6 +261,7 @@ def _to_proto(self) -> FlowConfigProto: ) def _to_rust_config(self) -> FlowConfigPy: + # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import FlowConfigPy return FlowConfigPy( @@ -344,6 +346,7 @@ def _from_proto( ) def _to_rust_form(self) -> FlowPy: + # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import FlowPy from sift_client._internal.low_level_wrappers.ingestion import _to_rust_py_timestamp @@ -368,6 +371,7 @@ class ChannelValue(BaseModel): def _to_rust_form(self): """Convert this ChannelValue to its Rust form for ingestion.""" + # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import ChannelValuePy, ValuePy if self.ty == ChannelDataType.BIT_FIELD: @@ -401,6 +405,7 @@ def _to_rust_form(self): # Converter functions. 
def _channel_config_to_rust_config(channel: ChannelConfig) -> ChannelConfigPy: + # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import ( ChannelBitFieldElementPy, ChannelConfigPy, @@ -441,6 +446,7 @@ def _rust_channel_value_from_bitfield( Returns: A ChannelValuePy object. """ + # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import IngestWithConfigDataChannelValuePy assert channel.bit_field_elements is not None @@ -467,6 +473,7 @@ def _rust_channel_value_from_bitfield( def _to_rust_value(channel: ChannelConfig, value: Any) -> IngestWithConfigDataChannelValuePy: + # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import IngestWithConfigDataChannelValuePy if value is None: @@ -507,6 +514,7 @@ def _to_rust_value(channel: ChannelConfig, value: Any) -> IngestWithConfigDataCh def _to_rust_type(data_type: ChannelDataType) -> ChannelDataTypePy: + # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import ChannelDataTypePy if data_type == ChannelDataType.DOUBLE: diff --git a/python/lib/sift_client/sift_types/run.py b/python/lib/sift_client/sift_types/run.py index 5668fa9fc..e11451dca 100644 --- a/python/lib/sift_client/sift_types/run.py +++ b/python/lib/sift_client/sift_types/run.py @@ -173,6 +173,7 @@ def _get_proto_class(self) -> type[CreateRunRequestProto]: return CreateRunRequestProto def _to_rust_form(self) -> RunFormPy: + # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import MetadataPy, MetadataValuePy, RunFormPy if self.client_key: From 05052a7c824e33283811860524e9e1c3f3ba9d7e Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 13:28:26 -0800 Subject: [PATCH 20/47] 
reorder add_new_flows logic --- .../lib/sift_client/_internal/low_level_wrappers/ingestion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index 6024b698d..f703ba020 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -228,13 +228,13 @@ async def send_requests(self, requests: list[IngestWithConfigDataStreamRequestPy await self._sift_stream_instance.send_requests(requests) async def add_new_flows(self, flow_configs: list[FlowConfigPy]): + await self._sift_stream_instance.add_new_flows(flow_configs) self._known_flows.update( { flow_config.name: FlowConfig._from_rust_config(flow_config) for flow_config in flow_configs } ) - await self._sift_stream_instance.add_new_flows(flow_configs) async def attach_run(self, run_selector: RunSelectorPy): await self._sift_stream_instance.attach_run(run_selector) From 0aedc250882d22797343401da78b5ba7b7c17a8c Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 13:34:12 -0800 Subject: [PATCH 21/47] Make IngestionConfigStreamingClient private --- python/lib/sift_client/resources/ingestion.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 00ec4f579..35ba7f088 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -238,7 +238,7 @@ async def create_ingestion_config_streaming_client( checkpoint_interval_seconds: int | None = None, enable_tls: bool = True, tracing_config: TracingConfig | None = None, - ) -> IngestionConfigStreamingClient: + ) -> _IngestionConfigStreamingClient: """Create an IngestionConfigStreamingClient. 
Args: @@ -257,7 +257,7 @@ async def create_ingestion_config_streaming_client( Returns: An initialized IngestionConfigStreamingClient. """ - return await IngestionConfigStreamingClient.create( + return await _IngestionConfigStreamingClient.create( self.client, ingestion_config=ingestion_config, run=run, @@ -270,7 +270,7 @@ async def create_ingestion_config_streaming_client( ) -class IngestionConfigStreamingClient(ResourceBase): +class _IngestionConfigStreamingClient(ResourceBase): """A client for streaming ingestion with an ingestion config. This client provides a high-level interface for streaming data to Sift using @@ -300,7 +300,7 @@ async def create( checkpoint_interval_seconds: int | None = None, enable_tls: bool = True, tracing_config: TracingConfig | None = None, - ) -> IngestionConfigStreamingClient: + ) -> _IngestionConfigStreamingClient: """Create an IngestionConfigStreamingClient. Args: From 8d2f1ae56ff847e5f7d1b440d65b04d9b324da41 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 13:47:18 -0800 Subject: [PATCH 22/47] hash client key by default --- .../_internal/low_level_wrappers/ingestion.py | 55 ++++++++++--------- .../lib/sift_client/sift_types/ingestion.py | 9 ++- 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index f703ba020..12a1db4b7 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -122,31 +122,7 @@ async def get_ingestion_config_id_from_client_key(self, client_key: str) -> str def _hash_flows(self, asset_name: str, flows: list[FlowConfig]) -> str: """Generate a client key that should be unique but deterministic for the given asset and flow configuration.""" - # TODO: Taken from sift_py/ingestion/config/telemetry.py. Confirm intent from Marc. 
- m = hashlib.sha256() - m.update(asset_name.encode()) - for flow in sorted(flows, key=lambda f: f.name): - m.update(flow.name.encode()) - # Do not sort channels in alphabetical order since order matters. - for channel in flow.channels: - m.update(channel.name.encode()) - # Use api_format for data type since that should be consistent between languages. - m.update(channel.data_type.hash_str(api_format=True).encode()) - m.update((channel.description or "").encode()) - m.update((channel.unit or "").encode()) - if channel.bit_field_elements: - for bfe in sorted(channel.bit_field_elements, key=lambda bfe: bfe.index): - m.update(bfe.name.encode()) - m.update(str(bfe.index).encode()) - m.update(str(bfe.bit_count).encode()) - if channel.enum_types: - for enum_name, enum_key in sorted( - channel.enum_types.items(), key=lambda it: it[1] - ): - m.update(str(enum_key).encode()) - m.update(enum_name.encode()) - - return m.hexdigest() + return _hash_flows(asset_name=asset_name, flows=flows) class IngestionConfigStreamingLowLevelClient(LowLevelClientBase): _sift_stream_instance: SiftStreamPy @@ -250,3 +226,32 @@ async def finish(self): def get_metrics_snapshot(self) -> SiftStreamMetricsSnapshotPy: return self._sift_stream_instance.get_metrics_snapshot() + + +def _hash_flows(asset_name: str, flows: list[FlowConfig]) -> str: + """Generate a client key that should be unique but deterministic for the given asset and flow configuration.""" + # TODO: Taken from sift_py/ingestion/config/telemetry.py. Confirm intent from Marc. + m = hashlib.sha256() + m.update(asset_name.encode()) + for flow in sorted(flows, key=lambda f: f.name): + m.update(flow.name.encode()) + # Do not sort channels in alphabetical order since order matters. + for channel in flow.channels: + m.update(channel.name.encode()) + # Use api_format for data type since that should be consistent between languages. 
+ m.update(channel.data_type.hash_str(api_format=True).encode()) + m.update((channel.description or "").encode()) + m.update((channel.unit or "").encode()) + if channel.bit_field_elements: + for bfe in sorted(channel.bit_field_elements, key=lambda bfe: bfe.index): + m.update(bfe.name.encode()) + m.update(str(bfe.index).encode()) + m.update(str(bfe.bit_count).encode()) + if channel.enum_types: + for enum_name, enum_key in sorted( + channel.enum_types.items(), key=lambda it: it[1] + ): + m.update(str(enum_key).encode()) + m.update(enum_name.encode()) + + return m.hexdigest() diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index fee68a905..3da4857a1 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -24,6 +24,7 @@ IngestionConfig as IngestionConfigProto, ) +from sift_client._internal.low_level_wrappers.ingestion import _hash_flows from sift_client.sift_types._base import ( BaseType, ModelCreate, @@ -85,11 +86,15 @@ def _to_rust_form(self) -> IngestionConfigFormPy: "OrgId is ignored when passing an IngestionConfigCreate to the ingestion client" ) + if self.client_key: + client_key = self.client_key + else: + client_key = _hash_flows(self.asset_name, self.flows) + return IngestionConfigFormPy( asset_name=self.asset_name, flows=[flow_config._to_rust_config() for flow_config in self.flows] if self.flows else [], - client_key=self.client_key - or self.asset_name, # Default to using asset_name as the client_key + client_key=client_key ) From ba46637785de356960b13406e865e923fbd2b198 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 13:55:54 -0800 Subject: [PATCH 23/47] PR feedback --- python/lib/sift_client/sift_types/ingestion.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index 3da4857a1..d01c17cee 
100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -298,14 +298,17 @@ def as_flow(self, *, timestamp: datetime | None = None, values: dict[str, Any]) Returns: A Flow object with channel values created from the provided values dictionary. """ - found_values: dict[str, None] = {} + # Get current timestamp ASAP if not provided + timestamp = timestamp or datetime.now(timezone.utc) + + found_values = set[str]() channel_values = [] for channel in self.channels: if channel.name in values: channel_values.append(channel.as_channel_value(values[channel.name])) - found_values[channel.name] = None + found_values.add(channel.name) - missing_values = values.keys() - found_values.keys() + missing_values = values.keys() - found_values if missing_values: raise ValueError( f"Provided channel values which do not exist in the flow config: {missing_values}" @@ -313,7 +316,7 @@ def as_flow(self, *, timestamp: datetime | None = None, values: dict[str, Any]) return Flow( flow=self.name, - timestamp=timestamp or datetime.now(timezone.utc), + timestamp=timestamp, channel_values=channel_values, ) From 23b2f2f9c8a0fabdf6285603f08b9c9721b569c6 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 14:12:26 -0800 Subject: [PATCH 24/47] Move log file constants to class --- .../sift_client/_internal/low_level_wrappers/ingestion.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index 12a1db4b7..5f52c70c0 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -125,6 +125,8 @@ def _hash_flows(self, asset_name: str, flows: list[FlowConfig]) -> str: return _hash_flows(asset_name=asset_name, flows=flows) class IngestionConfigStreamingLowLevelClient(LowLevelClientBase): + 
DEFAULT_MAX_LOG_FILES = 7 # Equal to 1 week of logs + DEFAULT_LOGFILE_PREFIX = "sift_stream_bindings.log" _sift_stream_instance: SiftStreamPy _known_flows: dict[str, FlowConfig] @@ -164,12 +166,11 @@ async def create_sift_stream_instance( if tracing_config.log_dir is not None: # Use file logging - # If no max_log_files provided, default to 7 (1 week of logs) init_tracing_with_file( tracing_config.level, tracing_config.log_dir, - tracing_config.filename_prefix or "sift_stream_bindings.log", - tracing_config.max_log_files or 7, + tracing_config.filename_prefix or cls.DEFAULT_LOGFILE_PREFIX, + tracing_config.max_log_files or cls.DEFAULT_MAX_LOG_FILES, ) else: # Use stdout/stderr only From 716f1797ec6637255565e0097a87c8c84f1e0ad4 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 14:21:07 -0800 Subject: [PATCH 25/47] fix circular dependancy issue --- python/lib/sift_client/sift_types/ingestion.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index d01c17cee..76f76fc9e 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -24,7 +24,6 @@ IngestionConfig as IngestionConfigProto, ) -from sift_client._internal.low_level_wrappers.ingestion import _hash_flows from sift_client.sift_types._base import ( BaseType, ModelCreate, @@ -81,6 +80,9 @@ def _to_rust_form(self) -> IngestionConfigFormPy: # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import IngestionConfigFormPy + # Imported here to avoid circular dependancy + from sift_client._internal.low_level_wrappers.ingestion import _hash_flows + if self.organization_id: logger.warning( "OrgId is ignored when passing an IngestionConfigCreate to the ingestion client" From dcf89f820aad94094e6db01a1971ef34f0dc8e5e Mon Sep 17 00:00:00 2001 From: Nathan 
Federknopp Date: Mon, 10 Nov 2025 16:03:14 -0800 Subject: [PATCH 26/47] ruff --- .../sift_client/_internal/low_level_wrappers/ingestion.py | 7 +++---- python/lib/sift_client/sift_types/ingestion.py | 6 ++++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index 5f52c70c0..63ade94cd 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -124,8 +124,9 @@ def _hash_flows(self, asset_name: str, flows: list[FlowConfig]) -> str: """Generate a client key that should be unique but deterministic for the given asset and flow configuration.""" return _hash_flows(asset_name=asset_name, flows=flows) + class IngestionConfigStreamingLowLevelClient(LowLevelClientBase): - DEFAULT_MAX_LOG_FILES = 7 # Equal to 1 week of logs + DEFAULT_MAX_LOG_FILES = 7 # Equal to 1 week of logs DEFAULT_LOGFILE_PREFIX = "sift_stream_bindings.log" _sift_stream_instance: SiftStreamPy _known_flows: dict[str, FlowConfig] @@ -249,9 +250,7 @@ def _hash_flows(asset_name: str, flows: list[FlowConfig]) -> str: m.update(str(bfe.index).encode()) m.update(str(bfe.bit_count).encode()) if channel.enum_types: - for enum_name, enum_key in sorted( - channel.enum_types.items(), key=lambda it: it[1] - ): + for enum_name, enum_key in sorted(channel.enum_types.items(), key=lambda it: it[1]): m.update(str(enum_key).encode()) m.update(enum_name.encode()) diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index 76f76fc9e..685d20198 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -95,8 +95,10 @@ def _to_rust_form(self) -> IngestionConfigFormPy: return IngestionConfigFormPy( asset_name=self.asset_name, - flows=[flow_config._to_rust_config() for flow_config in 
self.flows] if self.flows else [], - client_key=client_key + flows=[flow_config._to_rust_config() for flow_config in self.flows] + if self.flows + else [], + client_key=client_key, ) From 4d9e5e46135fd091614442b232a52ea0e8133f78 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 16:13:59 -0800 Subject: [PATCH 27/47] Update defaults --- .../_internal/low_level_wrappers/ingestion.py | 2 +- python/lib/sift_client/resources/ingestion.py | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index 63ade94cd..6151501cc 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -163,7 +163,7 @@ async def create_sift_stream_instance( if not is_tracing_initialized(): if tracing_config is None: - tracing_config = TracingConfig.console_only() + tracing_config = TracingConfig.with_file() if tracing_config.log_dir is not None: # Use file logging diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 35ba7f088..814d994d7 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -138,7 +138,7 @@ def __init__(self, recovery_strategy_py: RecoveryStrategyPy | None): Args: recovery_strategy_py: The underlying RecoveryStrategyPy instance. - If None, uses the default retry-only strategy. + If None, uses the default retry_with_backups strategy. 
Note: Most users should use the factory methods (`retry_only()` or `retry_with_backups()`) @@ -147,7 +147,12 @@ def __init__(self, recovery_strategy_py: RecoveryStrategyPy | None): # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import RecoveryStrategyPy - self._recovery_strategy_py = recovery_strategy_py or RecoveryStrategyPy.default() + # Default to retry_with_backups() + # This is intentionally different from SiftStream, which defaults to retry_only + self._recovery_strategy_py = recovery_strategy_py or RecoveryStrategyPy.retry_with_backups( + retry_policy=RetryPolicyPy.default(), + disk_backup_policy=DiskBackupPolicyPy.default() + ) def _to_rust_config(self) -> RecoveryStrategyPy: """Convert to RecoveryStrategyPy for use with the ingestion client. @@ -312,10 +317,10 @@ async def create( recovery_strategy: The recovery strategy to use for ingestion. checkpoint_interval_seconds: The checkpoint interval in seconds. enable_tls: Whether to enable TLS for the connection. - tracing_config: Configuration for SiftStream tracing. Use TracingConfig.stdout_only() - to enable tracing to stdout only, or TracingConfig.stdout_with_file() to enable + tracing_config: Configuration for SiftStream tracing. Use TracingConfig.console_only() + to enable tracing to stdout only, or TracingConfig.with_file() to enable tracing to both stdout and rolling log files. Defaults to None (tracing will be - initialized with default settings if not already initialized). + initialized with default settings for TracingConfig.with_file()). Returns: An initialized IngestionConfigStreamingClient. 
From ee6aa772fbcb48ffa8784adc2ebbe2e0828a6b7d Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 16:28:15 -0800 Subject: [PATCH 28/47] ruff and mypy cleanup --- python/lib/sift_client/resources/ingestion.py | 5 ++- .../lib/sift_client/sift_types/ingestion.py | 31 ++----------------- 2 files changed, 4 insertions(+), 32 deletions(-) diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 814d994d7..8f6d5329a 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -145,13 +145,12 @@ def __init__(self, recovery_strategy_py: RecoveryStrategyPy | None): instead of calling this constructor directly. """ # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users - from sift_stream_bindings import RecoveryStrategyPy + from sift_stream_bindings import DiskBackupPolicyPy, RecoveryStrategyPy, RetryPolicyPy # Default to retry_with_backups() # This is intentionally different from SiftStream, which defaults to retry_only self._recovery_strategy_py = recovery_strategy_py or RecoveryStrategyPy.retry_with_backups( - retry_policy=RetryPolicyPy.default(), - disk_backup_policy=DiskBackupPolicyPy.default() + retry_policy=RetryPolicyPy.default(), disk_backup_policy=DiskBackupPolicyPy.default() ) def _to_rust_config(self) -> RecoveryStrategyPy: diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index 685d20198..0a73708c0 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -5,12 +5,7 @@ from datetime import datetime, timezone from typing import TYPE_CHECKING, Any -from google.protobuf.empty_pb2 import Empty from pydantic import BaseModel, ConfigDict, Field, model_validator -from sift.ingest.v1.ingest_pb2 import IngestWithConfigDataChannelValue -from sift.ingest.v1.ingest_pb2 import ( - 
IngestWithConfigDataStreamRequest as IngestWithConfigDataStreamRequestProto, -) from sift.ingestion_configs.v2.ingestion_configs_pb2 import ( ChannelConfig as ChannelConfigProto, ) @@ -91,7 +86,7 @@ def _to_rust_form(self) -> IngestionConfigFormPy: if self.client_key: client_key = self.client_key else: - client_key = _hash_flows(self.asset_name, self.flows) + client_key = _hash_flows(self.asset_name, self.flows or []) return IngestionConfigFormPy( asset_name=self.asset_name, @@ -325,7 +320,7 @@ def as_flow(self, *, timestamp: datetime | None = None, values: dict[str, Any]) ) -class Flow(BaseType[IngestWithConfigDataStreamRequestProto, "Flow"]): +class Flow(BaseModel): """Model representing a data flow for ingestion. A Flow represents a collection of channels that are ingested together. @@ -342,21 +337,6 @@ class Flow(BaseType[IngestWithConfigDataStreamRequestProto, "Flow"]): end_stream_on_validation_error: bool | None = None organization_id: str | None = None - @classmethod - def _from_proto( - cls, proto: IngestWithConfigDataStreamRequestProto, sift_client: SiftClient | None = None - ) -> Flow: - return cls( - proto=proto, - ingestion_config_id=proto.ingestion_config_id, - flow=proto.flow, - timestamp=proto.timestamp.ToDatetime(tzinfo=timezone.utc), - channel_values=proto.channel_values, - run_id=proto.run_id, - end_stream_on_validation_error=proto.end_stream_on_validation_error, - organization_id=proto.organization_id, - ) - def _to_rust_form(self) -> FlowPy: # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import FlowPy @@ -550,10 +530,3 @@ def _to_rust_type(data_type: ChannelDataType) -> ChannelDataTypePy: elif data_type == ChannelDataType.UINT_64: return ChannelDataTypePy.Uint64 raise ValueError(f"Unknown data type: {data_type}") - - -def _to_ingestion_value(data_type: ChannelDataType, value: Any) -> IngestWithConfigDataChannelValue: - if value is None: - return 
IngestWithConfigDataChannelValue(empty=Empty()) - ingestion_type_string = data_type.name.lower().replace("int_", "int") - return IngestWithConfigDataChannelValue(**{ingestion_type_string: value}) From 8020d4ae86a76fe3488a5df2aeeaa5baae35cb6a Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 16:36:14 -0800 Subject: [PATCH 29/47] mypy fix --- .../sift_client/_internal/low_level_wrappers/ingestion.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index 6151501cc..b72b5996a 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -152,11 +152,12 @@ async def create_sift_stream_instance( tracing_config: TracingConfig | None = None, ) -> IngestionConfigStreamingLowLevelClient: # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users + # TODO(nathan): Fix bindings to fix mypy issues with tracing functions from sift_stream_bindings import ( SiftStreamBuilderPy, - init_tracing, - init_tracing_with_file, - is_tracing_initialized, + init_tracing, # type: ignore[attr-defined] + init_tracing_with_file, # type: ignore[attr-defined] + is_tracing_initialized, # type: ignore[attr-defined] ) from sift_client.resources.ingestion import TracingConfig From 3bba01c2c340db4204f20c95e0009d5cf2e3437f Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 16:41:45 -0800 Subject: [PATCH 30/47] mypy fix --- python/pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/pyproject.toml b/python/pyproject.toml index cc1047c39..558bc5f54 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -149,6 +149,10 @@ module = "requests_toolbelt" ignore_missing_imports = true ignore_errors = true +[[tool.mypy.overrides]] +module = 
"sift_stream_bindings" +ignore_missing_imports = true + [tool.setuptools.packages.find] where = ["lib"] From 07db503d85257b31c345cdf7367d4a42e8a0bd97 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 16:48:17 -0800 Subject: [PATCH 31/47] more mypy fixes --- python/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyproject.toml b/python/pyproject.toml index 558bc5f54..1679043cf 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -152,6 +152,7 @@ ignore_errors = true [[tool.mypy.overrides]] module = "sift_stream_bindings" ignore_missing_imports = true +ignore_errors = true [tool.setuptools.packages.find] where = ["lib"] From 297b920800160d493e903238ea603df40c0149ab Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 16:52:32 -0800 Subject: [PATCH 32/47] mypy fix --- .../sift_client/_internal/low_level_wrappers/ingestion.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index b72b5996a..7fb481a35 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -155,10 +155,10 @@ async def create_sift_stream_instance( # TODO(nathan): Fix bindings to fix mypy issues with tracing functions from sift_stream_bindings import ( SiftStreamBuilderPy, - init_tracing, # type: ignore[attr-defined] - init_tracing_with_file, # type: ignore[attr-defined] - is_tracing_initialized, # type: ignore[attr-defined] - ) + init_tracing, + init_tracing_with_file, + is_tracing_initialized, + ) # type: ignore[attr-defined] from sift_client.resources.ingestion import TracingConfig From e4d7725a131371235f2360bdfe5a91c90e558dbc Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 17:02:07 -0800 Subject: [PATCH 33/47] mypy --- python/pyproject.toml | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 1679043cf..9972cba15 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -150,7 +150,7 @@ ignore_missing_imports = true ignore_errors = true [[tool.mypy.overrides]] -module = "sift_stream_bindings" +module = "sift-stream-bindings" ignore_missing_imports = true ignore_errors = true From e64d03175b8382dddfc201aaba8103917337e358 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 17:06:04 -0800 Subject: [PATCH 34/47] mypy --- .../sift_client/_internal/low_level_wrappers/ingestion.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index 7fb481a35..487271346 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -153,11 +153,11 @@ async def create_sift_stream_instance( ) -> IngestionConfigStreamingLowLevelClient: # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users # TODO(nathan): Fix bindings to fix mypy issues with tracing functions - from sift_stream_bindings import ( + from sift_stream_bindings import ( # type: ignore[attr-defined] SiftStreamBuilderPy, - init_tracing, - init_tracing_with_file, - is_tracing_initialized, + init_tracing, # type: ignore[attr-defined] + init_tracing_with_file, # type: ignore[attr-defined] + is_tracing_initialized, # type: ignore[attr-defined] ) # type: ignore[attr-defined] from sift_client.resources.ingestion import TracingConfig From 30df5bfe49fad4fde27fa95d9dfa2ed0871c7bd2 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 17:07:41 -0800 Subject: [PATCH 35/47] ruff --- .../sift_client/_internal/low_level_wrappers/ingestion.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 
deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py index 487271346..0bf38cc37 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/ingestion.py @@ -153,11 +153,11 @@ async def create_sift_stream_instance( ) -> IngestionConfigStreamingLowLevelClient: # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users # TODO(nathan): Fix bindings to fix mypy issues with tracing functions - from sift_stream_bindings import ( # type: ignore[attr-defined] + from sift_stream_bindings import ( # type: ignore[attr-defined] SiftStreamBuilderPy, - init_tracing, # type: ignore[attr-defined] - init_tracing_with_file, # type: ignore[attr-defined] - is_tracing_initialized, # type: ignore[attr-defined] + init_tracing, # type: ignore[attr-defined] + init_tracing_with_file, # type: ignore[attr-defined] + is_tracing_initialized, # type: ignore[attr-defined] ) # type: ignore[attr-defined] from sift_client.resources.ingestion import TracingConfig From 414a7afa6ef7e6248f00876b2f5b88d873f6add2 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 17:15:30 -0800 Subject: [PATCH 36/47] update tests --- .../_tests/resources/test_ingestion.py | 497 ------------------ .../_tests/sift_types/test_ingestion.py | 95 ---- 2 files changed, 592 deletions(-) delete mode 100644 python/lib/sift_client/_tests/resources/test_ingestion.py diff --git a/python/lib/sift_client/_tests/resources/test_ingestion.py b/python/lib/sift_client/_tests/resources/test_ingestion.py deleted file mode 100644 index 6f2fbd51a..000000000 --- a/python/lib/sift_client/_tests/resources/test_ingestion.py +++ /dev/null @@ -1,497 +0,0 @@ -"""Pytest tests for the Ingestion API. 
- -These tests demonstrate and validate the usage of the Ingestion API including: -- Creating ingestion configurations -- Ingesting data with various channel types (double, enum, bit field) -- Flow management and validation -- High-speed and regular flow ingestion -- Error handling and edge cases -""" - -import math -import random -import time -from datetime import datetime, timedelta, timezone - -import pytest - -from sift_client import SiftClient -from sift_client.sift_types.channel import ChannelBitFieldElement, ChannelDataType -from sift_client.sift_types.ingestion import ChannelConfig, Flow - -pytestmark = pytest.mark.integration - -ASSET_NAME = "test-ingestion-asset" - - -def test_client_binding(sift_client): - assert getattr(sift_client, "ingestion", None) is None # Only async! - assert sift_client.async_.ingestion - - -@pytest.fixture -def test_run(sift_client: SiftClient): - """Create a test run for ingestion tests.""" - run = sift_client.runs.create( - { - "name": f"test-ingestion-run-{datetime.now(tz=timezone.utc).timestamp()}", - "description": "Test run for ingestion integration tests", - "tags": ["test", "ingestion", "pytest"], - } - ) - yield run - # Cleanup - sift_client.runs.archive(run=run) - - -class TestIngestionAPIAsync: - """Test suite for the async Ingestion API functionality.""" - - class TestCreateIngestionConfig: - """Tests for creating ingestion configurations.""" - - @pytest.mark.asyncio - async def test_create_basic_config(self, sift_client, test_run): - """Test creating a basic ingestion configuration.""" - flow = Flow( - name="test-basic-flow", - channels=[ - ChannelConfig(name="test-channel", data_type=ChannelDataType.DOUBLE), - ], - ) - - config_id = await sift_client.async_.ingestion.create_ingestion_config( - asset_name=ASSET_NAME, - run_id=test_run.id_, - flows=[flow], - ) - - assert config_id is not None - assert isinstance(config_id, str) - - @pytest.mark.asyncio - async def test_create_config_with_multiple_flows(self, 
sift_client, test_run): - """Test creating an ingestion configuration with multiple flows.""" - regular_flow = Flow( - name="test-regular-flow", - channels=[ - ChannelConfig(name="regular-channel", data_type=ChannelDataType.DOUBLE), - ], - ) - - highspeed_flow = Flow( - name="test-highspeed-flow", - channels=[ - ChannelConfig(name="highspeed-channel", data_type=ChannelDataType.DOUBLE), - ], - ) - - config_id = await sift_client.async_.ingestion.create_ingestion_config( - asset_name=ASSET_NAME, - run_id=test_run.id_, - flows=[regular_flow, highspeed_flow], - ) - - assert config_id is not None - - @pytest.mark.asyncio - async def test_create_config_with_enum_channel(self, sift_client, test_run): - """Test creating an ingestion configuration with enum channel.""" - flow = Flow( - name="test-enum-flow", - channels=[ - ChannelConfig( - name="test-enum-channel", - data_type=ChannelDataType.ENUM, - enum_types={"state1": 1, "state2": 2, "state3": 3}, - ), - ], - ) - - config_id = await sift_client.async_.ingestion.create_ingestion_config( - asset_name=ASSET_NAME, - run_id=test_run.id_, - flows=[flow], - ) - - assert config_id is not None - - @pytest.mark.asyncio - async def test_create_config_with_bit_field_channel(self, sift_client, test_run): - """Test creating an ingestion configuration with bit field channel.""" - flow = Flow( - name="test-bitfield-flow", - channels=[ - ChannelConfig( - name="test-bit-field-channel", - data_type=ChannelDataType.BIT_FIELD, - bit_field_elements=[ - ChannelBitFieldElement(name="voltage", index=0, bit_count=4), - ChannelBitFieldElement(name="current", index=4, bit_count=2), - ChannelBitFieldElement(name="status", index=6, bit_count=2), - ], - ), - ], - ) - - config_id = await sift_client.async_.ingestion.create_ingestion_config( - asset_name=ASSET_NAME, - run_id=test_run.id_, - flows=[flow], - ) - - assert config_id is not None - - @pytest.mark.asyncio - async def test_flow_sealed_after_config_creation(self, sift_client, test_run): - 
"""Test that flows are sealed after ingestion config creation.""" - flow = Flow( - name="test-sealed-flow", - channels=[ - ChannelConfig(name="test-channel", data_type=ChannelDataType.DOUBLE), - ], - ) - - await sift_client.async_.ingestion.create_ingestion_config( - asset_name=ASSET_NAME, - run_id=test_run.id_, - flows=[flow], - ) - - # Try to add a channel after config creation - with pytest.raises(ValueError, match="Cannot add a channel to a flow after creation"): - flow.add_channel( - ChannelConfig(name="new-channel", data_type=ChannelDataType.DOUBLE) - ) - - class TestIngestData: - """Tests for ingesting data.""" - - @pytest.mark.asyncio - async def test_ingest_double_data(self, sift_client, test_run): - """Test ingesting double data.""" - flow = Flow( - name="test-double-flow", - channels=[ - ChannelConfig(name="double-channel", data_type=ChannelDataType.DOUBLE), - ], - ) - - await sift_client.async_.ingestion.create_ingestion_config( - asset_name=ASSET_NAME, - run_id=test_run.id_, - flows=[flow], - ) - - start_time = datetime.now(tz=timezone.utc) - for i in range(10): - timestamp = start_time + timedelta(seconds=i) - flow.ingest( - timestamp=timestamp, - channel_values={"double-channel": float(i)}, - ) - - sift_client.async_.ingestion.wait_for_ingestion_to_complete(timeout=2) - - @pytest.mark.asyncio - async def test_ingest_enum_data(self, sift_client, test_run): - """Test ingesting enum data.""" - flow = Flow( - name="test-enum-ingest-flow", - channels=[ - ChannelConfig( - name="enum-channel", - data_type=ChannelDataType.ENUM, - enum_types={"low": 1, "medium": 2, "high": 3}, - ), - ], - ) - - await sift_client.async_.ingestion.create_ingestion_config( - asset_name=ASSET_NAME, - run_id=test_run.id_, - flows=[flow], - ) - - start_time = datetime.now(tz=timezone.utc) - for i in range(10): - timestamp = start_time + timedelta(seconds=i) - flow.ingest( - timestamp=timestamp, - channel_values={"enum-channel": (i % 3) + 1}, - ) - - 
sift_client.async_.ingestion.wait_for_ingestion_to_complete(timeout=2) - - @pytest.mark.asyncio - async def test_ingest_bit_field_data_as_dict(self, sift_client, test_run): - """Test ingesting bit field data as dictionary.""" - flow = Flow( - name="test-bitfield-ingest-flow", - channels=[ - ChannelConfig( - name="bitfield-channel", - data_type=ChannelDataType.BIT_FIELD, - bit_field_elements=[ - ChannelBitFieldElement(name="voltage", index=0, bit_count=4), - ChannelBitFieldElement(name="current", index=4, bit_count=2), - ChannelBitFieldElement(name="led", index=6, bit_count=1), - ChannelBitFieldElement(name="heater", index=7, bit_count=1), - ], - ), - ], - ) - - await sift_client.async_.ingestion.create_ingestion_config( - asset_name=ASSET_NAME, - run_id=test_run.id_, - flows=[flow], - ) - - start_time = datetime.now(tz=timezone.utc) - for i in range(10): - timestamp = start_time + timedelta(seconds=i) - flow.ingest( - timestamp=timestamp, - channel_values={ - "bitfield-channel": { - "voltage": random.randint(3, 13), - "current": random.randint(1, 3), - "led": random.choice([0, 1]), - "heater": random.choice([0, 1]), - } - }, - ) - - sift_client.async_.ingestion.wait_for_ingestion_to_complete(timeout=2) - - @pytest.mark.asyncio - async def test_ingest_bit_field_data_as_bytes(self, sift_client, test_run): - """Test ingesting bit field data as bytes.""" - flow = Flow( - name="test-bitfield-bytes-flow", - channels=[ - ChannelConfig( - name="bitfield-channel", - data_type=ChannelDataType.BIT_FIELD, - bit_field_elements=[ - ChannelBitFieldElement(name="field1", index=0, bit_count=4), - ChannelBitFieldElement(name="field2", index=4, bit_count=4), - ], - ), - ], - ) - - await sift_client.async_.ingestion.create_ingestion_config( - asset_name=ASSET_NAME, - run_id=test_run.id_, - flows=[flow], - ) - - timestamp = datetime.now(tz=timezone.utc) - flow.ingest( - timestamp=timestamp, - channel_values={"bitfield-channel": bytes([0b11110000])}, - ) - - 
sift_client.async_.ingestion.wait_for_ingestion_to_complete(timeout=2) - - @pytest.mark.asyncio - async def test_ingest_multiple_channels(self, sift_client, test_run): - """Test ingesting data for multiple channels simultaneously.""" - flow = Flow( - name="test-multi-channel-flow", - channels=[ - ChannelConfig(name="channel1", data_type=ChannelDataType.DOUBLE), - ChannelConfig( - name="channel2", - data_type=ChannelDataType.ENUM, - enum_types={"a": 1, "b": 2}, - ), - ChannelConfig( - name="channel3", - data_type=ChannelDataType.BIT_FIELD, - bit_field_elements=[ - ChannelBitFieldElement(name="bit1", index=0, bit_count=4), - ChannelBitFieldElement(name="bit2", index=4, bit_count=4), - ], - ), - ], - ) - - await sift_client.async_.ingestion.create_ingestion_config( - asset_name=ASSET_NAME, - run_id=test_run.id_, - flows=[flow], - ) - - start_time = datetime.now(tz=timezone.utc) - for i in range(5): - timestamp = start_time + timedelta(seconds=i) - flow.ingest( - timestamp=timestamp, - channel_values={ - "channel1": float(i), - "channel2": (i % 2) + 1, - "channel3": {"bit1": i % 16, "bit2": (i * 2) % 16}, - }, - ) - - sift_client.async_.ingestion.wait_for_ingestion_to_complete(timeout=2) - - @pytest.mark.asyncio - async def test_ingest_highspeed_data(self, sift_client, test_run): - """Test ingesting high-speed data.""" - flow = Flow( - name="test-highspeed-data-flow", - channels=[ - ChannelConfig(name="highspeed-channel", data_type=ChannelDataType.DOUBLE), - ], - ) - - await sift_client.async_.ingestion.create_ingestion_config( - asset_name=ASSET_NAME, - run_id=test_run.id_, - flows=[flow], - ) - - start_time = datetime.now(tz=timezone.utc) - fake_hs_rate = 50 # Hz - fake_hs_period = 1 / fake_hs_rate - duration = 2 # seconds - - for i in range(duration): - for j in range(fake_hs_rate): - val = 3.0 * math.sin(2 * math.pi * fake_hs_rate * (i + j * 0.001)) - timestamp = start_time + timedelta( - seconds=i, milliseconds=j * fake_hs_period * 1000 - ) - flow.ingest( - 
timestamp=timestamp, - channel_values={"highspeed-channel": val}, - ) - time.sleep(0.01) - - sift_client.async_.ingestion.wait_for_ingestion_to_complete(timeout=2) - - class TestIngestionValidation: - """Tests for ingestion validation and error handling.""" - - @pytest.mark.asyncio - async def test_ingest_invalid_enum_value_raises_error(self, sift_client, test_run): - """Test that ingesting an invalid enum value raises an error.""" - flow = Flow( - name="test-enum-validation-flow", - channels=[ - ChannelConfig( - name="enum-channel", - data_type=ChannelDataType.ENUM, - enum_types={"valid1": 1, "valid2": 2}, - ), - ], - ) - - await sift_client.async_.ingestion.create_ingestion_config( - asset_name=ASSET_NAME, - run_id=test_run.id_, - flows=[flow], - ) - - timestamp = datetime.now(tz=timezone.utc) - # Test with invalid integer - with pytest.raises(ValueError, match="Could not find enum value"): - flow.ingest( - timestamp=timestamp, - channel_values={"enum-channel": 99}, - ) - - # Test with invalid string - with pytest.raises(ValueError, match="Could not find enum value"): - flow.ingest( - timestamp=timestamp, - channel_values={"enum-channel": "invalid-enum"}, - ) - - @pytest.mark.asyncio - async def test_resume_ingestion_after_wait(self, sift_client, test_run): - """Test that ingestion can resume after waiting for completion.""" - flow = Flow( - name="test-resume-flow", - channels=[ - ChannelConfig(name="test-channel", data_type=ChannelDataType.DOUBLE), - ], - ) - - await sift_client.async_.ingestion.create_ingestion_config( - asset_name=ASSET_NAME, - run_id=test_run.id_, - flows=[flow], - ) - - # First batch - timestamp1 = datetime.now(tz=timezone.utc) - flow.ingest(timestamp=timestamp1, channel_values={"test-channel": 1.0}) - - sift_client.async_.ingestion.wait_for_ingestion_to_complete(timeout=2) - - # Wait a bit - time.sleep(0.1) - - # Second batch after wait - timestamp2 = timestamp1 + timedelta(seconds=2) - flow.ingest(timestamp=timestamp2, 
channel_values={"test-channel": 2.0}) - - sift_client.async_.ingestion.wait_for_ingestion_to_complete(timeout=2) - - class TestIngestionConfigStreamingClient: - """Tests for IngestionConfigStreamingClient methods.""" - - @pytest.mark.asyncio - async def test_get_flow_config_retrieves_known_flow(self, sift_client, test_run): - """Test that get_flow_config retrieves a known flow configuration.""" - from sift_client.sift_types.ingestion import FlowConfig, IngestionConfigCreate - - flow_config = FlowConfig( - name="test-flow-config", - channels=[ - ChannelConfig(name="test-channel", data_type=ChannelDataType.DOUBLE), - ], - ) - - ingestion_config = IngestionConfigCreate( - asset_name=ASSET_NAME, - flows=[flow_config], - ) - - async with await sift_client.async_.ingestion.create_ingestion_config_streaming_client( - ingestion_config=ingestion_config, - run=test_run, - ) as client: - retrieved_flow = client.get_flow_config(flow_name="test-flow-config") - assert retrieved_flow.name == "test-flow-config" - assert len(retrieved_flow.channels) == 1 - assert retrieved_flow.channels[0].name == "test-channel" - - @pytest.mark.asyncio - async def test_get_flow_config_raises_on_unknown_flow(self, sift_client, test_run): - """Test that get_flow_config raises KeyError for unknown flow.""" - from sift_client.sift_types.ingestion import FlowConfig, IngestionConfigCreate - - flow_config = FlowConfig( - name="test-flow-config", - channels=[ - ChannelConfig(name="test-channel", data_type=ChannelDataType.DOUBLE), - ], - ) - - ingestion_config = IngestionConfigCreate( - asset_name=ASSET_NAME, - flows=[flow_config], - ) - - async with await sift_client.async_.ingestion.create_ingestion_config_streaming_client( - ingestion_config=ingestion_config, - run=test_run, - ) as client: - with pytest.raises(KeyError, match="FlowConfig unknown-flow is unknown"): - client.get_flow_config(flow_name="unknown-flow") diff --git a/python/lib/sift_client/_tests/sift_types/test_ingestion.py 
b/python/lib/sift_client/_tests/sift_types/test_ingestion.py index 1990a77fa..cd31221d6 100644 --- a/python/lib/sift_client/_tests/sift_types/test_ingestion.py +++ b/python/lib/sift_client/_tests/sift_types/test_ingestion.py @@ -8,7 +8,6 @@ from sift_client.sift_types.channel import ChannelBitFieldElement, ChannelDataType from sift_client.sift_types.ingestion import ( ChannelConfig, - Flow, FlowConfig, IngestionConfig, ) @@ -74,100 +73,6 @@ def test_other_data_types_dont_require_special_fields(self): assert channel.data_type == ChannelDataType.DOUBLE -@pytest.fixture -def mock_flow(mock_client): - """Create a mock Flow instance for testing.""" - flow = Flow( - proto=MagicMock(), - name="test_flow", - channels=[ - ChannelConfig( - name="channel1", - data_type=ChannelDataType.DOUBLE, - description="Test channel 1", - ), - ChannelConfig( - name="channel2", - data_type=ChannelDataType.FLOAT, - description="Test channel 2", - ), - ], - ingestion_config_id="test_config_id", - run_id="test_run_id", - ) - flow._apply_client_to_instance(mock_client) - return flow - - -class TestFlow: - """Unit tests for Flow model - tests methods.""" - - def test_add_channel_success(self): - """Test that add_channel() adds a channel when no ingestion_config_id is set.""" - flow = Flow( - name="test_flow", - channels=[], - ingestion_config_id=None, - ) - - channel = ChannelConfig( - name="new_channel", - data_type=ChannelDataType.DOUBLE, - ) - - # Should not raise - flow.add_channel(channel) - - assert len(flow.channels) == 1 - assert flow.channels[0].name == "new_channel" - - def test_add_channel_raises_after_creation(self): - """Test that add_channel() raises ValueError when ingestion_config_id is set.""" - flow = Flow( - name="test_flow", - channels=[], - ingestion_config_id="config123", - ) - - channel = ChannelConfig( - name="new_channel", - data_type=ChannelDataType.DOUBLE, - ) - - with pytest.raises(ValueError, match="Cannot add a channel to a flow after creation"): - 
flow.add_channel(channel) - - def test_ingest_calls_client(self, mock_flow, mock_client): - """Test that ingest() calls client.async_.ingestion.ingest with correct parameters.""" - timestamp = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc) - channel_values = {"channel1": 42.5, "channel2": 100.0} - - # Call ingest - mock_flow.ingest(timestamp=timestamp, channel_values=channel_values) - - # Verify client method was called with correct parameters - mock_client.async_.ingestion.ingest.assert_called_once_with( - flow=mock_flow, - timestamp=timestamp, - channel_values=channel_values, - ) - - def test_ingest_raises_without_config_id(self, mock_client): - """Test that ingest() raises ValueError when ingestion_config_id is not set.""" - flow = Flow( - name="test_flow", - channels=[], - ingestion_config_id=None, - ) - flow._apply_client_to_instance(mock_client) - - timestamp = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc) - channel_values = {"channel1": 42.5} - - with pytest.raises(ValueError, match="Ingestion config ID is not set"): - flow.ingest(timestamp=timestamp, channel_values=channel_values) - - class TestFlowConfig: """Unit tests for FlowConfig model.""" From 84ebe3715e0509dc4055b8ad52a35f900aadfe3b Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 17:25:36 -0800 Subject: [PATCH 37/47] pyright fixes --- python/lib/sift_client/resources/ingestion.py | 2 +- python/lib/sift_client/sift_types/ingestion.py | 6 +++--- python/pyproject.toml | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 8f6d5329a..2ce5ec520 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -346,7 +346,7 @@ async def create( if isinstance(ingestion_config, IngestionConfig): # SiftStream will retrieve the existing config from the client_key asset = 
sift_client.assets.get(asset_id=ingestion_config.asset_id) - ingestion_config_form = IngestionConfigFormPy( + ingestion_config_form = IngestionConfigFormPy( # type: ignore[call-arg] asset_name=asset.name, client_key=ingestion_config.client_key, flows=[], diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index 0a73708c0..e61c38303 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -88,7 +88,7 @@ def _to_rust_form(self) -> IngestionConfigFormPy: else: client_key = _hash_flows(self.asset_name, self.flows or []) - return IngestionConfigFormPy( + return IngestionConfigFormPy( # type: ignore[call-arg] asset_name=self.asset_name, flows=[flow_config._to_rust_config() for flow_config in self.flows] if self.flows @@ -268,7 +268,7 @@ def _to_rust_config(self) -> FlowConfigPy: # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import FlowConfigPy - return FlowConfigPy( + return FlowConfigPy( # type: ignore[call-arg] name=self.name, channels=[_channel_config_to_rust_config(channel) for channel in self.channels], ) @@ -343,7 +343,7 @@ def _to_rust_form(self) -> FlowPy: from sift_client._internal.low_level_wrappers.ingestion import _to_rust_py_timestamp - return FlowPy( + return FlowPy( # type: ignore[call-arg] flow_name=self.flow, timestamp=_to_rust_py_timestamp(self.timestamp), values=[channel_value._to_rust_form() for channel_value in self.channel_values], diff --git a/python/pyproject.toml b/python/pyproject.toml index 9972cba15..b3176e567 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -87,6 +87,8 @@ python_version = "3.8" reportOptionalMemberAccess = "none" reportArgumentType = "none" reportAttributeAccessIssue = "none" +reportCallIssue = "warning" +reportMissingTypeStubs = "none" stubPath = "lib/sift_client/resources/sync_stubs" useLibraryCodeForTypes = true 
exclude = [ From 3ff24df8537b034fd89e8b6e50594a6504e929b1 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 17:30:33 -0800 Subject: [PATCH 38/47] test fix --- python/lib/sift_client/sift_types/ingestion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index e61c38303..aa537c1ae 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -300,7 +300,7 @@ def as_flow(self, *, timestamp: datetime | None = None, values: dict[str, Any]) # Get current timestamp ASAP if not provided timestamp = timestamp or datetime.now(timezone.utc) - found_values = set[str]() + found_values: set[str] = set() channel_values = [] for channel in self.channels: if channel.name in values: From 946fd7bcdeadcd4356b40d2f9e98b969e10a23fb Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 17:50:02 -0800 Subject: [PATCH 39/47] ci fix --- .github/workflows/python_ci.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python_ci.yaml b/.github/workflows/python_ci.yaml index 836e00437..ae0ec00a7 100644 --- a/.github/workflows/python_ci.yaml +++ b/.github/workflows/python_ci.yaml @@ -17,7 +17,9 @@ jobs: working-directory: python steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python uses: actions/setup-python@v2 From 5dec022909e1cbf919a55977445c201a7c737095 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 18:10:29 -0800 Subject: [PATCH 40/47] add basic example --- python/docs/examples/ingestion_example.py | 63 +++++++++++++++++++ python/lib/sift_client/resources/ingestion.py | 2 +- .../lib/sift_client/sift_types/ingestion.py | 6 +- python/pyproject.toml | 2 - 4 files changed, 67 insertions(+), 6 deletions(-) create mode 
100644 python/docs/examples/ingestion_example.py diff --git a/python/docs/examples/ingestion_example.py b/python/docs/examples/ingestion_example.py new file mode 100644 index 000000000..d45ad8905 --- /dev/null +++ b/python/docs/examples/ingestion_example.py @@ -0,0 +1,63 @@ +import asyncio +import random +import time +from datetime import datetime, timezone + +from sift_client import SiftClient, SiftConnectionConfig +from sift_client.sift_types import ( + ChannelConfig, + ChannelDataType, + FlowConfig, + IngestionConfigCreate, + RunCreate, +) + + +async def main(): + connection_config = SiftConnectionConfig( + api_key="my_api_key", + grpc_url="sift_grpc_url", + rest_url="sift_rest_url", + ) + + client = SiftClient(connection_config=connection_config) + + # Ingestion configs are created using SiftClient types + ingestion_config = IngestionConfigCreate( + asset_name="sift_rover_1", + flows=[ + FlowConfig( + name="onboard_sensors", + channels=[ + ChannelConfig(name="motor_temp", unit="C", data_type=ChannelDataType.DOUBLE), + ChannelConfig(name="tank_pressure", unit="kPa", data_type=ChannelDataType.DOUBLE), + ], + ) + ], + ) + + run = RunCreate(name="sift_rover-" + str(int(time.time()))) + + async with await client.async_.ingestion.create_ingestion_config_streaming_client( + ingestion_config=ingestion_config, + run=run, + ) as ingest_client: + while True: + + # Flows can be generated easily from the ingest client + flow_config = ingest_client.get_flow_config(flow_name="onboard_sensors") + flow = flow_config.as_flow( + timestamp=datetime.now(timezone.utc), + values={ + "motor_temp": 50.0 + random.random() * 5.0, + "tank_pressure": 2000.0 + random.random() * 100.0, + }, + ) + # Ingest the flow with .send() + await ingest_client.send(flow=flow) + + await asyncio.sleep(1) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 2ce5ec520..8f6d5329a 100644 --- 
a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -346,7 +346,7 @@ async def create( if isinstance(ingestion_config, IngestionConfig): # SiftStream will retrieve the existing config from the client_key asset = sift_client.assets.get(asset_id=ingestion_config.asset_id) - ingestion_config_form = IngestionConfigFormPy( # type: ignore[call-arg] + ingestion_config_form = IngestionConfigFormPy( asset_name=asset.name, client_key=ingestion_config.client_key, flows=[], diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index aa537c1ae..d8db6d207 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -88,7 +88,7 @@ def _to_rust_form(self) -> IngestionConfigFormPy: else: client_key = _hash_flows(self.asset_name, self.flows or []) - return IngestionConfigFormPy( # type: ignore[call-arg] + return IngestionConfigFormPy( asset_name=self.asset_name, flows=[flow_config._to_rust_config() for flow_config in self.flows] if self.flows @@ -268,7 +268,7 @@ def _to_rust_config(self) -> FlowConfigPy: # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import FlowConfigPy - return FlowConfigPy( # type: ignore[call-arg] + return FlowConfigPy( name=self.name, channels=[_channel_config_to_rust_config(channel) for channel in self.channels], ) @@ -343,7 +343,7 @@ def _to_rust_form(self) -> FlowPy: from sift_client._internal.low_level_wrappers.ingestion import _to_rust_py_timestamp - return FlowPy( # type: ignore[call-arg] + return FlowPy( flow_name=self.flow, timestamp=_to_rust_py_timestamp(self.timestamp), values=[channel_value._to_rust_form() for channel_value in self.channel_values], diff --git a/python/pyproject.toml b/python/pyproject.toml index b3176e567..9972cba15 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -87,8 +87,6 @@ 
python_version = "3.8" reportOptionalMemberAccess = "none" reportArgumentType = "none" reportAttributeAccessIssue = "none" -reportCallIssue = "warning" -reportMissingTypeStubs = "none" stubPath = "lib/sift_client/resources/sync_stubs" useLibraryCodeForTypes = true exclude = [ From 4a17a48e1172c233e1182f0bc91f99e34f92f456 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 18:12:02 -0800 Subject: [PATCH 41/47] ruff --- python/docs/examples/ingestion_example.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/docs/examples/ingestion_example.py b/python/docs/examples/ingestion_example.py index d45ad8905..171d6c61b 100644 --- a/python/docs/examples/ingestion_example.py +++ b/python/docs/examples/ingestion_example.py @@ -30,7 +30,9 @@ async def main(): name="onboard_sensors", channels=[ ChannelConfig(name="motor_temp", unit="C", data_type=ChannelDataType.DOUBLE), - ChannelConfig(name="tank_pressure", unit="kPa", data_type=ChannelDataType.DOUBLE), + ChannelConfig( + name="tank_pressure", unit="kPa", data_type=ChannelDataType.DOUBLE + ), ], ) ], @@ -43,7 +45,6 @@ async def main(): run=run, ) as ingest_client: while True: - # Flows can be generated easily from the ingest client flow_config = ingest_client.get_flow_config(flow_name="onboard_sensors") flow = flow_config.as_flow( From 2b31629a57892a1f5a7354e876ee8b1b1353115d Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 18:14:55 -0800 Subject: [PATCH 42/47] pyright issues again --- python/lib/sift_client/resources/ingestion.py | 2 +- python/lib/sift_client/sift_types/ingestion.py | 6 +++--- python/pyproject.toml | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 8f6d5329a..2ce5ec520 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -346,7 +346,7 @@ async def create( if 
isinstance(ingestion_config, IngestionConfig): # SiftStream will retrieve the existing config from the client_key asset = sift_client.assets.get(asset_id=ingestion_config.asset_id) - ingestion_config_form = IngestionConfigFormPy( + ingestion_config_form = IngestionConfigFormPy( # type: ignore[call-arg] asset_name=asset.name, client_key=ingestion_config.client_key, flows=[], diff --git a/python/lib/sift_client/sift_types/ingestion.py b/python/lib/sift_client/sift_types/ingestion.py index d8db6d207..aa537c1ae 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -88,7 +88,7 @@ def _to_rust_form(self) -> IngestionConfigFormPy: else: client_key = _hash_flows(self.asset_name, self.flows or []) - return IngestionConfigFormPy( + return IngestionConfigFormPy( # type: ignore[call-arg] asset_name=self.asset_name, flows=[flow_config._to_rust_config() for flow_config in self.flows] if self.flows @@ -268,7 +268,7 @@ def _to_rust_config(self) -> FlowConfigPy: # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import FlowConfigPy - return FlowConfigPy( + return FlowConfigPy( # type: ignore[call-arg] name=self.name, channels=[_channel_config_to_rust_config(channel) for channel in self.channels], ) @@ -343,7 +343,7 @@ def _to_rust_form(self) -> FlowPy: from sift_client._internal.low_level_wrappers.ingestion import _to_rust_py_timestamp - return FlowPy( + return FlowPy( # type: ignore[call-arg] flow_name=self.flow, timestamp=_to_rust_py_timestamp(self.timestamp), values=[channel_value._to_rust_form() for channel_value in self.channel_values], diff --git a/python/pyproject.toml b/python/pyproject.toml index 9972cba15..b3176e567 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -87,6 +87,8 @@ python_version = "3.8" reportOptionalMemberAccess = "none" reportArgumentType = "none" reportAttributeAccessIssue = "none" +reportCallIssue = 
"warning" +reportMissingTypeStubs = "none" stubPath = "lib/sift_client/resources/sync_stubs" useLibraryCodeForTypes = true exclude = [ From 424c2caca5d728cc4c358bbeac0b59ebf357caaa Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 18:22:04 -0800 Subject: [PATCH 43/47] remove tests requiring ingest --- .../sift_client/_tests/resources/test_runs.py | 66 ------------------- 1 file changed, 66 deletions(-) diff --git a/python/lib/sift_client/_tests/resources/test_runs.py b/python/lib/sift_client/_tests/resources/test_runs.py index 133a675b1..486764ab6 100644 --- a/python/lib/sift_client/_tests/resources/test_runs.py +++ b/python/lib/sift_client/_tests/resources/test_runs.py @@ -491,72 +491,6 @@ async def test_stop_run_with_start_time(self, runs_api_async, new_run): class TestAssetAssociation: """Tests for the async asset association methods.""" - - async def ingest_data_to_asset(self, sift_client, asset_name): - """Ingest some data into an asset.""" - flow = Flow( - name="test-double-flow", - channels=[ - ChannelConfig(name="double-channel", data_type=ChannelDataType.DOUBLE), - ], - ) - - await sift_client.async_.ingestion.create_ingestion_config( - asset_name=asset_name, - flows=[flow], - ) - - start_time = datetime.now(tz=timezone.utc) - for i in range(10): - timestamp = start_time + timedelta(seconds=i) - flow.ingest( - timestamp=timestamp, - channel_values={"double-channel": float(i)}, - ) - - sift_client.async_.ingestion.wait_for_ingestion_to_complete(timeout=2) - - @pytest.mark.asyncio - async def test_create_automatic_association_for_assets(self, runs_api_async, sift_client): - """Test associating assets with a run for automatic data ingestion.""" - # Create a test run - run_name = f"test_run_asset_assoc_{datetime.now(timezone.utc).isoformat()}" - run_create = RunCreate( - name=run_name, - description="Test run for asset association", - tags=["sift-client-pytest"], - start_time=datetime.now(timezone.utc), - 
stop_time=datetime.now(timezone.utc) + timedelta(seconds=11), - ) - created_run = None - - try: - # Get some assets to associate - assets = await sift_client.async_.assets.list_(limit=2) - assert len(assets) >= 2 - - # Associate assets with the run - created_run = await runs_api_async.create( - run_create, assets=assets, associate_new_data=True - ) - - for asset in assets: - await self.ingest_data_to_asset(sift_client, asset.name) - # Verify the association by getting the run and checking asset_ids - updated_run = await runs_api_async.get(run_id=created_run._id_or_error) - assert updated_run.asset_ids is not None - assert len(updated_run.asset_ids) >= len(assets) - for asset in assets: - assert asset.id_ in updated_run.asset_ids - - # Fetching these channels is flaky/slow depending on how long update monitor takes to run. - # channels = await sift_client.async_.channels.list_(run=created_run) - # assert channels is not None - # assert "double-channel" in [channel.name for channel in channels] - - finally: - await runs_api_async.archive(created_run) - @pytest.mark.asyncio async def test_create_adhoc_run_all( self, runs_api_async, sift_client, test_tag, ci_pytest_tag From 968341a357e7ec4f08431315165cbd64b19e0b80 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 18:23:01 -0800 Subject: [PATCH 44/47] fix test imports --- python/lib/sift_client/_tests/resources/test_runs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lib/sift_client/_tests/resources/test_runs.py b/python/lib/sift_client/_tests/resources/test_runs.py index 486764ab6..30a93a263 100644 --- a/python/lib/sift_client/_tests/resources/test_runs.py +++ b/python/lib/sift_client/_tests/resources/test_runs.py @@ -14,7 +14,7 @@ from sift_client import SiftClient from sift_client.resources import RunsAPI, RunsAPIAsync -from sift_client.sift_types import ChannelConfig, ChannelDataType, Flow, Run +from sift_client.sift_types import Run from sift_client.sift_types.run 
import RunCreate, RunUpdate pytestmark = pytest.mark.integration From dec499bc4b1d9fb8dbf19adb810de8de19bd292a Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 18:23:41 -0800 Subject: [PATCH 45/47] ruff --- python/lib/sift_client/_tests/resources/test_runs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/lib/sift_client/_tests/resources/test_runs.py b/python/lib/sift_client/_tests/resources/test_runs.py index 30a93a263..0de6bc04e 100644 --- a/python/lib/sift_client/_tests/resources/test_runs.py +++ b/python/lib/sift_client/_tests/resources/test_runs.py @@ -491,6 +491,7 @@ async def test_stop_run_with_start_time(self, runs_api_async, new_run): class TestAssetAssociation: """Tests for the async asset association methods.""" + @pytest.mark.asyncio async def test_create_adhoc_run_all( self, runs_api_async, sift_client, test_tag, ci_pytest_tag From a517fe1be35b84c2cda4cbbf581de730956301b0 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 18:26:07 -0800 Subject: [PATCH 46/47] remove pyright fixes again --- python/lib/sift_client/resources/ingestion.py | 2 +- python/lib/sift_client/sift_types/ingestion.py | 6 +++--- python/pyproject.toml | 2 -- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 2ce5ec520..8f6d5329a 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -346,7 +346,7 @@ async def create( if isinstance(ingestion_config, IngestionConfig): # SiftStream will retrieve the existing config from the client_key asset = sift_client.assets.get(asset_id=ingestion_config.asset_id) - ingestion_config_form = IngestionConfigFormPy( # type: ignore[call-arg] + ingestion_config_form = IngestionConfigFormPy( asset_name=asset.name, client_key=ingestion_config.client_key, flows=[], diff --git a/python/lib/sift_client/sift_types/ingestion.py 
b/python/lib/sift_client/sift_types/ingestion.py index aa537c1ae..d8db6d207 100644 --- a/python/lib/sift_client/sift_types/ingestion.py +++ b/python/lib/sift_client/sift_types/ingestion.py @@ -88,7 +88,7 @@ def _to_rust_form(self) -> IngestionConfigFormPy: else: client_key = _hash_flows(self.asset_name, self.flows or []) - return IngestionConfigFormPy( # type: ignore[call-arg] + return IngestionConfigFormPy( asset_name=self.asset_name, flows=[flow_config._to_rust_config() for flow_config in self.flows] if self.flows @@ -268,7 +268,7 @@ def _to_rust_config(self) -> FlowConfigPy: # Importing here to allow sift_stream_bindings to be an optional dependancy for non-ingestion users from sift_stream_bindings import FlowConfigPy - return FlowConfigPy( # type: ignore[call-arg] + return FlowConfigPy( name=self.name, channels=[_channel_config_to_rust_config(channel) for channel in self.channels], ) @@ -343,7 +343,7 @@ def _to_rust_form(self) -> FlowPy: from sift_client._internal.low_level_wrappers.ingestion import _to_rust_py_timestamp - return FlowPy( # type: ignore[call-arg] + return FlowPy( flow_name=self.flow, timestamp=_to_rust_py_timestamp(self.timestamp), values=[channel_value._to_rust_form() for channel_value in self.channel_values], diff --git a/python/pyproject.toml b/python/pyproject.toml index b3176e567..9972cba15 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -87,8 +87,6 @@ python_version = "3.8" reportOptionalMemberAccess = "none" reportArgumentType = "none" reportAttributeAccessIssue = "none" -reportCallIssue = "warning" -reportMissingTypeStubs = "none" stubPath = "lib/sift_client/resources/sync_stubs" useLibraryCodeForTypes = true exclude = [ From fb66ab5f7759210a653cc8a935d58e874da0bb56 Mon Sep 17 00:00:00 2001 From: Nathan Federknopp Date: Mon, 10 Nov 2025 19:54:51 -0800 Subject: [PATCH 47/47] pr feedback --- python/docs/examples/ingestion.ipynb | 102 ++++++++++++++++++ python/docs/examples/ingestion_example.py | 64 ----------- 
python/lib/sift_client/resources/ingestion.py | 20 ++-- python/mkdocs.yml | 1 + 4 files changed, 113 insertions(+), 74 deletions(-) create mode 100644 python/docs/examples/ingestion.ipynb delete mode 100644 python/docs/examples/ingestion_example.py diff --git a/python/docs/examples/ingestion.ipynb b/python/docs/examples/ingestion.ipynb new file mode 100644 index 000000000..adf011421 --- /dev/null +++ b/python/docs/examples/ingestion.ipynb @@ -0,0 +1,102 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0b202351", + "metadata": {}, + "source": [ + "# Sift Client Ingestion Basic Example\n", + "\n", + "This notebook demonstrates some example features of SiftClient ingestion:\n", + "- Initializing the Sift client\n", + "- Creating an ingestion config\n", + "- Creating a run\n", + "- Creating and sending flows\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02268d76", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "import asyncio\n", + "import random\n", + "import time\n", + "from datetime import datetime, timezone\n", + "\n", + "from sift_client import SiftClient, SiftConnectionConfig\n", + "from sift_client.sift_types import (\n", + " ChannelConfig,\n", + " ChannelDataType,\n", + " FlowConfig,\n", + " IngestionConfigCreate,\n", + " RunCreate,\n", + ")\n", + "\n", + "\n", + "async def main():\n", + " connection_config = SiftConnectionConfig(\n", + " api_key=\"my_api_key\",\n", + " grpc_url=\"sift_grpc_url\",\n", + " rest_url=\"sift_rest_url\",\n", + " )\n", + "\n", + " client = SiftClient(connection_config=connection_config)\n", + "\n", + " # Ingestion configs are created using SiftClient types\n", + " ingestion_config = IngestionConfigCreate(\n", + " asset_name=\"sift_rover_1\",\n", + " flows=[\n", + " FlowConfig(\n", + " name=\"onboard_sensors\",\n", + " channels=[\n", + " ChannelConfig(name=\"motor_temp\", unit=\"C\", data_type=ChannelDataType.DOUBLE),\n", + " ChannelConfig(\n",
+ " name=\"tank_pressure\", unit=\"kPa\", data_type=ChannelDataType.DOUBLE\n", + " ),\n", + " ],\n", + " )\n", + " ],\n", + " )\n", + "\n", + " run = RunCreate(name=\"sift_rover-\" + str(int(time.time())))\n", + "\n", + " async with await client.async_.ingestion.create_ingestion_config_streaming_client(\n", + " ingestion_config=ingestion_config,\n", + " run=run,\n", + " ) as ingest_client:\n", + " while True:\n", + " # Flows can be generated easily from the ingest client\n", + " flow_config = ingest_client.get_flow_config(flow_name=\"onboard_sensors\")\n", + " flow = flow_config.as_flow(\n", + " timestamp=datetime.now(timezone.utc),\n", + " values={\n", + " \"motor_temp\": 50.0 + random.random() * 5.0,\n", + " \"tank_pressure\": 2000.0 + random.random() * 100.0,\n", + " },\n", + " )\n", + " # Ingest the flow with .send()\n", + " await ingest_client.send(flow=flow)\n", + "\n", + " await asyncio.sleep(1)\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " asyncio.run(main())\n" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python/docs/examples/ingestion_example.py b/python/docs/examples/ingestion_example.py deleted file mode 100644 index 171d6c61b..000000000 --- a/python/docs/examples/ingestion_example.py +++ /dev/null @@ -1,64 +0,0 @@ -import asyncio -import random -import time -from datetime import datetime, timezone - -from sift_client import SiftClient, SiftConnectionConfig -from sift_client.sift_types import ( - ChannelConfig, - ChannelDataType, - FlowConfig, - IngestionConfigCreate, - RunCreate, -) - - -async def main(): - connection_config = SiftConnectionConfig( - api_key="my_api_key", - grpc_url="sift_grpc_url", - rest_url="sift_rest_url", - ) - - client = SiftClient(connection_config=connection_config) - - # Ingestion configs are created using SiftClient types - ingestion_config = IngestionConfigCreate( - asset_name="sift_rover_1", - flows=[ - FlowConfig( - 
name="onboard_sensors", - channels=[ - ChannelConfig(name="motor_temp", unit="C", data_type=ChannelDataType.DOUBLE), - ChannelConfig( - name="tank_pressure", unit="kPa", data_type=ChannelDataType.DOUBLE - ), - ], - ) - ], - ) - - run = RunCreate(name="sift_rover-" + str(int(time.time()))) - - async with await client.async_.ingestion.create_ingestion_config_streaming_client( - ingestion_config=ingestion_config, - run=run, - ) as ingest_client: - while True: - # Flows can be generated easily from the ingest client - flow_config = ingest_client.get_flow_config(flow_name="onboard_sensors") - flow = flow_config.as_flow( - timestamp=datetime.now(timezone.utc), - values={ - "motor_temp": 50.0 + random.random() * 5.0, - "tank_pressure": 2000.0 + random.random() * 100.0, - }, - ) - # Ingest the flow with .send() - await ingest_client.send(flow=flow) - - await asyncio.sleep(1) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/lib/sift_client/resources/ingestion.py b/python/lib/sift_client/resources/ingestion.py index 8f6d5329a..59668378f 100644 --- a/python/lib/sift_client/resources/ingestion.py +++ b/python/lib/sift_client/resources/ingestion.py @@ -242,7 +242,7 @@ async def create_ingestion_config_streaming_client( checkpoint_interval_seconds: int | None = None, enable_tls: bool = True, tracing_config: TracingConfig | None = None, - ) -> _IngestionConfigStreamingClient: + ) -> IngestionConfigStreamingClient: """Create an IngestionConfigStreamingClient. Args: @@ -261,7 +261,7 @@ async def create_ingestion_config_streaming_client( Returns: An initialized IngestionConfigStreamingClient. 
""" - return await _IngestionConfigStreamingClient.create( + return await IngestionConfigStreamingClient._create( self.client, ingestion_config=ingestion_config, run=run, @@ -274,7 +274,7 @@ async def create_ingestion_config_streaming_client( ) -class _IngestionConfigStreamingClient(ResourceBase): +class IngestionConfigStreamingClient(ResourceBase): """A client for streaming ingestion with an ingestion config. This client provides a high-level interface for streaming data to Sift using @@ -292,7 +292,7 @@ def __init__( self._low_level_client = low_level_client @classmethod - async def create( + async def _create( cls, sift_client: SiftClient, ingestion_config: IngestionConfig | IngestionConfigCreate | IngestionConfigFormPy, @@ -304,7 +304,7 @@ async def create( checkpoint_interval_seconds: int | None = None, enable_tls: bool = True, tracing_config: TracingConfig | None = None, - ) -> _IngestionConfigStreamingClient: + ) -> IngestionConfigStreamingClient: """Create an IngestionConfigStreamingClient. Args: @@ -412,7 +412,7 @@ async def create( return cls(sift_client, low_level_client) - async def send(self, *, flow: Flow | FlowPy): + async def send(self, flow: Flow | FlowPy): """Send telemetry to Sift in the form of a Flow. This is the entry-point to send actual telemetry to Sift. If a message is sent that @@ -440,7 +440,7 @@ async def send(self, *, flow: Flow | FlowPy): flow_py = flow await self._low_level_client.send(flow_py) - async def send_requests(self, *, requests: list[IngestWithConfigDataStreamRequestPy]): + async def send_requests(self, requests: list[IngestWithConfigDataStreamRequestPy]): """Send data in a manner identical to the raw gRPC service for ingestion-config based streaming. This method offers a way to send data that matches the raw gRPC service interface. 
You are @@ -455,7 +455,7 @@ async def send_requests(self, *, requests: list[IngestWithConfigDataStreamReques """ await self._low_level_client.send_requests(requests) - async def add_new_flows(self, *, flow_configs: list[FlowConfig]): + async def add_new_flows(self, flow_configs: list[FlowConfig]): """Modify the existing ingestion config by adding new flows that weren't accounted for during initialization. This allows you to dynamically add new flow configurations to the ingestion config after @@ -468,7 +468,7 @@ async def add_new_flows(self, *, flow_configs: list[FlowConfig]): flow_configs_py = [flow_config._to_rust_config() for flow_config in flow_configs] await self._low_level_client.add_new_flows(flow_configs_py) - async def attach_run(self, *, run: RunCreate | dict | str | Run | RunFormPy): + async def attach_run(self, run: RunCreate | dict | str | Run | RunFormPy): """Attach a run to the stream. Any data provided through `send` after this function returns will be associated with @@ -539,7 +539,7 @@ def get_metrics_snapshot(self) -> SiftStreamMetricsSnapshotPy: """ return self._low_level_client.get_metrics_snapshot() - def get_flow_config(self, *, flow_name: str) -> FlowConfig: + def get_flow_config(self, flow_name: str) -> FlowConfig: """Retrieve a flow configuration by name. Args: diff --git a/python/mkdocs.yml b/python/mkdocs.yml index 3c68c3564..ed73da187 100644 --- a/python/mkdocs.yml +++ b/python/mkdocs.yml @@ -57,6 +57,7 @@ nav: - Sift Client API (New) - Examples: - examples/basic.ipynb + - examples/ingestion.ipynb # - Guides: # - Logging # - Error Handling