From 5b3f191ac25e1d3b48fac96e1a2578513714cf2f Mon Sep 17 00:00:00 2001 From: Nikos Antoniou Date: Thu, 4 Dec 2025 08:55:02 +0000 Subject: [PATCH 1/3] add generator detection for deepfake client, segregate pydantic models --- src/behavioralsignals/behavioral.py | 2 +- src/behavioralsignals/deepfakes.py | 39 ++++++++++++++++++++++------- src/behavioralsignals/models.py | 12 +++++++++ 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/behavioralsignals/behavioral.py b/src/behavioralsignals/behavioral.py index d182fd9..8f7cde0 100644 --- a/src/behavioralsignals/behavioral.py +++ b/src/behavioralsignals/behavioral.py @@ -9,8 +9,8 @@ ResultResponse, StreamingOptions, AudioUploadParams, - S3UrlUploadParams, ProcessListParams, + S3UrlUploadParams, ProcessListResponse, StreamingResultResponse, ) diff --git a/src/behavioralsignals/deepfakes.py b/src/behavioralsignals/deepfakes.py index 7d7142c..26e8579 100644 --- a/src/behavioralsignals/deepfakes.py +++ b/src/behavioralsignals/deepfakes.py @@ -8,11 +8,11 @@ ProcessItem, ResultResponse, StreamingOptions, - AudioUploadParams, - S3UrlUploadParams, ProcessListParams, ProcessListResponse, StreamingResultResponse, + DeepfakeAudioUploadParams, + DeepfakeS3UrlUploadParams, ) from .generated import api_pb2 as pb from .generated import api_pb2_grpc as pb_grpc @@ -24,6 +24,7 @@ def upload_audio( file_path: str, name: Optional[str] = None, embeddings: bool = False, + enable_generator_detection: bool = False, meta: Optional[str] = None, ) -> ProcessItem: """Uploads an audio file for processing and returns the process item. @@ -31,20 +32,31 @@ def upload_audio( Args: file_path (str): Path to the audio file to upload. name (str, optional): Optional name for the job request. Defaults to filename. - embeddings (bool): Whether to include speaker and behavioral embeddings. Defaults to False. 
+ enable_generator_detection (bool): Whether to include prediction for the source of the deepfake (generator model). Defaults to False. meta (str, optional): Metadata json containing any extra user-defined metadata. Returns: ProcessItem: The process item containing details about the submitted process. """ # Create and validate parameters - params = AudioUploadParams(file_path=file_path, name=name, embeddings=embeddings, meta=meta) + params = DeepfakeAudioUploadParams( + file_path=file_path, + name=name, + embeddings=embeddings, + meta=meta, + enable_generator_detection=enable_generator_detection, + ) # Use provided name or default to filename job_name = params.name or Path(params.file_path).name with open(params.file_path, "rb") as audio_file: files = {"file": audio_file} - data = {"name": job_name, "embeddings": params.embeddings} + data = { + "name": job_name, + "embeddings": params.embeddings, + "enable_generator_detection": params.enable_generator_detection, + } if params.meta: data["meta"] = params.meta @@ -63,6 +75,7 @@ def upload_s3_presigned_url( url: str, name: Optional[str] = None, embeddings: bool = False, + enable_generator_detection: bool = False, meta: Optional[str] = None, ) -> ProcessItem: """Uploads an S3 presigned url pointing to an audio file and returns the process item. @@ -70,13 +83,20 @@ def upload_s3_presigned_url( Args: url (str): The S3 presigned url. name (str, optional): Optional name for the job request. Defaults to filename. - embeddings (bool): Whether to include speaker and behavioral embeddings. Defaults to False. + embeddings (bool): Whether to include speaker embeddings. Defaults to False. + enable_generator_detection (bool): Whether to include prediction for the source of the deepfake (generator model). Defaults to False. meta (str, optional): Metadata json containing any extra user-defined metadata. Returns: ProcessItem: The process item containing details about the submitted process. 
""" # Create and validate parameters - params = S3UrlUploadParams(url=url, name=name, embeddings=embeddings, meta=meta) + params = DeepfakeS3UrlUploadParams( + url=url, + name=name, + embeddings=embeddings, + meta=meta, + enable_generator_detection=enable_generator_detection, + ) # Use provided name or default to filename job_name = params.name @@ -84,7 +104,8 @@ def upload_s3_presigned_url( payload = { "url": params.url, "name": job_name, - "embeddings": params.embeddings + "embeddings": params.embeddings, + "enable_generator_detection": params.enable_generator_detection, } if params.meta: @@ -96,7 +117,7 @@ def upload_s3_presigned_url( path=f"detection/clients/{self.config.cid}/processes/s3-presigned-url", method="POST", json=payload, - headers=headers + headers=headers, ) return ProcessItem(**response) diff --git a/src/behavioralsignals/models.py b/src/behavioralsignals/models.py index 7941276..723c54b 100644 --- a/src/behavioralsignals/models.py +++ b/src/behavioralsignals/models.py @@ -100,6 +100,18 @@ def validate_meta_json(cls, v): raise ValueError("meta must be valid JSON string") return v +class DeepfakeAudioUploadParams(AudioUploadParams): + enable_generator_detection: bool = Field( + False, description="Whether to include prediction for the source of the deepfake (generator model)" + ) + + +class DeepfakeS3UrlUploadParams(S3UrlUploadParams): + enable_generator_detection: bool = Field( + False, description="Whether to include prediction for the source of the deepfake (generator model)" + ) + + class ProcessItem(BaseModel): """Individual process in the list""" From e17ab4404976b97f5a979bae71ae18b85f81bfd2 Mon Sep 17 00:00:00 2001 From: Nikos Antoniou Date: Tue, 9 Dec 2025 13:24:36 +0200 Subject: [PATCH 2/3] README updates, fix typos --- README.md | 33 +++++++++++++++++++++++++++++ examples/batch/README.md | 2 +- examples/batch/batch_api_polling.py | 11 ++++++++-- src/behavioralsignals/behavioral.py | 2 +- src/behavioralsignals/deepfakes.py | 2 +- 5 files 
changed, 45 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 208a956..dcaa212 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,13 @@ response = client.behavioral.upload_audio(file_path="audio.wav") output = client.behavioral.get_result(pid=response.pid) ``` +Setting `embeddings=True` during audio upload will include speaker and behavioral embeddings in the output (see [documentation](https://behavioralsignals.readme.io/v5.4.0/docs/embeddings#/)): + +```python +response = client.behavioral.upload_audio(file_path="audio.wav", embeddings=True) +output = client.behavioral.get_result(pid=response.pid) +``` + ### Behavioral API Streaming Mode In streaming mode, you can send audio data in real-time to the Behavioral Signals API. The API will return results as they are processed. @@ -114,6 +121,32 @@ response = client.deepfakes.upload_audio(file_path="audio.wav") output = client.deepfakes.get_result(pid=response.pid) ``` +Setting `embeddings=True` during audio upload will include speaker and deepfake embeddings in the output (see [documentation](https://behavioralsignals.readme.io/v5.4.0/docs/embeddings-1#/)): + +```python +response = client.deepfakes.upload_audio(file_path="audio.wav", embeddings=True) +output = client.deepfakes.get_result(pid=response.pid) +``` + + +#### 🔬 Experimental: Deepfake Generator Prediction (Batch Only) + +An experimental option is now available that attempts to predict the generator model used to produce a deepfake. +When enabled, the returned results will contain an additional field - only for audio files with detected deepfake content - indicating the predicted generator model along with a confidence score. 
+ +You can activate this feature by passing `enable_generator_detection=True` during audio upload: + +```python +from behavioralsignals import Client + +client = Client(YOUR_CID, YOUR_API_KEY) + +response = client.deepfakes.upload_audio(file_path="audio.wav", enable_generator_detection=True) +output = client.deepfakes.get_result(pid=response.pid) +``` + +See more in our [API documentation](https://behavioralsignals.readme.io/v5.4.0/docs/generator-detection#/). + ### Deepfakes API Streaming Mode A similar streaming example for the Deepfakes API allows you to send audio data in real-time for speech deepfake detection: diff --git a/examples/batch/README.md b/examples/batch/README.md index 130b943..cdce3aa 100644 --- a/examples/batch/README.md +++ b/examples/batch/README.md @@ -30,7 +30,7 @@ The `batch_api_polling.py` is a simple script that submits and audio file to the ```bash python batch_api_polling.py --file_path audio.wav --output audio_results.json --api behavioral ``` -With the `--api` argument you can specify which API to use (either `behavioral` or `deepfakes`). +With the `--api` argument you can specify which API to use (either `behavioral` or `deepfakes`). You can also set the `--embeddings` flag to include speaker and behavioral/deepfake embeddings in the output. The results are saved to `audio_results.json` file once they are ready. 
diff --git a/examples/batch/batch_api_polling.py b/examples/batch/batch_api_polling.py index 9e7189e..8f43106 100644 --- a/examples/batch/batch_api_polling.py +++ b/examples/batch/batch_api_polling.py @@ -30,8 +30,15 @@ def parse_args(): type=str, default="behavioral", choices=["behavioral", "deepfakes"], - help="API to use for streaming", + help="API to use for processing", ) + parser.add_argument( + "--embeddings", + action="store_true", + help="Whether to include embeddings in the output (only for behavioral API)", + ) + + return parser.parse_args() @@ -48,7 +55,7 @@ def parse_args(): client = base_client.behavioral elif args.api == "deepfakes": client = base_client.deepfakes - upload_response = client.upload_audio(file_path=file_path) + upload_response = client.upload_audio(file_path=file_path, embeddings=args.embeddings) pid = upload_response.pid print(f"Sent audio for processing! Process ID (pid): {pid}") diff --git a/src/behavioralsignals/behavioral.py b/src/behavioralsignals/behavioral.py index 8f7cde0..d5d3082 100644 --- a/src/behavioralsignals/behavioral.py +++ b/src/behavioralsignals/behavioral.py @@ -172,7 +172,7 @@ def stream_audio( def _request_generator() -> Iterator[pb.AudioStream]: # Streaming API always requires the first message to contain - # the audio configurationand authentication details + # the audio configuration and authentication details audio_config = options.to_pb_config() req = pb.AudioStream( cid=int(self.config.cid), diff --git a/src/behavioralsignals/deepfakes.py b/src/behavioralsignals/deepfakes.py index 26e8579..74949a3 100644 --- a/src/behavioralsignals/deepfakes.py +++ b/src/behavioralsignals/deepfakes.py @@ -193,7 +193,7 @@ def stream_audio( def _request_generator() -> Iterator[pb.AudioStream]: # Streaming API always requires the first message to contain - # the audio configurationand authentication details + # the audio configuration and authentication details audio_config = options.to_pb_config() req = pb.AudioStream( 
cid=int(self.config.cid), From 58797d5de0a5ba9521d783c335167293ce80db66 Mon Sep 17 00:00:00 2001 From: Lefteris Kapelonis Date: Tue, 9 Dec 2025 15:38:59 +0000 Subject: [PATCH 3/3] Increment version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a232f96..5ae86de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "behavioralsignals" -version = "0.2.0" +version = "0.3.0" description = "Python SDK for Behavioral Signals API" readme = "README.md" authors = []