Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@ response = client.behavioral.upload_audio(file_path="audio.wav")
output = client.behavioral.get_result(pid=response.pid)
```

Setting `embeddings=True` during audio upload will include speaker and behavioral embeddings in the output (see [documentation](https://behavioralsignals.readme.io/v5.4.0/docs/embeddings#/)):

```python
response = client.behavioral.upload_audio(file_path="audio.wav", embeddings=True)
output = client.behavioral.get_result(pid=response.pid)
```

### Behavioral API Streaming Mode

In streaming mode, you can send audio data in real-time to the Behavioral Signals API. The API will return results as they are processed.
Expand Down Expand Up @@ -114,6 +121,32 @@ response = client.deepfakes.upload_audio(file_path="audio.wav")
output = client.deepfakes.get_result(pid=response.pid)
```

Setting `embeddings=True` during audio upload will include speaker and deepfake embeddings in the output (see [documentation](https://behavioralsignals.readme.io/v5.4.0/docs/embeddings-1#/)):

```python
response = client.deepfakes.upload_audio(file_path="audio.wav", embeddings=True)
output = client.deepfakes.get_result(pid=response.pid)
```


#### 🔬 Experimental: Deepfake Generator Prediction (Batch Only)

An experimental option is now available that attempts to predict the generator model used to produce a deepfake.
When enabled, the returned results contain an additional field (only for audio files with detected deepfake content) indicating the predicted generator model along with a confidence score.

You can activate this feature by passing `enable_generator_detection=True` during audio upload:

```python
from behavioralsignals import Client

client = Client(YOUR_CID, YOUR_API_KEY)

response = client.deepfakes.upload_audio(file_path="audio.wav", enable_generator_detection=True)
output = client.deepfakes.get_result(pid=response.pid)
```

See more in our [API documentation](https://behavioralsignals.readme.io/v5.4.0/docs/generator-detection#/).

### Deepfakes API Streaming Mode

A similar streaming example for the Deepfakes API allows you to send audio data in real-time for speech deepfake detection:
Expand Down
2 changes: 1 addition & 1 deletion examples/batch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ The `batch_api_polling.py` is a simple script that submits an audio file to the
```bash
python batch_api_polling.py --file_path audio.wav --output audio_results.json --api behavioral
```
With the `--api` argument you can specify which API to use (either `behavioral` or `deepfakes`).
With the `--api` argument you can specify which API to use (either `behavioral` or `deepfakes`). You can also set the `--embeddings` flag to include speaker and behavioral/deepfake embeddings in the output.

The results are saved to `audio_results.json` file once they are ready.

Expand Down
11 changes: 9 additions & 2 deletions examples/batch/batch_api_polling.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,15 @@ def parse_args():
type=str,
default="behavioral",
choices=["behavioral", "deepfakes"],
help="API to use for streaming",
help="API to use for processing",
)
parser.add_argument(
"--embeddings",
action="store_true",
help="Whether to include embeddings in the output (only for behavioral API)",
)


return parser.parse_args()


Expand All @@ -48,7 +55,7 @@ def parse_args():
client = base_client.behavioral
elif args.api == "deepfakes":
client = base_client.deepfakes
upload_response = client.upload_audio(file_path=file_path)
upload_response = client.upload_audio(file_path=file_path, embeddings=args.embeddings)
pid = upload_response.pid
print(f"Sent audio for processing! Process ID (pid): {pid}")

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "behavioralsignals"
version = "0.2.0"
version = "0.3.0"
description = "Python SDK for Behavioral Signals API"
readme = "README.md"
authors = []
Expand Down
4 changes: 2 additions & 2 deletions src/behavioralsignals/behavioral.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
ResultResponse,
StreamingOptions,
AudioUploadParams,
S3UrlUploadParams,
ProcessListParams,
S3UrlUploadParams,
ProcessListResponse,
StreamingResultResponse,
)
Expand Down Expand Up @@ -172,7 +172,7 @@ def stream_audio(

def _request_generator() -> Iterator[pb.AudioStream]:
# Streaming API always requires the first message to contain
# the audio configurationand authentication details
# the audio configuration and authentication details
audio_config = options.to_pb_config()
req = pb.AudioStream(
cid=int(self.config.cid),
Expand Down
41 changes: 31 additions & 10 deletions src/behavioralsignals/deepfakes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
ProcessItem,
ResultResponse,
StreamingOptions,
AudioUploadParams,
S3UrlUploadParams,
ProcessListParams,
ProcessListResponse,
StreamingResultResponse,
DeepfakeAudioUploadParams,
DeepfakeS3UrlUploadParams,
)
from .generated import api_pb2 as pb
from .generated import api_pb2_grpc as pb_grpc
Expand All @@ -24,27 +24,39 @@ def upload_audio(
file_path: str,
name: Optional[str] = None,
embeddings: bool = False,
enable_generator_detection: bool = False,
meta: Optional[str] = None,
) -> ProcessItem:
"""Uploads an audio file for processing and returns the process item.

Args:
file_path (str): Path to the audio file to upload.
name (str, optional): Optional name for the job request. Defaults to filename.
embeddings (bool): Whether to include speaker and behavioral embeddings. Defaults to False.
embeddings (bool): Whether to include speaker embeddings. Defaults to False.
enable_generator_detection (bool): Whether to include prediction for the source of the deepfake (generator model). Defaults to False.
meta (str, optional): Metadata json containing any extra user-defined metadata.
Returns:
ProcessItem: The process item containing details about the submitted process.
"""
# Create and validate parameters
params = AudioUploadParams(file_path=file_path, name=name, embeddings=embeddings, meta=meta)
params = DeepfakeAudioUploadParams(
file_path=file_path,
name=name,
embeddings=embeddings,
meta=meta,
enable_generator_detection=enable_generator_detection,
)

# Use provided name or default to filename
job_name = params.name or Path(params.file_path).name

with open(params.file_path, "rb") as audio_file:
files = {"file": audio_file}
data = {"name": job_name, "embeddings": params.embeddings}
data = {
"name": job_name,
"embeddings": params.embeddings,
"enable_generator_detection": params.enable_generator_detection,
}

if params.meta:
data["meta"] = params.meta
Expand All @@ -63,28 +75,37 @@ def upload_s3_presigned_url(
url: str,
name: Optional[str] = None,
embeddings: bool = False,
enable_generator_detection: bool = False,
meta: Optional[str] = None,
) -> ProcessItem:
"""Uploads an S3 presigned url pointing to an audio file and returns the process item.

Args:
url (str): The S3 presigned url.
name (str, optional): Optional name for the job request. Defaults to filename.
embeddings (bool): Whether to include speaker and behavioral embeddings. Defaults to False.
embeddings (bool): Whether to include speaker embeddings. Defaults to False.
enable_generator_detection (bool): Whether to include prediction for the source of the deepfake (generator model). Defaults to False.
meta (str, optional): Metadata json containing any extra user-defined metadata.
Returns:
ProcessItem: The process item containing details about the submitted process.
"""
# Create and validate parameters
params = S3UrlUploadParams(url=url, name=name, embeddings=embeddings, meta=meta)
params = DeepfakeS3UrlUploadParams(
url=url,
name=name,
embeddings=embeddings,
meta=meta,
enable_generator_detection=enable_generator_detection,
)

# Use provided name or default to filename
job_name = params.name

payload = {
"url": params.url,
"name": job_name,
"embeddings": params.embeddings
"embeddings": params.embeddings,
"enable_generator_detection": params.enable_generator_detection,
}

if params.meta:
Expand All @@ -96,7 +117,7 @@ def upload_s3_presigned_url(
path=f"detection/clients/{self.config.cid}/processes/s3-presigned-url",
method="POST",
json=payload,
headers=headers
headers=headers,
)

return ProcessItem(**response)
Expand Down Expand Up @@ -172,7 +193,7 @@ def stream_audio(

def _request_generator() -> Iterator[pb.AudioStream]:
# Streaming API always requires the first message to contain
# the audio configurationand authentication details
# the audio configuration and authentication details
audio_config = options.to_pb_config()
req = pb.AudioStream(
cid=int(self.config.cid),
Expand Down
12 changes: 12 additions & 0 deletions src/behavioralsignals/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,18 @@ def validate_meta_json(cls, v):
raise ValueError("meta must be valid JSON string")
return v

class DeepfakeAudioUploadParams(AudioUploadParams):
    """Audio-upload parameters for the Deepfakes API.

    Inherits every field of ``AudioUploadParams`` and adds the
    ``enable_generator_detection`` switch, which is off by default.
    """

    enable_generator_detection: bool = Field(
        default=False,
        description="Whether to include prediction for the source of the deepfake (generator model)",
    )


class DeepfakeS3UrlUploadParams(S3UrlUploadParams):
    """S3 presigned-URL upload parameters for the Deepfakes API.

    Inherits every field of ``S3UrlUploadParams`` and adds the
    ``enable_generator_detection`` switch, which is off by default.
    """

    enable_generator_detection: bool = Field(
        default=False,
        description="Whether to include prediction for the source of the deepfake (generator model)",
    )



class ProcessItem(BaseModel):
"""Individual process in the list"""
Expand Down