From 526e2852d12a1e395daa5eb2e2d63a319a573298 Mon Sep 17 00:00:00 2001 From: paskhalyan Date: Thu, 2 Apr 2026 18:19:46 +0400 Subject: [PATCH] Add documentation for integrating Hecttor audio denoising into Pipecat, including setup, usage examples, and API reference. --- docs.json | 2 + guides/features/hecttor.mdx | 175 ++++++++++++++++++++++ server/utilities/audio/hecttor-filter.mdx | 163 ++++++++++++++++++++ 3 files changed, 340 insertions(+) create mode 100644 guides/features/hecttor.mdx create mode 100644 server/utilities/audio/hecttor-filter.mdx diff --git a/docs.json b/docs.json index 09399ee2..1cbadfa7 100644 --- a/docs.json +++ b/docs.json @@ -69,6 +69,7 @@ "group": "Features", "pages": [ "guides/features/pipecat-flows", + "guides/features/hecttor", "guides/features/krisp-viva", "guides/features/whatsapp", "guides/features/gemini-live", @@ -271,6 +272,7 @@ "pages": [ "server/utilities/audio/aic-filter", "server/utilities/audio/audio-buffer-processor", + "server/utilities/audio/hecttor-filter", "server/utilities/audio/koala-filter", "server/utilities/audio/krisp-viva-filter", "server/utilities/audio/krisp-viva-vad-analyzer", diff --git a/guides/features/hecttor.mdx b/guides/features/hecttor.mdx new file mode 100644 index 00000000..bb4d8c64 --- /dev/null +++ b/guides/features/hecttor.mdx @@ -0,0 +1,175 @@ +--- +title: "Hecttor" +sidebarTitle: "Hecttor" +description: "Learn how to integrate Hecttor's audio denoising into your Pipecat application" +--- + +## Overview + +Hecttor's SDK provides real-time audio denoising for Pipecat applications using deep learning. It removes background noise and enhances speech clarity with two specialized modes: + +- **ASR Mode** — Optimized for speech recognition pipelines. Preserves speech characteristics important for transcription accuracy, leading to fewer errors and better STT results. +- **Human Mode** — Optimized for human listeners. 
Includes optional voice boost (multi-band compressor) that increases speech loudness and clarity. + + + + API reference for audio denoising + + + Complete example with Hecttor denoising + + + Learn more about Hecttor + + + +## Prerequisites + +To use Hecttor with Pipecat, you need: + +1. A Hecttor API key +2. The `hecttor_sdk` Python wheel for your platform + + + Get your API key and download the SDK from the [Hecttor admin panel](https://admin.hecttor.ai). + + +## Setup + +### Download the Python SDK + +1. Log in to the [Hecttor admin panel](https://admin.hecttor.ai) +2. Download the `hecttor_sdk` wheel file for your platform and Python version + +### Install the SDK + +Install the wheel file you downloaded: + +```bash +pip install hecttor_sdk-<version>-<python>-<platform>.whl +``` + +For example, on macOS ARM64 with Python 3.10: + +```bash +pip install hecttor_sdk-2.1.0-cp310-cp310-macosx_15_0_arm64.whl +``` + +Then install the pipecat Hecttor extra: + +```bash +pip install "pipecat-ai[hecttor]" +``` + +### Set up environment variables + +Add your Hecttor API key to your `.env` file: + +```bash +HECTTOR_API_KEY=your_api_key_here +``` + +## Test the integration + +You're ready to test! Try running the [Hecttor foundational example](https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/07zm-interruptible-hecttor.py), which demonstrates audio denoising with Deepgram STT, OpenAI LLM, and Cartesia TTS. + + + Learn how to [run foundational + examples](https://github.com/pipecat-ai/pipecat/blob/main/examples/README.md) + in Pipecat. + + +## Audio Denoising + +`HecttorFilter` removes background noise from the user's audio in real-time. Add it to any transport via the `audio_in_filter` parameter. + +### ASR Mode (for STT pipelines) + +Use ASR mode when the audio will be processed by a speech-to-text service. This mode preserves speech features that are important for accurate transcription. 
+ +```python +import os +from pipecat.audio.filters.hecttor_filter import HecttorFilter +from pipecat.transports.base_transport import TransportParams + +hecttor_filter = HecttorFilter( + api_key=os.getenv("HECTTOR_API_KEY"), + mode="asr", +) + +transport = SmallWebRTCTransport( + webrtc_connection=webrtc_connection, + params=TransportParams( + audio_in_enabled=True, + audio_in_filter=hecttor_filter, + audio_out_enabled=True, + ), +) +``` + +### Human Mode (with voice boost) + +Use Human mode when the audio will be heard by human listeners (e.g., recordings or call forwarding). Enable voice boost for louder, clearer speech. + +```python +import os +from pipecat.audio.filters.hecttor_filter import HecttorFilter +from pipecat.transports.daily.transport import DailyParams, DailyTransport + +hecttor_filter = HecttorFilter( + api_key=os.getenv("HECTTOR_API_KEY"), + mode="human", + enable_voice_boost=True, +) + +transport = DailyTransport( + room_url, + token, + "Respond bot", + DailyParams( + audio_in_enabled=True, + audio_in_filter=hecttor_filter, + audio_out_enabled=True, + ), +) +``` + +### Fine-tuning Enhancement + +Use the `enhancer_weight` parameter to blend between original and enhanced audio: + +```python +hecttor_filter = HecttorFilter( + api_key=os.getenv("HECTTOR_API_KEY"), + mode="asr", + enhancer_weight=0.8, # 80% enhanced, 20% original +) +``` + +### Runtime Toggle + +Disable and re-enable denoising at runtime using `FilterEnableFrame`: + +```python +from pipecat.frames.frames import FilterEnableFrame + +# Disable denoising +await task.queue_frame(FilterEnableFrame(False)) + +# Re-enable denoising +await task.queue_frame(FilterEnableFrame(True)) +``` + +See the [HecttorFilter reference](/server/utilities/audio/hecttor-filter) for all configuration options. 
diff --git a/server/utilities/audio/hecttor-filter.mdx b/server/utilities/audio/hecttor-filter.mdx new file mode 100644 index 00000000..c520e396 --- /dev/null +++ b/server/utilities/audio/hecttor-filter.mdx @@ -0,0 +1,163 @@ +--- +title: "HecttorFilter" +description: "Audio denoising filter using the Hecttor SDK" +--- + +## Overview + +`HecttorFilter` is an audio processor that enhances speech in real-time audio streams using the Hecttor SDK's neural network-based denoising. It inherits from `BaseAudioFilter` and processes audio frames to remove background noise before they reach the STT service. + +Two enhancement modes are available: + +- **ASR mode** — Optimized for speech recognition pipelines. Preserves speech characteristics important for transcription accuracy. +- **Human mode** — Optimized for human listeners. Includes optional voice boost (multi-band compressor) for speech loudness. + +To use Hecttor, you need an API key from the [Hecttor admin panel](https://admin.hecttor.ai). + +## Installation + +The `hecttor_sdk` package is not available on PyPI. Download the wheel for your platform from the [Hecttor admin panel](https://admin.hecttor.ai) and install it manually: + +```bash +pip install hecttor_sdk-<version>-<platform>.whl +``` + +Then install the pipecat Hecttor extra (for dependency tracking): + +```bash +pip install "pipecat-ai[hecttor]" +``` + +## Environment Variables + + + Hecttor API key for authentication and usage tracking. Can also be passed directly via the `api_key` constructor parameter. + + +## Constructor Parameters + + + Hecttor API key. If not provided, falls back to the `HECTTOR_API_KEY` environment variable. + + + + Enhancement mode. Use `"asr"` for speech recognition pipelines or `"human"` for human listeners. + + + + Chunk size in milliseconds for audio processing. Must be a positive multiple of 16. + + + + Enable voice boost post-processing (multi-band compressor for speech loudness). Only applies in `"human"` mode. 
+ + + + Blend factor between original and enhanced audio. `1.0` = fully enhanced, `0.0` = original audio. + + +## Supported Sample Rates + +The filter supports the following sample rates (the SDK automatically resamples internally): + +- 4000 Hz +- 8000 Hz +- 16000 Hz +- 24000 Hz +- 32000 Hz +- 44100 Hz +- 48000 Hz + +## Input Frames + + + Control frame to toggle filtering on/off at runtime. + +```python +from pipecat.frames.frames import FilterEnableFrame + +# Disable denoising +await task.queue_frame(FilterEnableFrame(False)) + +# Re-enable denoising +await task.queue_frame(FilterEnableFrame(True)) +``` + + + +## Usage Example + +### ASR Mode (for STT pipelines) + +```python +import os +from pipecat.audio.filters.hecttor_filter import HecttorFilter +from pipecat.transports.daily.transport import DailyParams, DailyTransport + +hecttor_filter = HecttorFilter( + api_key=os.getenv("HECTTOR_API_KEY"), + mode="asr", +) + +transport = DailyTransport( + room_url, + token, + "Respond bot", + DailyParams( + audio_in_enabled=True, + audio_in_filter=hecttor_filter, + audio_out_enabled=True, + ), +) +``` + +### Human Mode (with voice boost) + +```python +import os +from pipecat.audio.filters.hecttor_filter import HecttorFilter +from pipecat.transports.daily.transport import DailyParams, DailyTransport + +hecttor_filter = HecttorFilter( + api_key=os.getenv("HECTTOR_API_KEY"), + mode="human", + enable_voice_boost=True, +) + +transport = DailyTransport( + room_url, + token, + "Respond bot", + DailyParams( + audio_in_enabled=True, + audio_in_filter=hecttor_filter, + audio_out_enabled=True, + ), +) +``` + +### Other Transports + +The filter works with any transport that supports `audio_in_filter`: + +```python +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +# Twilio / WebSocket +params = FastAPIWebsocketParams( + audio_in_enabled=True, + audio_in_filter=hecttor_filter, + audio_out_enabled=True, +) +``` + +## Notes + +- The Hecttor SDK is not publicly 
available on PyPI. Download the wheel from the [Hecttor admin panel](https://admin.hecttor.ai). +- The SDK requires outbound HTTPS access for API key authentication on initialization. +- ASR mode is recommended when the audio will be processed by a speech-to-text service. +- Human mode with voice boost is recommended when the audio will be heard by human listeners. +- The `enhancer_weight` parameter lets you blend between original and enhanced audio for fine-tuning. +- The filter introduces minimal latency (~6ms processing per 16ms audio chunk). +- Internal neural network state is maintained across chunks for seamless streaming. +- To reset the internal state for a new audio stream, run the filter through a `stop()`/`start()` cycle, which calls `reset_caches()`.