Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -276,5 +276,19 @@ target_link_libraries(test-frameforge-new-features PRIVATE common)
add_test(NAME test-frameforge-new-features COMMAND $<TARGET_FILE:test-frameforge-new-features> WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/tests)
set_property(TEST test-frameforge-new-features PROPERTY LABELS "main")

# FrameForge Audio Capture Test
# Builds the audio test together with the capture implementation it exercises.
add_executable(test-frameforge-audio test-frameforge-audio.cpp
    ${CMAKE_SOURCE_DIR}/tools/frameforge/frameforge-audio.cpp)
target_include_directories(test-frameforge-audio PRIVATE ${CMAKE_SOURCE_DIR}/tools/frameforge ${CMAKE_SOURCE_DIR}/vendor)
target_link_libraries(test-frameforge-audio PRIVATE common)
# Add PortAudio if available.
# Detect it here rather than relying on PORTAUDIO_* variables produced by another
# directory's CMakeLists.txt, so the outcome does not depend on configure order
# or on values left in the cache by a previous run.
find_package(PkgConfig QUIET)
if(PkgConfig_FOUND)
    pkg_check_modules(PORTAUDIO QUIET IMPORTED_TARGET portaudio-2.0)
    if(PORTAUDIO_FOUND)
        # The imported target carries include directories, library search paths
        # and link flags, so installs outside the default linker path also work.
        target_link_libraries(test-frameforge-audio PRIVATE PkgConfig::PORTAUDIO)
        target_compile_definitions(test-frameforge-audio PRIVATE FRAMEFORGE_PORTAUDIO_SUPPORT)
    endif()
endif()
add_test(NAME test-frameforge-audio COMMAND $<TARGET_FILE:test-frameforge-audio> WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/tests)
set_property(TEST test-frameforge-audio PROPERTY LABELS "main")

llama_build_and_test(test-alloc.cpp)
target_include_directories(test-alloc PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src)
127 changes: 127 additions & 0 deletions tests/test-frameforge-audio.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#include "../tools/frameforge/frameforge-audio.h"

#include <atomic>
#include <cassert>
#include <chrono>
#include <iostream>
#include <thread>
#include <vector>

using namespace frameforge;

static void test_audio_config() {
std::cout << "Testing audio configuration..." << std::endl;

AudioConfig config;
assert(config.sample_rate == 16000);
assert(config.channels == 1);
assert(config.frames_per_buffer == 512);

// Custom config
AudioConfig custom;
custom.sample_rate = 44100;
custom.channels = 2;
custom.frames_per_buffer = 1024;

assert(custom.sample_rate == 44100);
assert(custom.channels == 2);
assert(custom.frames_per_buffer == 1024);

std::cout << " ✓ Audio configuration passed" << std::endl;
}

// Smoke-tests AudioCapture construction and, when PortAudio support is compiled
// in, one initialize/start/stop cycle followed by a clear/read check of the
// capture buffer. A failure to initialize or start (for example because no audio
// device is present) is reported but does not fail the test.
static void test_audio_capture_initialization() {
    std::cout << "Testing audio capture initialization..." << std::endl;

    AudioConfig config;
    AudioCapture capture(config);

    // A freshly constructed capture object must not report itself as capturing.
    assert(!capture.is_capturing());

#ifdef FRAMEFORGE_PORTAUDIO_SUPPORT
    std::cout << " PortAudio support is available" << std::endl;

    if (!capture.initialize()) {
        std::cout << " ! Audio capture initialization failed (this is OK if no audio device is available)" << std::endl;
        return;
    }
    std::cout << " ✓ Audio capture initialization succeeded" << std::endl;

    // Follow the typical lifecycle (start, then stop), but do not assume the
    // stream could actually be started.
    if (capture.start()) {
        capture.stop();
    } else {
        std::cout << " ! Could not start audio capture" << std::endl;
    }

    // Check the buffer only once capture is no longer running: while it is
    // running, new samples could arrive between clear_buffer() and
    // get_audio_buffer() and make the emptiness check flaky.
    // NOTE(review): assumes stop() halts delivery of new samples - confirm.
    capture.clear_buffer();
    const std::vector<float> buffer = capture.get_audio_buffer();
    assert(buffer.empty());
    std::cout << " ✓ Buffer operations work" << std::endl;
#else
    std::cout << " PortAudio support is not available" << std::endl;
    std::cout << " ✓ Stub implementation works" << std::endl;
#endif
}

// Registers a data callback on an AudioCapture, starts capturing and waits up to
// two seconds for the callback to fire. Not receiving any audio (or not being
// able to initialize/start at all) is reported but does not fail the test.
// Without PortAudio support the test is skipped.
static void test_audio_callback() {
    std::cout << "Testing audio callback..." << std::endl;

#ifdef FRAMEFORGE_PORTAUDIO_SUPPORT
    // Declared before `capture` so the flag outlives the object holding the
    // callback that references it.
    // Atomic because the callback may run on a different thread than this
    // polling loop; a plain bool here would be a data race.
    std::atomic<bool> callback_called{false};

    AudioConfig config;
    AudioCapture capture(config);

    capture.set_callback([&callback_called](const std::vector<float> & data) {
        callback_called.store(true);
        std::cout << " Callback received " << data.size() << " samples" << std::endl;
    });

    if (!capture.initialize()) {
        std::cout << " ! Could not initialize audio capture" << std::endl;
        return;
    }
    if (!capture.start()) {
        std::cout << " ! Could not start audio capture" << std::endl;
        return;
    }

    std::cout << " Audio capture started, waiting for callback..." << std::endl;

    // Poll against an absolute deadline. Comparing a duration truncated to whole
    // seconds with "> 2" would only time out after roughly three seconds.
    const auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds(2);
    while (!callback_called.load()) {
        if (std::chrono::steady_clock::now() >= deadline) {
            std::cout << " ! No callback received after 2 seconds (no audio input?)" << std::endl;
            break;
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }

    capture.stop();

    if (callback_called.load()) {
        std::cout << " ✓ Audio callback test passed" << std::endl;
    } else {
        std::cout << " ! Audio callback test completed (no audio detected)" << std::endl;
    }
#else
    std::cout << " PortAudio support not available, skipping callback test" << std::endl;
#endif
}

// Test driver: prints a banner, runs every audio test in declaration order and
// returns 0. Failures surface through assert() inside the individual tests.
int main() {
    std::cout << "Running FrameForge Audio Capture Tests" << std::endl
              << "======================================" << std::endl;

    using test_fn = void (*)();
    const test_fn all_tests[] = {
        test_audio_config,
        test_audio_capture_initialization,
        test_audio_callback,
    };
    for (const test_fn run_test : all_tests) {
        run_test();
    }

    std::cout << "\nAll tests completed!" << std::endl;
    return 0;
}
17 changes: 17 additions & 0 deletions tools/frameforge/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ add_executable(${TARGET}
frameforge-validator.cpp
frameforge-json.cpp
frameforge-ipc.cpp
frameforge-audio.cpp
)

target_include_directories(${TARGET} PRIVATE
Expand All @@ -32,6 +33,22 @@ else()
message(WARNING "Whisper not found at ${CMAKE_SOURCE_DIR}/external/whisper, frameforge-sidecar will build without Whisper support")
endif()

# PortAudio for live audio capture (optional).
# Detection goes through pkg-config; when PortAudio is found the target is built
# with FRAMEFORGE_PORTAUDIO_SUPPORT defined, otherwise live capture is left out.
find_package(PkgConfig)
if(PkgConfig_FOUND)
    # IMPORTED_TARGET creates PkgConfig::PORTAUDIO, which carries include
    # directories, library search paths and link flags. Linking the bare names in
    # PORTAUDIO_LIBRARIES fails when PortAudio is installed outside the linker's
    # default search path.
    pkg_check_modules(PORTAUDIO IMPORTED_TARGET portaudio-2.0)
    if(PORTAUDIO_FOUND)
        target_link_libraries(${TARGET} PRIVATE PkgConfig::PORTAUDIO)
        target_compile_definitions(${TARGET} PRIVATE FRAMEFORGE_PORTAUDIO_SUPPORT)
        message(STATUS "PortAudio found, enabling live audio capture")
    else()
        message(WARNING "PortAudio not found, frameforge-sidecar will build without live audio capture support")
    endif()
else()
    message(WARNING "PkgConfig not found, cannot detect PortAudio")
endif()

# Platform-specific libraries
if(WIN32)
# Windows-specific libraries
Expand Down
47 changes: 45 additions & 2 deletions tools/frameforge/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ This tool integrates Whisper.cpp for speech-to-text and Llama.cpp for intent cla
## Overview

The FrameForge Sidecar is a 64-bit resident process that:
1. Receives audio input (via file or IPC)
1. Receives audio input (via file, IPC, or live microphone capture with PortAudio)
2. Transcribes audio to text using Whisper
3. Classifies intent and extracts parameters using Llama
4. Validates commands against a strict schema
Expand Down Expand Up @@ -116,6 +116,30 @@ cmake --build build --config Release

The binary will be located at: `build/bin/frameforge-sidecar`

### Dependencies

**Required:**
- Llama.cpp (built-in)

**Optional:**
- Whisper.cpp - For speech-to-text transcription (recommended)
- PortAudio - For live microphone audio capture (recommended for production use)

To enable PortAudio support, install the development library before building:

**Ubuntu/Debian:**
```bash
sudo apt-get install portaudio19-dev
```

**macOS:**
```bash
brew install portaudio
```

**Windows:**
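Download and install PortAudio from http://www.portaudio.com/. The build detects PortAudio through pkg-config (module `portaudio-2.0`), so pkg-config must also be installed and able to locate the PortAudio installation.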

## Usage

### Test Mode (with audio file)
Expand All @@ -128,6 +152,24 @@ The binary will be located at: `build/bin/frameforge-sidecar`
--verbose
```

### Live Audio Capture Mode (with microphone)

**Requires both PortAudio and Whisper support (see Building section)**

```bash
./build/bin/frameforge-sidecar \
--whisper-model /path/to/whisper-model.bin \
--llama-model /path/to/llama-model.gguf \
--live-audio \
--verbose
```

This mode continuously captures audio from the default microphone and uses Voice Activity Detection (VAD) to detect when speech begins and ends. A captured segment is processed once both of the following hold:
1. At least 500 ms of speech has been detected
2. The speech is followed by 250 ms of silence

The captured speech is then transcribed with Whisper, classified with Llama, and validated. The audio buffer is cleared after each processing cycle.

### Server Mode (IPC with Named Pipes)

```bash
Expand All @@ -139,9 +181,10 @@ The binary will be located at: `build/bin/frameforge-sidecar`

### Command-Line Options

- `-wm, --whisper-model FNAME` - Path to Whisper model file (required)
- `-wm, --whisper-model FNAME` - Path to Whisper model file (required if Whisper support is compiled)
- `-lm, --llama-model FNAME` - Path to Llama model file (required)
- `-a, --audio FILE` - Audio file to transcribe (for testing)
- `-la, --live-audio` - Enable live audio capture via PortAudio (requires PortAudio support)
- `-p, --pipe NAME` - Named pipe name (default: frameforge_pipe)
- `-vd, --verb-defs FILE` - Path to verb definitions JSON file (optional)
- `-t, --threads N` - Number of threads (default: 4)
Expand Down
Loading
Loading