diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c3d9f9c324f..c39539c143b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -248,5 +248,15 @@ get_filename_component(TEST_TARGET test-c.c NAME_WE) add_executable(${TEST_TARGET} test-c.c) target_link_libraries(${TEST_TARGET} PRIVATE llama) +# FrameForge validator tests +add_executable(test-frameforge-validator test-frameforge-validator.cpp + ${CMAKE_SOURCE_DIR}/tools/frameforge/frameforge-schema.cpp + ${CMAKE_SOURCE_DIR}/tools/frameforge/frameforge-validator.cpp + ${CMAKE_SOURCE_DIR}/tools/frameforge/frameforge-json.cpp) +target_include_directories(test-frameforge-validator PRIVATE ${CMAKE_SOURCE_DIR}/tools/frameforge ${CMAKE_SOURCE_DIR}/vendor) +target_link_libraries(test-frameforge-validator PRIVATE common) +add_test(NAME test-frameforge-validator COMMAND $) +set_property(TEST test-frameforge-validator PROPERTY LABELS "main") + llama_build_and_test(test-alloc.cpp) target_include_directories(test-alloc PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src) diff --git a/tests/test-frameforge-validator.cpp b/tests/test-frameforge-validator.cpp new file mode 100644 index 00000000000..b2588705599 --- /dev/null +++ b/tests/test-frameforge-validator.cpp @@ -0,0 +1,211 @@ +#include "../../tools/frameforge/frameforge-schema.h" +#include "../../tools/frameforge/frameforge-validator.h" +#include "../../tools/frameforge/frameforge-json.h" + +#include +#include +#include + +using namespace frameforge; + +static void test_verb_conversion() { + std::cout << "Testing verb conversion..." 
<< std::endl; + + // Test basic verb + assert(string_to_verb("PAN") == Verb::PAN); + assert(verb_to_string(Verb::PAN) == "PAN"); + + // Test misspelling + assert(string_to_verb("PIN") == Verb::PAN); + + // Test case insensitivity + assert(string_to_verb("pan") == Verb::PAN); + assert(string_to_verb("Pan") == Verb::PAN); + + std::cout << " ✓ Verb conversion tests passed" << std::endl; +} + +static void test_action_group() { + std::cout << "Testing action group mapping..." << std::endl; + + assert(get_action_group_for_verb(Verb::PAN) == ActionGroup::CAMERA_CONTROL); + assert(get_action_group_for_verb(Verb::SET_POSE) == ActionGroup::ACTOR_POSE); + assert(get_action_group_for_verb(Verb::ADD) == ActionGroup::OBJECT_MGMT); + assert(get_action_group_for_verb(Verb::SHOT) == ActionGroup::SHOT_MGMT); + + std::cout << " ✓ Action group tests passed" << std::endl; +} + +static void test_required_parameters() { + std::cout << "Testing required parameters..." << std::endl; + + auto pan_params = get_required_parameters(Verb::PAN); + assert(pan_params.size() == 1); + assert(pan_params[0] == "direction"); + + auto lean_params = get_required_parameters(Verb::LEAN); + assert(lean_params.size() == 2); + + auto add_params = get_required_parameters(Verb::ADD); + assert(add_params.size() == 1); + assert(add_params[0] == "target"); + + std::cout << " ✓ Required parameters tests passed" << std::endl; +} + +static void test_valid_command() { + std::cout << "Testing valid command validation..." << std::endl; + + CommandValidator validator; + + // Create a valid PAN command + Command cmd; + cmd.verb = Verb::PAN; + cmd.subject = "Camera1"; + cmd.action_group = ActionGroup::CAMERA_CONTROL; + cmd.parameters.direction = Direction::LEFT; + + ValidationResult result = validator.validate(cmd); + assert(result.valid); + + std::cout << " ✓ Valid command test passed" << std::endl; +} + +static void test_missing_parameters() { + std::cout << "Testing missing parameter detection..." 
<< std::endl; + + CommandValidator validator; + + // Create PAN command without direction + Command cmd; + cmd.verb = Verb::PAN; + cmd.subject = "Camera1"; + cmd.action_group = ActionGroup::CAMERA_CONTROL; + // Missing direction parameter + + ValidationResult result = validator.validate(cmd); + assert(!result.valid); + assert(!result.missing_parameters.empty()); + + std::cout << " ✓ Missing parameter test passed" << std::endl; +} + +static void test_json_parsing() { + std::cout << "Testing JSON parsing and validation..." << std::endl; + + CommandValidator validator; + + std::string json_str = R"({ + "verb": "PAN", + "subject": "Camera1", + "action_group": "CAMERA_CONTROL", + "parameters": { + "direction": "LEFT" + } + })"; + + Command cmd; + ValidationResult result = validator.validate_json(json_str, cmd); + + assert(result.valid); + assert(cmd.verb == Verb::PAN); + assert(cmd.subject == "Camera1"); + assert(cmd.parameters.direction.has_value()); + assert(cmd.parameters.direction.value() == Direction::LEFT); + + std::cout << " ✓ JSON parsing test passed" << std::endl; +} + +static void test_json_serialization() { + std::cout << "Testing JSON serialization..." << std::endl; + + Command cmd; + cmd.verb = Verb::PAN; + cmd.subject = "Camera1"; + cmd.action_group = ActionGroup::CAMERA_CONTROL; + cmd.parameters.direction = Direction::LEFT; + cmd.valid = true; + + std::string json = command_to_json(cmd); + + assert(!json.empty()); + assert(json.find("\"PAN\"") != std::string::npos); + assert(json.find("\"Camera1\"") != std::string::npos); + assert(json.find("\"LEFT\"") != std::string::npos); + + std::cout << " ✓ JSON serialization test passed" << std::endl; +} + +static void test_complex_command() { + std::cout << "Testing complex command with pose..." 
<< std::endl; + + CommandValidator validator; + + std::string json_str = R"({ + "verb": "SET_POSE", + "subject": "Tom", + "action_group": "ACTOR_POSE", + "parameters": { + "pose_description": "arms crossed", + "joint_rotations": [ + {"name": "shoulder_left", "rotation_x": 0, "rotation_y": 45, "rotation_z": 0}, + {"name": "shoulder_right", "rotation_x": 0, "rotation_y": -45, "rotation_z": 0} + ] + } + })"; + + Command cmd; + ValidationResult result = validator.validate_json(json_str, cmd); + + assert(result.valid); + assert(cmd.verb == Verb::SET_POSE); + assert(cmd.subject == "Tom"); + assert(cmd.parameters.joint_rotations.has_value()); + assert(cmd.parameters.joint_rotations.value().size() == 2); + + std::cout << " ✓ Complex command test passed" << std::endl; +} + +static void test_clarification_request() { + std::cout << "Testing clarification request generation..." << std::endl; + + CommandValidator validator; + + Command cmd; + cmd.verb = Verb::PAN; + cmd.subject = "Camera1"; + cmd.action_group = ActionGroup::CAMERA_CONTROL; + + ValidationResult result = validator.validate(cmd); + assert(!result.valid); + + std::string clarification = validator.generate_clarification_request(result, cmd); + assert(!clarification.empty()); + assert(clarification.find("direction") != std::string::npos); + + std::cout << " ✓ Clarification request test passed" << std::endl; +} + +int main() { + std::cout << "Running FrameForge Validator Tests..." << std::endl; + std::cout << "======================================" << std::endl; + + try { + test_verb_conversion(); + test_action_group(); + test_required_parameters(); + test_valid_command(); + test_missing_parameters(); + test_json_parsing(); + test_json_serialization(); + test_complex_command(); + test_clarification_request(); + + std::cout << "======================================" << std::endl; + std::cout << "All tests passed! 
✓" << std::endl; + return 0; + } catch (const std::exception & e) { + std::cerr << "Test failed with exception: " << e.what() << std::endl; + return 1; + } +} diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index cc584645f04..506de6043a2 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -39,4 +39,5 @@ else() add_subdirectory(export-lora) endif() add_subdirectory(fit-params) + add_subdirectory(frameforge) endif() diff --git a/tools/frameforge/CMakeLists.txt b/tools/frameforge/CMakeLists.txt new file mode 100644 index 00000000000..4be9177eec5 --- /dev/null +++ b/tools/frameforge/CMakeLists.txt @@ -0,0 +1,39 @@ +set(TARGET frameforge-sidecar) + +add_executable(${TARGET} + frameforge-sidecar.cpp + frameforge-schema.cpp + frameforge-validator.cpp + frameforge-json.cpp + frameforge-ipc.cpp +) + +target_include_directories(${TARGET} PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/include + ${CMAKE_SOURCE_DIR}/external/whisper/include + ${CMAKE_SOURCE_DIR}/common + ${CMAKE_SOURCE_DIR}/ggml/include +) + +target_link_libraries(${TARGET} PRIVATE + common + llama +) + +# Link Whisper library +# We need to add whisper as a subdirectory or link to it +# For now, we'll create a target for whisper +add_subdirectory(${CMAKE_SOURCE_DIR}/external/whisper ${CMAKE_BINARY_DIR}/whisper EXCLUDE_FROM_ALL) + +target_link_libraries(${TARGET} PRIVATE whisper) + +# Platform-specific libraries +if(WIN32) + # Windows-specific libraries +elseif(UNIX) + # Unix-specific libraries + target_link_libraries(${TARGET} PRIVATE pthread) +endif() + +install(TARGETS ${TARGET} RUNTIME) diff --git a/tools/frameforge/README.md b/tools/frameforge/README.md new file mode 100644 index 00000000000..0ec8507f375 --- /dev/null +++ b/tools/frameforge/README.md @@ -0,0 +1,189 @@ +# FrameForge Studio Voice Command Integration + +This tool integrates Whisper.cpp for speech-to-text and Llama.cpp for intent classification to provide voice command functionality for FrameForge Studio. 
+ +## Overview + +The FrameForge Sidecar is a 64-bit resident process that: +1. Receives audio input (via file or IPC) +2. Transcribes audio to text using Whisper +3. Classifies intent and extracts parameters using Llama +4. Validates commands against a strict schema +5. Sends validated JSON commands to the 32-bit FrameForge Bridge via Named Pipes + +## Command Schema + +Commands follow a JSON-based schema with the following structure: + +```json +{ + "verb": "PAN", + "subject": "Camera1", + "action_group": "CAMERA_CONTROL", + "parameters": { + "direction": "LEFT", + "degrees": 45.0, + "speed": 10.0, + "target": "ObjectName", + "pose_description": "arms raised above head", + "joint_rotations": [ + {"name": "shoulder_left", "rotation_x": 0, "rotation_y": 90, "rotation_z": 0} + ] + } +} +``` + +### Action Groups + +- **CAMERA_CONTROL**: Camera movements (PAN, TILT, DOLLY, ZOOM, LEAN) +- **ACTOR_POSE**: Actor positioning (SET_POSE, ADJUST_POSE) +- **OBJECT_MGMT**: Object manipulation (ADD, DELETE, MOVE, ROTATE) +- **SHOT_MGMT**: Shot management (SHOT, SAVE_SHOT, LOAD_SHOT) + +### Verbs and Required Parameters + +| Verb | Required Parameters | +|------|---------------------| +| PAN | direction | +| TILT | direction | +| DOLLY | direction, speed | +| ZOOM | direction | +| LEAN | direction, degrees | +| SET_POSE | pose_description | +| ADJUST_POSE | pose_description | +| ADD | target | +| DELETE | target | +| MOVE | target, direction | +| ROTATE | target, degrees | +| SHOT | target | +| SAVE_SHOT | target | +| LOAD_SHOT | target | + +## Building + +The tool is built as part of the llama.cpp build process: + +```bash +cmake -B build +cmake --build build --config Release +``` + +The binary will be located at: `build/bin/frameforge-sidecar` + +## Usage + +### Test Mode (with audio file) + +```bash +./build/bin/frameforge-sidecar \ + --whisper-model /path/to/whisper-model.bin \ + --llama-model /path/to/llama-model.gguf \ + --audio /path/to/audio.wav \ + --verbose +``` + +### 
Server Mode (IPC with Named Pipes) + +```bash +./build/bin/frameforge-sidecar \ + --whisper-model /path/to/whisper-model.bin \ + --llama-model /path/to/llama-model.gguf \ + --pipe frameforge_pipe +``` + +### Command-Line Options + +- `-wm, --whisper-model FNAME` - Path to Whisper model file (required) +- `-lm, --llama-model FNAME` - Path to Llama model file (required) +- `-a, --audio FILE` - Audio file to transcribe (for testing) +- `-p, --pipe NAME` - Named pipe name (default: frameforge_pipe) +- `-t, --threads N` - Number of threads (default: 4) +- `-v, --verbose` - Enable verbose output +- `-h, --help` - Show help message + +## Architecture + +### Components + +1. **frameforge-schema**: Defines the command schema, action groups, verbs, and parameters +2. **frameforge-validator**: CommandValidator class for validating commands +3. **frameforge-json**: JSON serialization/deserialization utilities +4. **frameforge-ipc**: IPC server/client using Named Pipes +5. **frameforge-sidecar**: Main application integrating Whisper and Llama + +### Intent Classification + +The system uses a strict system prompt for Llama to classify intents: + +- Maps natural language to specific verbs +- Handles common misspellings (e.g., "PIN LEFT" → "PAN LEFT") +- Extracts parameters from context +- Generates joint rotation arrays for pose descriptions +- Returns only valid JSON + +### Validation + +The CommandValidator checks: +- Verb is valid and recognized +- Action group matches the verb +- All required parameters are present +- Parameter values are within valid ranges +- Subject is not empty + +If validation fails, the system generates a clarification request asking the user for missing information. + +### IPC Communication + +Named Pipes provide high-speed communication: +- **Windows**: `\\.\pipe\frameforge_pipe` +- **Unix/Linux**: `/tmp/frameforge_pipe` + +Messages are length-prefixed (a 4-byte size in host byte order, followed by the payload; the client's `receive_message()` rejects empty messages and payloads larger than 1 MB) for reliable streaming. 
+ +## Example Voice Commands + +- "Pan the camera left" +- "Tilt camera 1 up 30 degrees" +- "Add a chair to the scene" +- "Set Tom's pose to arms crossed" +- "Move the table forward" +- "Save shot as establishing" + +## Models + +### Recommended Models + +**Whisper**: +- `whisper-base.en` - Fast, English-only +- `whisper-small.en` - Better accuracy +- Download from: https://huggingface.co/ggerganov/whisper.cpp + +**Llama**: +- `llama-3-8b-instruct.gguf` - Good balance of speed and accuracy +- `llama-3.1-8b-instruct.gguf` - Latest version with better instruction following +- Download from: https://huggingface.co/models + +## Development + +### Adding New Verbs + +1. Add the verb to the `Verb` enum in `frameforge-schema.h` +2. Update `string_to_verb()` and `verb_to_string()` in `frameforge-schema.cpp` +3. Add to the appropriate action group in `get_action_group_for_verb()` +4. Define required parameters in `get_required_parameters()` + +### Testing + +Create test audio files with commands and run in test mode: + +```bash +./build/bin/frameforge-sidecar \ + -wm whisper-base.en.bin \ + -lm llama-3-8b-instruct.gguf \ + -a test_command.wav \ + -v +``` + +## License + +This tool is part of llama.cpp and follows the same MIT license. 
diff --git a/tools/frameforge/frameforge-ipc.cpp b/tools/frameforge/frameforge-ipc.cpp new file mode 100644 index 00000000000..8bfdffc46eb --- /dev/null +++ b/tools/frameforge/frameforge-ipc.cpp @@ -0,0 +1,362 @@ +#include "frameforge-ipc.h" + +#include +#include +#include + +#ifdef _WIN32 +#include +#else +#include +#include +#include +#include +#include +#endif + +namespace frameforge { + +// IPC constants +constexpr size_t MAX_MESSAGE_SIZE = 1024 * 1024; // 1MB +constexpr int MAX_PIPE_INSTANCES = 1; +constexpr int PIPE_BUFFER_SIZE = 4096; + +// IPCServer implementation + +IPCServer::IPCServer(const std::string & pipe_name) + : pipe_name_(pipe_name) + , running_(false) + , message_callback_(nullptr) +#ifdef _WIN32 + , pipe_handle_(INVALID_HANDLE_VALUE) +#else + , pipe_fd_(-1) +#endif +{ +} + +IPCServer::~IPCServer() { + stop(); +} + +bool IPCServer::start() { + if (running_) { + return false; + } + + if (!create_pipe()) { + return false; + } + + running_ = true; + return true; +} + +void IPCServer::stop() { + if (!running_) { + return; + } + + running_ = false; + close_pipe(); +} + +bool IPCServer::send_message(const std::string & message) { + if (!running_) { + return false; + } + +#ifdef _WIN32 + if (pipe_handle_ == INVALID_HANDLE_VALUE) { + return false; + } + + DWORD bytes_written; + uint32_t msg_size = static_cast(message.size()); + + // Write message size first + if (!WriteFile(pipe_handle_, &msg_size, sizeof(msg_size), &bytes_written, NULL)) { + return false; + } + + // Write message data + if (!WriteFile(pipe_handle_, message.c_str(), msg_size, &bytes_written, NULL)) { + return false; + } + + FlushFileBuffers(pipe_handle_); + return true; +#else + if (pipe_fd_ < 0) { + return false; + } + + uint32_t msg_size = static_cast(message.size()); + + // Write message size first + if (write(pipe_fd_, &msg_size, sizeof(msg_size)) != sizeof(msg_size)) { + return false; + } + + // Write message data + if (write(pipe_fd_, message.c_str(), msg_size) != 
static_cast(msg_size)) { + return false; + } + + return true; +#endif +} + +void IPCServer::set_message_callback(std::function callback) { + message_callback_ = callback; +} + +#ifdef _WIN32 + +bool IPCServer::create_pipe() { + std::string pipe_path = "\\\\.\\pipe\\" + pipe_name_; + + pipe_handle_ = CreateNamedPipeA(pipe_path.c_str(), PIPE_ACCESS_DUPLEX, + PIPE_TYPE_MESSAGE | PIPE_READMODE_MESSAGE | PIPE_WAIT, MAX_PIPE_INSTANCES, + PIPE_BUFFER_SIZE, // out buffer size + PIPE_BUFFER_SIZE, // in buffer size + 0, // default timeout + NULL); + + if (pipe_handle_ == INVALID_HANDLE_VALUE) { + std::cerr << "Failed to create named pipe: " << GetLastError() << std::endl; + return false; + } + + return true; +} + +void IPCServer::close_pipe() { + if (pipe_handle_ != INVALID_HANDLE_VALUE) { + DisconnectNamedPipe(pipe_handle_); + CloseHandle(pipe_handle_); + pipe_handle_ = INVALID_HANDLE_VALUE; + } +} + +#else + +bool IPCServer::create_pipe() { + std::string pipe_path = "/tmp/" + pipe_name_; + + // Remove existing pipe if it exists + unlink(pipe_path.c_str()); + + // Create FIFO (named pipe) + if (mkfifo(pipe_path.c_str(), 0666) != 0) { + std::cerr << "Failed to create named pipe: " << strerror(errno) << std::endl; + return false; + } + + // Open pipe for reading and writing (non-blocking initially) + pipe_fd_ = open(pipe_path.c_str(), O_RDWR | O_NONBLOCK); + if (pipe_fd_ < 0) { + std::cerr << "Failed to open named pipe: " << strerror(errno) << std::endl; + unlink(pipe_path.c_str()); + return false; + } + + return true; +} + +void IPCServer::close_pipe() { + if (pipe_fd_ >= 0) { + close(pipe_fd_); + pipe_fd_ = -1; + + std::string pipe_path = "/tmp/" + pipe_name_; + unlink(pipe_path.c_str()); + } +} + +#endif + +// IPCClient implementation + +IPCClient::IPCClient(const std::string & pipe_name) + : pipe_name_(pipe_name) + , connected_(false) +#ifdef _WIN32 + , pipe_handle_(INVALID_HANDLE_VALUE) +#else + , pipe_fd_(-1) +#endif +{ +} + +IPCClient::~IPCClient() { + disconnect(); +} 
+ +bool IPCClient::connect() { + if (connected_) { + return false; + } + +#ifdef _WIN32 + std::string pipe_path = "\\\\.\\pipe\\" + pipe_name_; + + // Try to connect to the pipe + pipe_handle_ = CreateFileA( + pipe_path.c_str(), + GENERIC_READ | GENERIC_WRITE, + 0, + NULL, + OPEN_EXISTING, + 0, + NULL + ); + + if (pipe_handle_ == INVALID_HANDLE_VALUE) { + std::cerr << "Failed to connect to pipe: " << GetLastError() << std::endl; + return false; + } + + // Set pipe to message-read mode + DWORD mode = PIPE_READMODE_MESSAGE; + if (!SetNamedPipeHandleState(pipe_handle_, &mode, NULL, NULL)) { + std::cerr << "Failed to set pipe mode: " << GetLastError() << std::endl; + CloseHandle(pipe_handle_); + pipe_handle_ = INVALID_HANDLE_VALUE; + return false; + } +#else + std::string pipe_path = "/tmp/" + pipe_name_; + + pipe_fd_ = open(pipe_path.c_str(), O_RDWR); + if (pipe_fd_ < 0) { + std::cerr << "Failed to connect to pipe: " << strerror(errno) << std::endl; + return false; + } +#endif + + connected_ = true; + return true; +} + +void IPCClient::disconnect() { + if (!connected_) { + return; + } + +#ifdef _WIN32 + if (pipe_handle_ != INVALID_HANDLE_VALUE) { + CloseHandle(pipe_handle_); + pipe_handle_ = INVALID_HANDLE_VALUE; + } +#else + if (pipe_fd_ >= 0) { + close(pipe_fd_); + pipe_fd_ = -1; + } +#endif + + connected_ = false; +} + +bool IPCClient::send_message(const std::string & message) { + if (!connected_) { + return false; + } + +#ifdef _WIN32 + if (pipe_handle_ == INVALID_HANDLE_VALUE) { + return false; + } + + DWORD bytes_written; + uint32_t msg_size = static_cast(message.size()); + + // Write message size first + if (!WriteFile(pipe_handle_, &msg_size, sizeof(msg_size), &bytes_written, NULL)) { + return false; + } + + // Write message data + if (!WriteFile(pipe_handle_, message.c_str(), msg_size, &bytes_written, NULL)) { + return false; + } + + return true; +#else + if (pipe_fd_ < 0) { + return false; + } + + uint32_t msg_size = static_cast(message.size()); + + // Write 
message size first + if (write(pipe_fd_, &msg_size, sizeof(msg_size)) != sizeof(msg_size)) { + return false; + } + + // Write message data + if (write(pipe_fd_, message.c_str(), msg_size) != static_cast(msg_size)) { + return false; + } + + return true; +#endif +} + +std::string IPCClient::receive_message() { + if (!connected_) { + return ""; + } + +#ifdef _WIN32 + if (pipe_handle_ == INVALID_HANDLE_VALUE) { + return ""; + } + + DWORD bytes_read; + uint32_t msg_size = 0; + + // Read message size first + if (!ReadFile(pipe_handle_, &msg_size, sizeof(msg_size), &bytes_read, NULL)) { + return ""; + } + + if (msg_size == 0 || msg_size > MAX_MESSAGE_SIZE) { + return ""; + } + + // Read message data + std::string message(msg_size, '\0'); + if (!ReadFile(pipe_handle_, &message[0], msg_size, &bytes_read, NULL)) { + return ""; + } + + return message; +#else + if (pipe_fd_ < 0) { + return ""; + } + + uint32_t msg_size = 0; + + // Read message size first + if (read(pipe_fd_, &msg_size, sizeof(msg_size)) != sizeof(msg_size)) { + return ""; + } + + if (msg_size == 0 || msg_size > MAX_MESSAGE_SIZE) { + return ""; + } + + // Read message data + std::string message(msg_size, '\0'); + if (read(pipe_fd_, &message[0], msg_size) != static_cast(msg_size)) { + return ""; + } + + return message; +#endif +} + +} // namespace frameforge diff --git a/tools/frameforge/frameforge-ipc.h b/tools/frameforge/frameforge-ipc.h new file mode 100644 index 00000000000..aba1a7385cf --- /dev/null +++ b/tools/frameforge/frameforge-ipc.h @@ -0,0 +1,82 @@ +#ifndef FRAMEFORGE_IPC_H +#define FRAMEFORGE_IPC_H + +#include +#include + +namespace frameforge { + +// IPC Server for Named Pipes +class IPCServer { +public: + IPCServer(const std::string & pipe_name); + ~IPCServer(); + + // Start the IPC server + bool start(); + + // Stop the IPC server + void stop(); + + // Check if server is running + bool is_running() const { return running_; } + + // Send a message through the pipe + bool send_message(const 
std::string & message); + + // Set callback for received messages + void set_message_callback(std::function callback); + +private: + std::string pipe_name_; + bool running_; + std::function message_callback_; + +#ifdef _WIN32 + void * pipe_handle_; // HANDLE on Windows + void server_loop_windows(); +#else + int pipe_fd_; + void server_loop_unix(); +#endif + + // Platform-specific implementations + bool create_pipe(); + void close_pipe(); +}; + +// IPC Client for Named Pipes +class IPCClient { +public: + IPCClient(const std::string & pipe_name); + ~IPCClient(); + + // Connect to the IPC server + bool connect(); + + // Disconnect from the server + void disconnect(); + + // Check if connected + bool is_connected() const { return connected_; } + + // Send a message through the pipe + bool send_message(const std::string & message); + + // Receive a message from the pipe (blocking) + std::string receive_message(); + +private: + std::string pipe_name_; + bool connected_; + +#ifdef _WIN32 + void * pipe_handle_; // HANDLE on Windows +#else + int pipe_fd_; +#endif +}; + +} // namespace frameforge + +#endif // FRAMEFORGE_IPC_H diff --git a/tools/frameforge/frameforge-json.cpp b/tools/frameforge/frameforge-json.cpp new file mode 100644 index 00000000000..c60dfd66801 --- /dev/null +++ b/tools/frameforge/frameforge-json.cpp @@ -0,0 +1,139 @@ +#include "frameforge-json.h" + +#include "../../vendor/nlohmann/json.hpp" + +using json = nlohmann::json; + +namespace frameforge { + +std::string command_to_json(const Command & cmd) { + json j; + + j["verb"] = verb_to_string(cmd.verb); + j["subject"] = cmd.subject; + j["action_group"] = action_group_to_string(cmd.action_group); + j["valid"] = cmd.valid; + + if (!cmd.error_message.empty()) { + j["error_message"] = cmd.error_message; + } + + json params = json::object(); + + if (cmd.parameters.direction.has_value()) { + params["direction"] = direction_to_string(cmd.parameters.direction.value()); + } + + if 
(cmd.parameters.degrees.has_value()) { + params["degrees"] = cmd.parameters.degrees.value(); + } + + if (cmd.parameters.speed.has_value()) { + params["speed"] = cmd.parameters.speed.value(); + } + + if (cmd.parameters.target.has_value()) { + params["target"] = cmd.parameters.target.value(); + } + + if (cmd.parameters.pose_description.has_value()) { + params["pose_description"] = cmd.parameters.pose_description.value(); + } + + if (cmd.parameters.joint_rotations.has_value()) { + json joints = json::array(); + for (const auto & joint : cmd.parameters.joint_rotations.value()) { + json joint_json; + joint_json["name"] = joint.name; + joint_json["rotation_x"] = joint.rotation_x; + joint_json["rotation_y"] = joint.rotation_y; + joint_json["rotation_z"] = joint.rotation_z; + joints.push_back(joint_json); + } + params["joint_rotations"] = joints; + } + + if (cmd.parameters.additional_params.has_value()) { + for (const auto & [key, value] : cmd.parameters.additional_params.value()) { + params[key] = value; + } + } + + j["parameters"] = params; + + return j.dump(2); +} + +Command json_to_command(const std::string & json_str) { + Command cmd; + + try { + json j = json::parse(json_str); + + cmd.verb = string_to_verb(j.value("verb", "")); + cmd.subject = j.value("subject", ""); + cmd.action_group = string_to_action_group(j.value("action_group", "")); + cmd.valid = j.value("valid", false); + cmd.error_message = j.value("error_message", ""); + + if (j.contains("parameters")) { + json params = j["parameters"]; + + if (params.contains("direction")) { + cmd.parameters.direction = string_to_direction(params["direction"].get()); + } + + if (params.contains("degrees")) { + cmd.parameters.degrees = params["degrees"].get(); + } + + if (params.contains("speed")) { + cmd.parameters.speed = params["speed"].get(); + } + + if (params.contains("target")) { + cmd.parameters.target = params["target"].get(); + } + + if (params.contains("pose_description")) { + cmd.parameters.pose_description = 
params["pose_description"].get(); + } + + if (params.contains("joint_rotations")) { + std::vector joints; + for (const auto & joint_json : params["joint_rotations"]) { + Joint joint; + joint.name = joint_json["name"].get(); + joint.rotation_x = joint_json.value("rotation_x", 0.0f); + joint.rotation_y = joint_json.value("rotation_y", 0.0f); + joint.rotation_z = joint_json.value("rotation_z", 0.0f); + joints.push_back(joint); + } + cmd.parameters.joint_rotations = joints; + } + } + + } catch (const std::exception & e) { + cmd.valid = false; + cmd.error_message = std::string("Error parsing JSON: ") + e.what(); + } + + return cmd; +} + +std::string validation_error_to_json( + const std::string & error_message, + const std::vector & missing_params +) { + json j; + j["error"] = error_message; + j["valid"] = false; + + if (!missing_params.empty()) { + j["missing_parameters"] = missing_params; + } + + return j.dump(2); +} + +} // namespace frameforge diff --git a/tools/frameforge/frameforge-json.h b/tools/frameforge/frameforge-json.h new file mode 100644 index 00000000000..61533163b3a --- /dev/null +++ b/tools/frameforge/frameforge-json.h @@ -0,0 +1,21 @@ +#ifndef FRAMEFORGE_JSON_H +#define FRAMEFORGE_JSON_H + +#include "frameforge-schema.h" +#include + +namespace frameforge { + +// Convert Command to JSON string +std::string command_to_json(const Command & cmd); + +// Parse JSON string to Command +Command json_to_command(const std::string & json_str); + +// Convert ValidationResult to JSON error object +std::string validation_error_to_json(const std::string & error_message, + const std::vector & missing_params); + +} // namespace frameforge + +#endif // FRAMEFORGE_JSON_H diff --git a/tools/frameforge/frameforge-schema.cpp b/tools/frameforge/frameforge-schema.cpp new file mode 100644 index 00000000000..8a4221dd366 --- /dev/null +++ b/tools/frameforge/frameforge-schema.cpp @@ -0,0 +1,172 @@ +#include "frameforge-schema.h" +#include +#include + +namespace frameforge { + +// 
Convert string to uppercase for case-insensitive comparison +static std::string to_upper(const std::string & str) { + std::string result = str; + std::transform(result.begin(), result.end(), result.begin(), + [](unsigned char c) { return std::toupper(c); }); + return result; +} + +std::string action_group_to_string(ActionGroup group) { + switch (group) { + case ActionGroup::CAMERA_CONTROL: return "CAMERA_CONTROL"; + case ActionGroup::ACTOR_POSE: return "ACTOR_POSE"; + case ActionGroup::OBJECT_MGMT: return "OBJECT_MGMT"; + case ActionGroup::SHOT_MGMT: return "SHOT_MGMT"; + case ActionGroup::UNKNOWN: return "UNKNOWN"; + } + return "UNKNOWN"; +} + +ActionGroup string_to_action_group(const std::string & str) { + std::string upper = to_upper(str); + if (upper == "CAMERA_CONTROL") return ActionGroup::CAMERA_CONTROL; + if (upper == "ACTOR_POSE") return ActionGroup::ACTOR_POSE; + if (upper == "OBJECT_MGMT") return ActionGroup::OBJECT_MGMT; + if (upper == "SHOT_MGMT") return ActionGroup::SHOT_MGMT; + return ActionGroup::UNKNOWN; +} + +std::string verb_to_string(Verb verb) { + switch (verb) { + case Verb::PAN: return "PAN"; + case Verb::TILT: return "TILT"; + case Verb::DOLLY: return "DOLLY"; + case Verb::ZOOM: return "ZOOM"; + case Verb::LEAN: return "LEAN"; + case Verb::SET_POSE: return "SET_POSE"; + case Verb::ADJUST_POSE: return "ADJUST_POSE"; + case Verb::ADD: return "ADD"; + case Verb::DELETE: return "DELETE"; + case Verb::MOVE: return "MOVE"; + case Verb::ROTATE: return "ROTATE"; + case Verb::SHOT: return "SHOT"; + case Verb::SAVE_SHOT: return "SAVE_SHOT"; + case Verb::LOAD_SHOT: return "LOAD_SHOT"; + case Verb::UNKNOWN: return "UNKNOWN"; + } + return "UNKNOWN"; +} + +Verb string_to_verb(const std::string & str) { + std::string upper = to_upper(str); + // Handle common misspellings/alternatives + if (upper == "PIN" || upper == "PAN") return Verb::PAN; + if (upper == "TILT") return Verb::TILT; + if (upper == "DOLLY") return Verb::DOLLY; + if (upper == "ZOOM") return 
Verb::ZOOM; + if (upper == "LEAN") return Verb::LEAN; + if (upper == "SET_POSE") return Verb::SET_POSE; + if (upper == "ADJUST_POSE") return Verb::ADJUST_POSE; + if (upper == "ADD") return Verb::ADD; + if (upper == "DELETE" || upper == "REMOVE") return Verb::DELETE; + if (upper == "MOVE") return Verb::MOVE; + if (upper == "ROTATE") return Verb::ROTATE; + if (upper == "SHOT") return Verb::SHOT; + if (upper == "SAVE_SHOT") return Verb::SAVE_SHOT; + if (upper == "LOAD_SHOT") return Verb::LOAD_SHOT; + return Verb::UNKNOWN; +} + +std::string direction_to_string(Direction dir) { + switch (dir) { + case Direction::LEFT: return "LEFT"; + case Direction::RIGHT: return "RIGHT"; + case Direction::UP: return "UP"; + case Direction::DOWN: return "DOWN"; + case Direction::FORWARD: return "FORWARD"; + case Direction::BACKWARD: return "BACKWARD"; + case Direction::UNKNOWN: return "UNKNOWN"; + } + return "UNKNOWN"; +} + +Direction string_to_direction(const std::string & str) { + std::string upper = to_upper(str); + if (upper == "LEFT") return Direction::LEFT; + if (upper == "RIGHT") return Direction::RIGHT; + if (upper == "UP") return Direction::UP; + if (upper == "DOWN") return Direction::DOWN; + if (upper == "FORWARD") return Direction::FORWARD; + if (upper == "BACKWARD") return Direction::BACKWARD; + return Direction::UNKNOWN; +} + +ActionGroup get_action_group_for_verb(Verb verb) { + switch (verb) { + case Verb::PAN: + case Verb::TILT: + case Verb::DOLLY: + case Verb::ZOOM: + case Verb::LEAN: + return ActionGroup::CAMERA_CONTROL; + + case Verb::SET_POSE: + case Verb::ADJUST_POSE: + return ActionGroup::ACTOR_POSE; + + case Verb::ADD: + case Verb::DELETE: + case Verb::MOVE: + case Verb::ROTATE: + return ActionGroup::OBJECT_MGMT; + + case Verb::SHOT: + case Verb::SAVE_SHOT: + case Verb::LOAD_SHOT: + return ActionGroup::SHOT_MGMT; + + case Verb::UNKNOWN: + return ActionGroup::UNKNOWN; + } + return ActionGroup::UNKNOWN; +} + +std::vector get_required_parameters(Verb verb) { + switch 
namespace frameforge {

// Action Groups: the high-level category a command verb belongs to.
enum class ActionGroup {
    CAMERA_CONTROL,
    ACTOR_POSE,
    OBJECT_MGMT,
    SHOT_MGMT,
    UNKNOWN
};

// Command Verbs
// NOTE(review): the Windows SDK headers define a DELETE macro; if this header is
// ever included after <windows.h> the DELETE enumerator may not compile.
// Confirm on Windows builds before relying on it.
enum class Verb {
    // Camera Control
    PAN,
    TILT,
    DOLLY,
    ZOOM,
    LEAN,

    // Actor Pose
    SET_POSE,
    ADJUST_POSE,

    // Object Management
    ADD,
    DELETE,
    MOVE,
    ROTATE,

    // Shot Management
    SHOT,
    SAVE_SHOT,
    LOAD_SHOT,

    UNKNOWN
};

// Direction enum
enum class Direction {
    LEFT,
    RIGHT,
    UP,
    DOWN,
    FORWARD,
    BACKWARD,
    UNKNOWN
};

// Joint definition for pose descriptions.
// Rotations default to zero so a default-constructed Joint never carries
// indeterminate values.
struct Joint {
    std::string name;        // e.g., "shoulder_left", "elbow_right"
    float rotation_x = 0.0f; // rotation in degrees
    float rotation_y = 0.0f;
    float rotation_z = 0.0f;
};

// Command Parameters structure. Every field is optional; which ones a verb
// actually needs is reported by get_required_parameters().
struct CommandParameters {
    std::optional<Direction>                          direction;
    std::optional<float>                              degrees;
    std::optional<float>                              speed;
    std::optional<std::string>                        target;
    std::optional<std::string>                        pose_description;
    std::optional<std::vector<Joint>>                 joint_rotations;
    std::optional<std::map<std::string, std::string>> additional_params;
};

// Main Command structure.
// The enum and bool members are given explicit defaults: without them a plain
// `Command cmd;` leaves verb, action_group and valid indeterminate, and reading
// them before assignment is undefined behaviour.
struct Command {
    Verb              verb = Verb::UNKNOWN;
    std::string       subject;  // e.g., "Camera1", "Tom"
    ActionGroup       action_group = ActionGroup::UNKNOWN;
    CommandParameters parameters;
    bool              valid = false;
    std::string       error_message;
};

// Helper functions to convert enums to/from strings
std::string action_group_to_string(ActionGroup group);
ActionGroup string_to_action_group(const std::string & str);

std::string verb_to_string(Verb verb);
Verb string_to_verb(const std::string & str);

std::string direction_to_string(Direction dir);
Direction string_to_direction(const std::string & str);

// Get action group for a verb
ActionGroup get_action_group_for_verb(Verb verb);

// Get required parameters for a verb
std::vector<std::string> get_required_parameters(Verb verb);

} // namespace frameforge
// Runtime configuration of the sidecar, filled in from the command line.
struct frameforge_params {
    std::string whisper_model;                 // path to the Whisper model file
    std::string llama_model;                   // path to the Llama model file
    std::string audio_file;                    // audio file to transcribe (test mode)
    std::string pipe_name{"frameforge_pipe"};  // named pipe used by the IPC server
    int         n_threads{4};                  // number of threads
    bool        verbose{false};                // enable verbose output
};
--threads N Number of threads (default: 4)\n"); + fprintf(stderr, " -v, --verbose Enable verbose output\n"); + fprintf(stderr, " -h, --help Show this help message\n"); +} + +static bool parse_params(int argc, char ** argv, frameforge_params & params) { + for (int i = 1; i < argc; i++) { + std::string arg = argv[i]; + + if (arg == "-wm" || arg == "--whisper-model") { + if (i + 1 < argc) { + params.whisper_model = argv[++i]; + } else { + fprintf(stderr, "Error: Missing value for %s\n", arg.c_str()); + return false; + } + } else if (arg == "-lm" || arg == "--llama-model") { + if (i + 1 < argc) { + params.llama_model = argv[++i]; + } else { + fprintf(stderr, "Error: Missing value for %s\n", arg.c_str()); + return false; + } + } else if (arg == "-a" || arg == "--audio") { + if (i + 1 < argc) { + params.audio_file = argv[++i]; + } else { + fprintf(stderr, "Error: Missing value for %s\n", arg.c_str()); + return false; + } + } else if (arg == "-p" || arg == "--pipe") { + if (i + 1 < argc) { + params.pipe_name = argv[++i]; + } else { + fprintf(stderr, "Error: Missing value for %s\n", arg.c_str()); + return false; + } + } else if (arg == "-t" || arg == "--threads") { + if (i + 1 < argc) { + params.n_threads = std::stoi(argv[++i]); + } else { + fprintf(stderr, "Error: Missing value for %s\n", arg.c_str()); + return false; + } + } else if (arg == "-v" || arg == "--verbose") { + params.verbose = true; + } else if (arg == "-h" || arg == "--help") { + print_usage(argv[0]); + return false; + } else { + fprintf(stderr, "Error: Unknown argument: %s\n", arg.c_str()); + print_usage(argv[0]); + return false; + } + } + + if (params.whisper_model.empty()) { + fprintf(stderr, "Error: Whisper model path is required\n"); + return false; + } + + if (params.llama_model.empty()) { + fprintf(stderr, "Error: Llama model path is required\n"); + return false; + } + + return true; +} + +// Read WAV file and return PCM audio data +static bool read_wav(const std::string & fname, std::vector & pcmf32, 
int & sample_rate) { + // Simple WAV reader - assumes 16-bit PCM + FILE * f = fopen(fname.c_str(), "rb"); + if (!f) { + fprintf(stderr, "Error: Failed to open audio file: %s\n", fname.c_str()); + return false; + } + + constexpr int WAV_HEADER_SIZE = 44; + constexpr int WAV_SAMPLE_RATE_OFFSET = 24; + + char buf[256]; + size_t bytes_read = fread(buf, 1, WAV_HEADER_SIZE, f); + if (bytes_read != WAV_HEADER_SIZE) { + fprintf(stderr, "Error: Failed to read WAV header\n"); + fclose(f); + return false; + } + + // Get sample rate from header (offset 24 in WAV spec) + sample_rate = *(int32_t *) (buf + WAV_SAMPLE_RATE_OFFSET); + + // Read audio data + std::vector pcm16; + int16_t sample; + while (fread(&sample, sizeof(int16_t), 1, f) == 1) { + pcm16.push_back(sample); + } + + fclose(f); + + // Convert to float + pcmf32.resize(pcm16.size()); + for (size_t i = 0; i < pcm16.size(); i++) { + pcmf32[i] = static_cast(pcm16[i]) / 32768.0f; + } + + return true; +} + +// Transcribe audio using Whisper +static std::string transcribe_audio(whisper_context * wctx, const std::vector & pcmf32, bool verbose) { + if (!wctx) { + return ""; + } + + whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY); + wparams.print_progress = verbose; + wparams.print_timestamps = false; + wparams.print_special = false; + wparams.translate = false; + wparams.language = "en"; + wparams.n_threads = 4; + + if (whisper_full(wctx, wparams, pcmf32.data(), pcmf32.size()) != 0) { + fprintf(stderr, "Error: Failed to process audio\n"); + return ""; + } + + std::string text; + const int n_segments = whisper_full_n_segments(wctx); + for (int i = 0; i < n_segments; ++i) { + const char * segment_text = whisper_full_get_segment_text(wctx, i); + text += segment_text; + } + + return text; +} + +// Classify intent using Llama +static std::string classify_intent(llama_context * lctx, llama_model * model, const std::string & user_input, bool verbose) { + if (!lctx || !model) { + return ""; + } + + 
// Get vocab from model + const llama_vocab * vocab = llama_model_get_vocab(model); + + // Build the prompt + std::string prompt = std::string(INTENT_SYSTEM_PROMPT) + "\n\nUser input: " + user_input + "\n\nJSON output:"; + + // Tokenize the prompt + // First get the required size + const int n_tokens = -llama_tokenize(vocab, prompt.c_str(), prompt.size(), NULL, 0, true, true); + std::vector tokens(n_tokens); + if (llama_tokenize(vocab, prompt.c_str(), prompt.size(), tokens.data(), tokens.size(), true, true) < 0) { + fprintf(stderr, "Error: Failed to tokenize prompt\n"); + return ""; + } + + if (verbose) { + fprintf(stderr, "Prompt tokens: %zu\n", tokens.size()); + } + + // Evaluate the prompt + llama_batch batch = llama_batch_get_one(tokens.data(), tokens.size()); + + if (llama_decode(lctx, batch) != 0) { + fprintf(stderr, "Error: Failed to evaluate prompt\n"); + return ""; + } + + // Generate response + std::string response; + const int max_tokens = 512; + + for (int i = 0; i < max_tokens; i++) { + auto * logits = llama_get_logits_ith(lctx, -1); + + // Simple greedy sampling + llama_token new_token = 0; + float max_logit = logits[0]; + for (int j = 1; j < llama_vocab_n_tokens(vocab); j++) { + if (logits[j] > max_logit) { + max_logit = logits[j]; + new_token = j; + } + } + + // Check for end of text + if (llama_vocab_is_eog(vocab, new_token)) { + break; + } + + // Decode token to text + char buf[128]; + int n = llama_token_to_piece(vocab, new_token, buf, sizeof(buf), 0, true); + if (n > 0) { + response.append(buf, n); + } + + // Evaluate the new token + batch = llama_batch_get_one(&new_token, 1); + + if (llama_decode(lctx, batch) != 0) { + break; + } + + // Check if we have a complete JSON object + if (response.find('}') != std::string::npos) { + break; + } + } + + if (verbose) { + fprintf(stderr, "LLM response: %s\n", response.c_str()); + } + + return response; +} + +int main(int argc, char ** argv) { + frameforge_params params; + + if (!parse_params(argc, argv, 
params)) { + return 1; + } + + // Initialize Whisper + fprintf(stderr, "Loading Whisper model: %s\n", params.whisper_model.c_str()); + whisper_context_params cparams = whisper_context_default_params(); + whisper_context * wctx = whisper_init_from_file_with_params(params.whisper_model.c_str(), cparams); + if (!wctx) { + fprintf(stderr, "Error: Failed to load Whisper model\n"); + return 1; + } + + // Initialize Llama + fprintf(stderr, "Loading Llama model: %s\n", params.llama_model.c_str()); + llama_model_params model_params = llama_model_default_params(); + llama_model * model = llama_model_load_from_file(params.llama_model.c_str(), model_params); + if (!model) { + fprintf(stderr, "Error: Failed to load Llama model\n"); + whisper_free(wctx); + return 1; + } + + llama_context_params ctx_params = llama_context_default_params(); + ctx_params.n_ctx = 2048; + ctx_params.n_threads = params.n_threads; + llama_context * lctx = llama_init_from_model(model, ctx_params); + if (!lctx) { + fprintf(stderr, "Error: Failed to create Llama context\n"); + llama_model_free(model); + whisper_free(wctx); + return 1; + } + + // Initialize command validator + frameforge::CommandValidator validator; + + // Test mode: process a single audio file + if (!params.audio_file.empty()) { + fprintf(stderr, "Processing audio file: %s\n", params.audio_file.c_str()); + + std::vector pcmf32; + int sample_rate = 0; + if (!read_wav(params.audio_file, pcmf32, sample_rate)) { + llama_free(lctx); + llama_model_free(model); + whisper_free(wctx); + return 1; + } + + fprintf(stderr, "Transcribing audio...\n"); + std::string transcription = transcribe_audio(wctx, pcmf32, params.verbose); + fprintf(stderr, "Transcription: %s\n", transcription.c_str()); + + fprintf(stderr, "Classifying intent...\n"); + std::string llm_response = classify_intent(lctx, model, transcription, params.verbose); + fprintf(stderr, "LLM Response: %s\n", llm_response.c_str()); + + // Validate the command + frameforge::Command cmd; + 
frameforge::ValidationResult result = validator.validate_json(llm_response, cmd); + + if (result.valid) { + std::string json_output = frameforge::command_to_json(cmd); + fprintf(stderr, "Valid command:\n%s\n", json_output.c_str()); + } else { + fprintf(stderr, "Validation failed: %s\n", result.error_message.c_str()); + std::string clarification = validator.generate_clarification_request(result, cmd); + fprintf(stderr, "Clarification: %s\n", clarification.c_str()); + } + + llama_free(lctx); + llama_model_free(model); + whisper_free(wctx); + return 0; + } + + // Server mode: start IPC server + fprintf(stderr, "Starting IPC server on pipe: %s\n", params.pipe_name.c_str()); + frameforge::IPCServer ipc_server(params.pipe_name); + + if (!ipc_server.start()) { + fprintf(stderr, "Error: Failed to start IPC server\n"); + llama_free(lctx); + llama_model_free(model); + whisper_free(wctx); + return 1; + } + + fprintf(stderr, "FrameForge Sidecar ready. Waiting for commands...\n"); + + // Main loop + std::atomic running(true); + while (running) { + // In a real implementation, this would: + // 1. Receive audio data from the IPC pipe + // 2. Transcribe with Whisper + // 3. Classify with Llama + // 4. Validate the command + // 5. 
Send the validated JSON back through the pipe + + // For now, just keep the process running + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + + ipc_server.stop(); + llama_free(lctx); + llama_model_free(model); + whisper_free(wctx); + + return 0; +} diff --git a/tools/frameforge/frameforge-validator.cpp b/tools/frameforge/frameforge-validator.cpp new file mode 100644 index 00000000000..11dffa6333f --- /dev/null +++ b/tools/frameforge/frameforge-validator.cpp @@ -0,0 +1,255 @@ +#include "frameforge-validator.h" +#include +#include + +// Use vendored nlohmann/json library +#include "../../vendor/nlohmann/json.hpp" + +using json = nlohmann::json; + +namespace frameforge { + +CommandValidator::CommandValidator() { +} + +CommandValidator::~CommandValidator() { +} + +bool CommandValidator::check_required_parameters( + const Command & cmd, + std::vector & missing +) const { + missing.clear(); + + std::vector required = get_required_parameters(cmd.verb); + + for (const auto & param : required) { + if (param == "direction" && !cmd.parameters.direction.has_value()) { + missing.push_back("direction"); + } else if (param == "degrees" && !cmd.parameters.degrees.has_value()) { + missing.push_back("degrees"); + } else if (param == "speed" && !cmd.parameters.speed.has_value()) { + missing.push_back("speed"); + } else if (param == "target" && !cmd.parameters.target.has_value()) { + missing.push_back("target"); + } else if (param == "pose_description" && + !cmd.parameters.pose_description.has_value() && + !cmd.parameters.joint_rotations.has_value()) { + missing.push_back("pose_description or joint_rotations"); + } + } + + return missing.empty(); +} + +bool CommandValidator::validate_parameter_values( + const Command & cmd, + std::string & error +) const { + // Validate direction if present + if (cmd.parameters.direction.has_value()) { + if (cmd.parameters.direction.value() == Direction::UNKNOWN) { + error = "Invalid direction value"; + return false; + } + } + + // 
Validate degrees if present + if (cmd.parameters.degrees.has_value()) { + float degrees = cmd.parameters.degrees.value(); + if (degrees < -360.0f || degrees > 360.0f) { + error = "Degrees must be between -360 and 360"; + return false; + } + } + + // Validate speed if present + if (cmd.parameters.speed.has_value()) { + float speed = cmd.parameters.speed.value(); + if (speed < 0.0f || speed > 100.0f) { + error = "Speed must be between 0 and 100"; + return false; + } + } + + // Validate subject is not empty + if (cmd.subject.empty()) { + error = "Subject cannot be empty"; + return false; + } + + return true; +} + +ValidationResult CommandValidator::validate(const Command & cmd) const { + ValidationResult result; + result.valid = true; + + // Check if verb is valid + if (cmd.verb == Verb::UNKNOWN) { + result.valid = false; + result.error_message = "Unknown or invalid verb"; + return result; + } + + // Check if action group matches verb + ActionGroup expected_group = get_action_group_for_verb(cmd.verb); + if (cmd.action_group != expected_group && cmd.action_group != ActionGroup::UNKNOWN) { + result.valid = false; + result.error_message = "Action group does not match verb"; + return result; + } + + // Check required parameters + if (!check_required_parameters(cmd, result.missing_parameters)) { + result.valid = false; + std::ostringstream oss; + oss << "Missing required parameters: "; + for (size_t i = 0; i < result.missing_parameters.size(); ++i) { + if (i > 0) oss << ", "; + oss << result.missing_parameters[i]; + } + result.error_message = oss.str(); + return result; + } + + // Validate parameter values + std::string value_error; + if (!validate_parameter_values(cmd, value_error)) { + result.valid = false; + result.error_message = value_error; + return result; + } + + return result; +} + +ValidationResult CommandValidator::validate_json( + const std::string & json_str, + Command & out_cmd +) const { + ValidationResult result; + result.valid = true; + + try { + json j = 
json::parse(json_str); + + // Parse verb + if (!j.contains("verb")) { + result.valid = false; + result.error_message = "Missing 'verb' field in JSON"; + return result; + } + out_cmd.verb = string_to_verb(j["verb"].get()); + + // Parse subject + if (!j.contains("subject")) { + result.valid = false; + result.error_message = "Missing 'subject' field in JSON"; + return result; + } + out_cmd.subject = j["subject"].get(); + + // Parse action_group (optional, can be inferred) + if (j.contains("action_group")) { + out_cmd.action_group = string_to_action_group(j["action_group"].get()); + } else { + out_cmd.action_group = get_action_group_for_verb(out_cmd.verb); + } + + // Parse parameters + if (j.contains("parameters")) { + json params = j["parameters"]; + + if (params.contains("direction")) { + out_cmd.parameters.direction = string_to_direction(params["direction"].get()); + } + + if (params.contains("degrees")) { + out_cmd.parameters.degrees = params["degrees"].get(); + } + + if (params.contains("speed")) { + out_cmd.parameters.speed = params["speed"].get(); + } + + if (params.contains("target")) { + out_cmd.parameters.target = params["target"].get(); + } + + if (params.contains("pose_description")) { + out_cmd.parameters.pose_description = params["pose_description"].get(); + } + + if (params.contains("joint_rotations")) { + std::vector joints; + for (const auto & joint_json : params["joint_rotations"]) { + Joint joint; + joint.name = joint_json["name"].get(); + joint.rotation_x = joint_json.value("rotation_x", 0.0f); + joint.rotation_y = joint_json.value("rotation_y", 0.0f); + joint.rotation_z = joint_json.value("rotation_z", 0.0f); + joints.push_back(joint); + } + out_cmd.parameters.joint_rotations = joints; + } + + // Parse additional parameters + for (auto & [key, value] : params.items()) { + if (key != "direction" && key != "degrees" && key != "speed" && + key != "target" && key != "pose_description" && key != "joint_rotations") { + if 
(!out_cmd.parameters.additional_params.has_value()) { + out_cmd.parameters.additional_params = std::map(); + } + out_cmd.parameters.additional_params.value()[key] = value.dump(); + } + } + } + + // Now validate the parsed command + result = validate(out_cmd); + + } catch (const json::parse_error & e) { + result.valid = false; + result.error_message = std::string("JSON parse error: ") + e.what(); + } catch (const json::type_error & e) { + result.valid = false; + result.error_message = std::string("JSON type error: ") + e.what(); + } catch (const std::exception & e) { + result.valid = false; + result.error_message = std::string("Error: ") + e.what(); + } + + return result; +} + +std::string CommandValidator::generate_clarification_request( + const ValidationResult & result, + const Command & cmd +) const { + if (result.valid) { + return ""; + } + + std::ostringstream oss; + oss << "I need clarification for the command '" + << verb_to_string(cmd.verb) + << "' on subject '" + << cmd.subject + << "'. 
// Validation result structure.
// `valid` defaults to false so a result that was never filled in reads as a
// failure instead of holding an indeterminate value.
struct ValidationResult {
    bool valid = false;                           // true when the command passed every check
    std::string error_message;                    // description of the failure; empty on success
    std::vector<std::string> missing_parameters;  // required parameters the command lacks
};