diff --git a/appinfo/routes.php b/appinfo/routes.php index e2d72d21..9bedeaea 100644 --- a/appinfo/routes.php +++ b/appinfo/routes.php @@ -100,5 +100,10 @@ ['name' => 'OCSUi#setExAppStyle', 'url' => '/api/v1/ui/style', 'verb' => 'POST'], ['name' => 'OCSUi#deleteExAppStyle', 'url' => '/api/v1/ui/style', 'verb' => 'DELETE'], ['name' => 'OCSUi#getExAppStyle', 'url' => '/api/v1/ui/style', 'verb' => 'GET'], + + // Speech-To-Text + ['name' => 'speechToText#registerProvider', 'url' => '/api/v1/ai_provider/speech_to_text', 'verb' => 'POST'], + ['name' => 'speechToText#unregisterProvider', 'url' => '/api/v1/ai_provider/speech_to_text', 'verb' => 'DELETE'], + ['name' => 'speechToText#getProvider', 'url' => '/api/v1/ai_provider/speech_to_text', 'verb' => 'GET'], ], ]; diff --git a/docs/tech_details/ApiScopes.rst b/docs/tech_details/ApiScopes.rst index 2dc815f9..f915dd7f 100644 --- a/docs/tech_details/ApiScopes.rst +++ b/docs/tech_details/ApiScopes.rst @@ -29,6 +29,7 @@ The following API groups are currently supported: * ``33`` WEATHER_STATUS * ``50`` TALK * ``60`` TALK_BOT +* ``61`` AI_PROVIDERS * ``110`` ACTIVITIES * ``120`` NOTES diff --git a/docs/tech_details/api/index.rst b/docs/tech_details/api/index.rst index dc7999f5..8374f026 100644 --- a/docs/tech_details/api/index.rst +++ b/docs/tech_details/api/index.rst @@ -18,4 +18,5 @@ AppAPI Nextcloud APIs topmenu notifications talkbots + speechtotext other_ocs diff --git a/docs/tech_details/api/speechtotext.rst b/docs/tech_details/api/speechtotext.rst new file mode 100644 index 00000000..49117fe8 --- /dev/null +++ b/docs/tech_details/api/speechtotext.rst @@ -0,0 +1,47 @@ +============== +Speech-To-Text +============== + +AppAPI provides a Speech-To-Text (STT) provider registration API for the ExApps. + +Registering ExApp STT provider (OCS) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +OCS endpoint: ``POST /apps/app_api/api/v1/ai_provider/speech_to_text`` + +Request data +************ + +.. 
code-block:: json + + { + "name": "unique_provider_name", + "display_name": "Provider Display Name", + "action_handler": "/handler_route_on_ex_app" + } + + +Response +******** + +On successful registration, a response with status code 200 is returned. + +Unregistering ExApp STT provider (OCS) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +OCS endpoint: ``DELETE /apps/app_api/api/v1/ai_provider/speech_to_text`` + +Request data +************ + +.. code-block:: json + + { + "name": "unique_provider_name" + } + + +Response +******** + +On successful unregistration, a response with status code 200 is returned. diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php index a7105f2d..f75956fe 100644 --- a/lib/AppInfo/Application.php +++ b/lib/AppInfo/Application.php @@ -15,6 +15,7 @@ use OCA\AppAPI\Notifications\ExAppNotifier; use OCA\AppAPI\Profiler\AppAPIDataCollector; use OCA\AppAPI\PublicCapabilities; +use OCA\AppAPI\Service\SpeechToTextService; use OCA\AppAPI\Service\UI\TopMenuService; use OCA\DAV\Events\SabrePluginAuthInitEvent; use OCA\Files\Event\LoadAdditionalScriptsEvent; @@ -58,6 +59,15 @@ public function register(IRegistrationContext $context): void { $context->registerEventListener(UserDeletedEvent::class, UserDeletedListener::class); $context->registerNotifierService(ExAppNotifier::class); $context->registerNotifierService(ExAppAdminNotifier::class); + + // Dynamic anonymous providers registration + $container = $this->getContainer(); + try { + /** @var SpeechToTextService $speechToTextService */ + $speechToTextService = $container->get(SpeechToTextService::class); + $speechToTextService->registerExAppSpeechToTextProviders($context, $container->getServer()); + } catch (NotFoundExceptionInterface|ContainerExceptionInterface) { + } } public function boot(IBootContext $context): void { diff --git a/lib/Controller/SpeechToTextController.php b/lib/Controller/SpeechToTextController.php new file mode 100644 index 00000000..112c0011 --- /dev/null +++ 
b/lib/Controller/SpeechToTextController.php @@ -0,0 +1,78 @@ +request = $request; + } + + /** + * Register a Speech-To-Text provider for the ExApp named in the EX-APP-ID request header. + * + * Responds with 501 when the configured Nextcloud version is lower than 29.0, + * with 400 when the service returns null, and with an empty DataResponse otherwise. + */ + #[NoCSRFRequired] + #[PublicPage] + #[AppAPIAuth] + public function registerProvider(string $name, string $displayName, string $actionHandler): DataResponse { + $ncVersion = $this->config->getSystemValueString('version', '0.0.0'); + if (version_compare($ncVersion, '29.0', '<')) { + return new DataResponse([], Http::STATUS_NOT_IMPLEMENTED); + } + $provider = $this->speechToTextService->registerSpeechToTextProvider( + $this->request->getHeader('EX-APP-ID'), $name, $displayName, $actionHandler); + if ($provider === null) { + return new DataResponse([], Http::STATUS_BAD_REQUEST); + } + return new DataResponse(); + } + + /** + * Unregister the provider with the given name for the ExApp named in the EX-APP-ID request header. + * + * Responds with 501 when the configured Nextcloud version is lower than 29.0, + * with 404 when the service returns null, and with an empty DataResponse otherwise. + */ + #[NoCSRFRequired] + #[PublicPage] + #[AppAPIAuth] + public function unregisterProvider(string $name): DataResponse { + $ncVersion = $this->config->getSystemValueString('version', '0.0.0'); + if (version_compare($ncVersion, '29.0', '<')) { + return new DataResponse([], Http::STATUS_NOT_IMPLEMENTED); + } + $unregistered = $this->speechToTextService->unregisterSpeechToTextProvider( + $this->request->getHeader('EX-APP-ID'), $name); + if ($unregistered === null) { + return new DataResponse([], Http::STATUS_NOT_FOUND); + } + return new DataResponse(); + } + + /** + * Return the provider with the given name for the ExApp named in the EX-APP-ID request header. + * + * Responds with 501 when the configured Nextcloud version is lower than 29.0, + * with 404 when the service result is falsy, and with the result and status 200 otherwise. + */ + #[AppAPIAuth] + #[PublicPage] + #[NoCSRFRequired] + public function getProvider(string $name): DataResponse { + $ncVersion = $this->config->getSystemValueString('version', '0.0.0'); + if (version_compare($ncVersion, '29.0', '<')) { + return new DataResponse([], Http::STATUS_NOT_IMPLEMENTED); + } + $result = $this->speechToTextService->getExAppSpeechToTextProvider( + $this->request->getHeader('EX-APP-ID'), $name); + if (!$result) { + return new DataResponse([], Http::STATUS_NOT_FOUND); + } + return new DataResponse($result, Http::STATUS_OK); + } +} diff --git a/lib/Db/SpeechToText/SpeechToTextProvider.php b/lib/Db/SpeechToText/SpeechToTextProvider.php new file mode 100644 index 00000000..0c6a1073 --- /dev/null +++ 
b/lib/Db/SpeechToText/SpeechToTextProvider.php @@ -0,0 +1,61 @@ +addType('appid', 'string'); + $this->addType('name', 'string'); + $this->addType('displayName', 'string'); + $this->addType('actionHandler', 'string'); + + if (isset($params['id'])) { + $this->setId($params['id']); + } + if (isset($params['appid'])) { + $this->setAppid($params['appid']); + } + if (isset($params['name'])) { + $this->setName($params['name']); + } + if (isset($params['display_name'])) { + $this->setDisplayName($params['display_name']); + } + if (isset($params['action_handler'])) { + $this->setActionHandler($params['action_handler']); + } + } + + /** + * Export the entity as an array keyed by id, appid, name, display_name and action_handler, + * i.e. the same snake_case keys the constructor reads from $params. + */ + public function jsonSerialize(): array { + return [ + 'id' => $this->getId(), + 'appid' => $this->getAppid(), + 'name' => $this->getName(), + 'display_name' => $this->getDisplayName(), + 'action_handler' => $this->getActionHandler(), + ]; + } +} diff --git a/lib/Db/SpeechToText/SpeechToTextProviderMapper.php b/lib/Db/SpeechToText/SpeechToTextProviderMapper.php new file mode 100644 index 00000000..98a4ac8e --- /dev/null +++ b/lib/Db/SpeechToText/SpeechToTextProviderMapper.php @@ -0,0 +1,72 @@ + + */ +class SpeechToTextProviderMapper extends QBMapper { + public function __construct(IDBConnection $db) { + parent::__construct($db, 'ex_speech_to_text'); + } + + /** + * Select appid, name, display_name and action_handler of every provider row + * that inner-joins an ex_apps row on appid whose enabled column equals 1. + * + * @return array rows as returned by fetchAll() (the id column is not selected) + * @throws Exception + */ + public function findAllEnabled(): array { + $qb = $this->db->getQueryBuilder(); + $result = $qb->select( + 'ex_speech_to_text.appid', + 'ex_speech_to_text.name', + 'ex_speech_to_text.display_name', + 'ex_speech_to_text.action_handler', + ) + ->from($this->tableName, 'ex_speech_to_text') + ->innerJoin('ex_speech_to_text', 'ex_apps', 'exa', 'exa.appid = ex_speech_to_text.appid') + ->where( + $qb->expr()->eq('exa.enabled', $qb->createNamedParameter(1, IQueryBuilder::PARAM_INT)) + ) + ->executeQuery(); + return $result->fetchAll(); + } + + /** + * @param string $appId + * @param string $name + * + * @return SpeechToTextProvider + * @throws Exception + * @throws 
MultipleObjectsReturnedException + * + * @throws DoesNotExistException + */ + public function findByAppidName(string $appId, string $name): SpeechToTextProvider { + $qb = $this->db->getQueryBuilder(); + /* The parameter type is given to createNamedParameter() (as removeAllByAppId() does), not to eq(). */ + return $this->findEntity($qb->select('*') + ->from($this->tableName) + ->where($qb->expr()->eq('appid', $qb->createNamedParameter($appId, IQueryBuilder::PARAM_STR))) + ->andWhere($qb->expr()->eq('name', $qb->createNamedParameter($name, IQueryBuilder::PARAM_STR))) + ); + } + + /** + * Delete every provider row whose appid column equals $appId. + * + * @return int the value returned by executeStatement() + * @throws Exception + */ + public function removeAllByAppId(string $appId): int { + $qb = $this->db->getQueryBuilder(); + $qb->delete($this->tableName) + ->where( + $qb->expr()->eq('appid', $qb->createNamedParameter($appId, IQueryBuilder::PARAM_STR)) + ); + return $qb->executeStatement(); + } +} diff --git a/lib/Migration/Version1005Date202312271744.php b/lib/Migration/Version1005Date202312271744.php new file mode 100644 index 00000000..d9f3c8c4 --- /dev/null +++ b/lib/Migration/Version1005Date202312271744.php @@ -0,0 +1,55 @@ +hasTable('ex_speech_to_text')) { + $table = $schema->createTable('ex_speech_to_text'); + + $table->addColumn('id', Types::BIGINT, [ + 'autoincrement' => true, + 'notnull' => true, + ]); + $table->addColumn('appid', Types::STRING, [ + 'notnull' => true, + 'length' => 32, + ]); + $table->addColumn('name', Types::STRING, [ + 'notnull' => true, + 'length' => 64, + ]); + $table->addColumn('display_name', Types::STRING, [ + 'notnull' => true, + 'length' => 64, + ]); + $table->addColumn('action_handler', Types::STRING, [ + 'notnull' => true, + 'length' => 410, + ]); + + $table->setPrimaryKey(['id']); + $table->addUniqueIndex(['appid', 'name'], 'speech_to_text__idx'); + } + + return $schema; + } +} diff --git a/lib/Service/AppAPIService.php b/lib/Service/AppAPIService.php index e4206560..e2419219 100644 --- a/lib/Service/AppAPIService.php +++ b/lib/Service/AppAPIService.php @@ -45,9 +45,9 @@ class AppAPIService { private IClient $client; public function __construct( - 
private readonly LoggerInterface $logger, - private readonly ILogFactory $logFactory, - ICacheFactory $cacheFactory, + private readonly LoggerInterface $logger, + private readonly ILogFactory $logFactory, + ICacheFactory $cacheFactory, private readonly IThrottler $throttler, private readonly IConfig $config, IClientService $clientService, @@ -61,6 +61,7 @@ public function __construct( private readonly ScriptsService $scriptsService, private readonly StylesService $stylesService, private readonly FilesActionsMenuService $filesActionsMenuService, + private readonly SpeechToTextService $speechToTextService, private readonly ISecureRandom $random, private readonly IUserSession $userSession, private readonly ISession $session, @@ -149,6 +150,7 @@ public function unregisterExApp(string $appId): ?ExApp { $this->initialStateService->deleteExAppInitialStates($appId); $this->scriptsService->deleteExAppScripts($appId); $this->stylesService->deleteExAppStyles($appId); + $this->speechToTextService->unregisterExAppSpeechToTextProviders($appId); $this->cache->remove('/exApp_' . $appId); return $exApp; } catch (Exception $e) { @@ -565,7 +567,12 @@ public function requestToExApp( $url = self::getExAppUrl( $exApp->getProtocol(), $exApp->getHost(), - $exApp->getPort()) . $route; + $exApp->getPort()); + if (str_starts_with($route, '/')) { + $url = $url.$route; + } else { + $url = $url.'/'.$route; + } if (isset($options['headers']) && is_array($options['headers'])) { $options['headers'] = [...$options['headers'], ...$this->buildAppAPIAuthHeaders($request, $userId, $exApp)]; diff --git a/lib/Service/ExAppApiScopeService.php b/lib/Service/ExAppApiScopeService.php index f7d99187..b38b990f 100644 --- a/lib/Service/ExAppApiScopeService.php +++ b/lib/Service/ExAppApiScopeService.php @@ -95,6 +95,7 @@ public function registerInitScopes(): bool { ['api_route' => $aeApiV1Prefix . '/ex-app/enabled', 'scope_group' => 2, 'name' => 'SYSTEM', 'user_check' => 1], ['api_route' => $aeApiV1Prefix . 
'/notification', 'scope_group' => 32, 'name' => 'NOTIFICATIONS', 'user_check' => 1], ['api_route' => $aeApiV1Prefix . '/talk_bot', 'scope_group' => 60, 'name' => 'TALK_BOT', 'user_check' => 0], + ['api_route' => $aeApiV1Prefix . '/ai_provider/', 'scope_group' => 61, 'name' => 'AI_PROVIDERS', 'user_check' => 0], // AppAPI internal scopes ['api_route' => '/apps/app_api/apps/status', 'scope_group' => 1, 'name' => 'BASIC', 'user_check' => 0], diff --git a/lib/Service/SpeechToTextService.php b/lib/Service/SpeechToTextService.php new file mode 100644 index 00000000..a93a7228 --- /dev/null +++ b/lib/Service/SpeechToTextService.php @@ -0,0 +1,207 @@ +cache = $cacheFactory->createDistributed(Application::APP_ID . '/ex_speech_to_text_providers'); + } + + /** + * Insert a provider for the given ExApp and name, or update the existing row when one is found. + * + * The action handler is stored without leading slashes. On success the per-provider cache entry + * is written and the cached list of enabled providers is dropped. + * + * @return SpeechToTextProvider|null the stored entity, or null when an Exception was caught (it is logged) + */ + public function registerSpeechToTextProvider(string $appId, string $name, string $displayName, string $actionHandler): ?SpeechToTextProvider { + try { + $speechToTextProvider = $this->mapper->findByAppidName($appId, $name); + } catch (DoesNotExistException|MultipleObjectsReturnedException|Exception) { + $speechToTextProvider = null; + } + try { + $newSpeechToTextProvider = new SpeechToTextProvider([ + 'appid' => $appId, + 'name' => $name, + 'display_name' => $displayName, + 'action_handler' => ltrim($actionHandler, '/'), + ]); + if ($speechToTextProvider !== null) { + $newSpeechToTextProvider->setId($speechToTextProvider->getId()); + } + $speechToTextProvider = $this->mapper->insertOrUpdate($newSpeechToTextProvider); + $this->cache->set('/ex_speech_to_text_providers_' . $appId . '_' . $name, $speechToTextProvider); + $this->resetCacheEnabled(); + } catch (Exception $e) { + $this->logger->error( + sprintf('Failed to register ExApp %s SpeechToTextProvider %s. 
Error: %s', $appId, $name, $e->getMessage()), ['exception' => $e] + ); + return null; + } + return $speechToTextProvider; + } + + /** + * Delete the provider with the given ExApp id and name, remove its cache entry + * and drop the cached list of enabled providers. + * + * @return SpeechToTextProvider|null the deleted entity, or null when it was not found or an Exception was caught (it is logged) + */ + public function unregisterSpeechToTextProvider(string $appId, string $name): ?SpeechToTextProvider { + try { + $speechToTextProvider = $this->getExAppSpeechToTextProvider($appId, $name); + if ($speechToTextProvider === null) { + return null; + } + $this->mapper->delete($speechToTextProvider); + $this->cache->remove('/ex_speech_to_text_providers_' . $appId . '_' . $name); + $this->resetCacheEnabled(); + return $speechToTextProvider; + } catch (Exception $e) { + $this->logger->error(sprintf('Failed to unregister ExApp %s SpeechToTextProvider %s. Error: %s', $appId, $name, $e->getMessage()), ['exception' => $e]); + return null; + } + } + + /** + * Get list of registered SpeechToText providers (only for enabled ExApps). + * The raw records are read from the cache when present, otherwise from the mapper + * and then cached; an empty list is returned when an Exception is caught. + * + * @return SpeechToTextProvider[] + */ + public function getRegisteredSpeechToTextProviders(): array { + try { + $cacheKey = '/ex_speech_to_text_providers'; + $records = $this->cache->get($cacheKey); + if ($records === null) { + $records = $this->mapper->findAllEnabled(); + $this->cache->set($cacheKey, $records); + } + return array_map(function ($record) { + return new SpeechToTextProvider($record); + }, $records); + } catch (Exception) { + return []; + } + } + + /** + * Look up one provider by ExApp id and name, reading the per-provider cache entry first + * and falling back to the mapper (whose result is then cached). + * + * @return SpeechToTextProvider|null null when the mapper lookup throws + */ + public function getExAppSpeechToTextProvider(string $appId, string $name): ?SpeechToTextProvider { + $cacheKey = '/ex_speech_to_text_providers_' . $appId . '_' . $name; + $cache = $this->cache->get($cacheKey); + if ($cache !== null) { + return $cache instanceof SpeechToTextProvider ? 
$cache : new SpeechToTextProvider($cache); + } + + try { + $speechToTextProvider = $this->mapper->findByAppidName($appId, $name); + } catch (DoesNotExistException|MultipleObjectsReturnedException|Exception) { + return null; + } + $this->cache->set($cacheKey, $speechToTextProvider); + return $speechToTextProvider; + } + + /** + * Remove all providers of the given ExApp, then clear the cache using the app's key prefix + * and drop the cached list of enabled providers (both happen even when the removal fails). + * + * @return int the mapper's result, or -1 when the mapper threw an Exception + */ + public function unregisterExAppSpeechToTextProviders(string $appId): int { + try { + $result = $this->mapper->removeAllByAppId($appId); + } catch (Exception) { + $result = -1; + } + $this->cache->clear('/ex_speech_to_text_providers_' . $appId); + $this->resetCacheEnabled(); + return $result; + } + + /** + * Drop the cached list of enabled providers so the next read queries the mapper again. + */ + public function resetCacheEnabled(): void { + $this->cache->remove('/ex_speech_to_text_providers'); + } + + /** + * Register ExApp anonymous providers implementations of ISpeechToTextProviderWithId + * so that they can be used as regular providers in DI container. + * + * The context is an object handle, so it is taken by value; nothing here reassigns it. + */ + public function registerExAppSpeechToTextProviders(IRegistrationContext $context, IServerContainer $serverContainer): void { + $exAppsProviders = $this->getRegisteredSpeechToTextProviders(); + foreach ($exAppsProviders as $exAppProvider) { + $class = '\\OCA\\AppAPI\\' . $exAppProvider->getAppid() . '\\' . 
$exAppProvider->getName(); + $sttProvider = $this->getAnonymousExAppProvider($exAppProvider, $serverContainer, $class); + $context->registerService($class, function () use ($sttProvider) { + return $sttProvider; + }); + $context->registerSpeechToTextProvider($class); + } + } + + /** + * Build an anonymous ISpeechToTextProviderWithId implementation that forwards + * transcription requests to the ExApp owning the given provider record. + * + * @psalm-suppress UndefinedClass, MissingDependency, InvalidReturnStatement, InvalidReturnType + */ + private function getAnonymousExAppProvider(SpeechToTextProvider $provider, IServerContainer $serverContainer, string $class): ?ISpeechToTextProviderWithId { + return new class($provider, $serverContainer, $this->userId, $class) implements ISpeechToTextProviderWithId { + public function __construct( + private SpeechToTextProvider $sttProvider, + // We need this to delay the instantiation of AppAPIService during registration to avoid conflicts + private IServerContainer $serverContainer, // TODO: Extract needed methods from AppAPIService to be able to use it everytime + private readonly ?string $userId, + private readonly string $class, + ) { + } + + public function getId(): string { + return $this->class; + } + + public function getName(): string { + return $this->sttProvider->getDisplayName(); + } + + /** + * Send the file as a multipart POST to the provider's action handler on its ExApp + * and return the response body. + * + * @throws \Exception when opening the file fails or the request returns an array (treated as an error result) + */ + public function transcribeFile(File $file, float $maxExecutionTime = 0): string { + $route = $this->sttProvider->getActionHandler(); + $service = $this->serverContainer->get(AppAPIService::class); + + try { + $fileHandle = $file->fopen('r'); + } catch (Exception $e) { + throw new \Exception(sprintf('Failed to open file: %s. 
Error: %s', $file->getName(), $e->getMessage())); + } + $response = $service->requestToExAppById($this->sttProvider->getAppid(), + $route, + $this->userId, + 'POST', + /* Pass the value itself: "$$maxExecutionTime" was a variable-variable lookup, not the timeout. */ + params: ['max_execution_time' => $maxExecutionTime], + options: [ + 'multipart' => [ + [ + 'name' => 'data', + 'contents' => $fileHandle, + 'filename' => $file->getName(), + 'headers' => [ + 'Content-Type' => $file->getMimeType(), + ] + ], + ], + 'query' => ['max_execution_time' => $maxExecutionTime], + 'timeout' => $maxExecutionTime, + ]); + if (is_array($response)) { + throw new \Exception(sprintf('Failed to transcribe file: %s with %s:%s. Error: %s', + $file->getName(), + $this->sttProvider->getAppid(), + $this->sttProvider->getName(), + $response['error'] + )); + } + return $response->getBody(); + } + }; + } +}