From 7196efb69c34357acfb62262187788ecb2e2a246 Mon Sep 17 00:00:00 2001 From: Sergio N Date: Sat, 27 Dec 2025 23:18:24 +0100 Subject: [PATCH 01/22] WIP on docker container for whisper-asr-webservice --- compose.yaml | 46 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/compose.yaml b/compose.yaml index c033179..1590c06 100644 --- a/compose.yaml +++ b/compose.yaml @@ -1,4 +1,18 @@ name: uxcaptain + +networks: + uxcaptain-network: + name: uxcaptain-network + driver: bridge + +volumes: + uxcaptain-database: + name: uxcaptain-database + + whisper-transcription-models: + name: whisper-transcription-models + + services: # server: # container_name: server @@ -40,6 +54,15 @@ services: # condition: service_started # networks: # - uxcaptain-network + + # deploy: + # resources: + # limits: + # cpus: '1.0' + # memory: 512M + # reservations: + # cpus: '0.5' + # memory: 256M minio: image: minio/minio:latest @@ -74,11 +97,22 @@ services: networks: - uxcaptain-network -networks: - uxcaptain-network: - name: uxcaptain-network - driver: bridge + whisper-transcribe: + image: onerahmet/openai-whisper-asr-webservice:latest + container_name: whisper-asr + ports: + - 9006:9000 + volumes: + - whisper-transcription-models:/root/.cache # Model persistence (~2-5GB) + - /Users/martaperezsanchez/repos/minio:ro # Mount your video storage + environment: + - ASR_MODEL=medium # Or base/tiny for speed + - ASR_DEVICE=cpu + - ASR_ENGINE=openai_whisper + - MAX_REQUEST_SIZE=5000 # MB for large videos + restart: unless-stopped + networks: + - uxcaptain-network # Same as MinIO/monolith + -volumes: - uxcaptain-database: From cc77b228bf6e44f5cac35716f5a634b1c523a894 Mon Sep 17 00:00:00 2001 From: Sergio N Date: Mon, 29 Dec 2025 13:33:10 +0100 Subject: [PATCH 02/22] ADD depends_on and MODEL_IDLE_TIMEOUT to whisper-transcribe --- compose.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compose.yaml b/compose.yaml index 1590c06..8d66175 
100644 --- a/compose.yaml +++ b/compose.yaml @@ -110,7 +110,10 @@ services: - ASR_DEVICE=cpu - ASR_ENGINE=openai_whisper - MAX_REQUEST_SIZE=5000 # MB for large videos + - MODEL_IDLE_TIMEOUT=900 # Keep model loaded 15min restart: unless-stopped + depends_on: + - minio networks: - uxcaptain-network # Same as MinIO/monolith From 5e53de86035aac10b953ca545be2075890a67003 Mon Sep 17 00:00:00 2001 From: Sergio N Date: Mon, 29 Dec 2025 14:16:08 +0100 Subject: [PATCH 03/22] REMOVE npm AWS Transcribe --- package-lock.json | 407 ---------------------------------------------- package.json | 1 - 2 files changed, 408 deletions(-) diff --git a/package-lock.json b/package-lock.json index ab8f1da..7b49763 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,7 +10,6 @@ "license": "Proprietary", "dependencies": { "@aws-sdk/client-s3": "^3.936.0", - "@aws-sdk/client-transcribe": "^3.948.0", "@aws-sdk/s3-request-presigner": "^3.936.0", "@getbrevo/brevo": "^3.0.1", "@prisma/client": "^6.19.0", @@ -365,412 +364,6 @@ "node": ">=18.0.0" } }, - "node_modules/@aws-sdk/client-transcribe": { - "version": "3.948.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/client-transcribe/-/client-transcribe-3.948.0.tgz", - "integrity": "sha512-EOPYaW/lL2UHZbsG6PxPeHu/Pcw8MTsUznrRW6z7svVHCgsQkGUoWJs9gxTr601r+TMPgt8rdv2bv+WgXeN/SQ==", - "license": "Apache-2.0", - "dependencies": { - "@aws-crypto/sha256-browser": "5.2.0", - "@aws-crypto/sha256-js": "5.2.0", - "@aws-sdk/core": "3.947.0", - "@aws-sdk/credential-provider-node": "3.948.0", - "@aws-sdk/middleware-host-header": "3.936.0", - "@aws-sdk/middleware-logger": "3.936.0", - "@aws-sdk/middleware-recursion-detection": "3.948.0", - "@aws-sdk/middleware-user-agent": "3.947.0", - "@aws-sdk/region-config-resolver": "3.936.0", - "@aws-sdk/types": "3.936.0", - "@aws-sdk/util-endpoints": "3.936.0", - "@aws-sdk/util-user-agent-browser": "3.936.0", - "@aws-sdk/util-user-agent-node": "3.947.0", - "@smithy/config-resolver": "^4.4.3", - "@smithy/core": 
"^3.18.7", - "@smithy/fetch-http-handler": "^5.3.6", - "@smithy/hash-node": "^4.2.5", - "@smithy/invalid-dependency": "^4.2.5", - "@smithy/middleware-content-length": "^4.2.5", - "@smithy/middleware-endpoint": "^4.3.14", - "@smithy/middleware-retry": "^4.4.14", - "@smithy/middleware-serde": "^4.2.6", - "@smithy/middleware-stack": "^4.2.5", - "@smithy/node-config-provider": "^4.3.5", - "@smithy/node-http-handler": "^4.4.5", - "@smithy/protocol-http": "^5.3.5", - "@smithy/smithy-client": "^4.9.10", - "@smithy/types": "^4.9.0", - "@smithy/url-parser": "^4.2.5", - "@smithy/util-base64": "^4.3.0", - "@smithy/util-body-length-browser": "^4.2.0", - "@smithy/util-body-length-node": "^4.2.1", - "@smithy/util-defaults-mode-browser": "^4.3.13", - "@smithy/util-defaults-mode-node": "^4.2.16", - "@smithy/util-endpoints": "^3.2.5", - "@smithy/util-middleware": "^4.2.5", - "@smithy/util-retry": "^4.2.5", - "@smithy/util-utf8": "^4.2.0", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@aws-sdk/client-transcribe/node_modules/@aws-sdk/client-sso": { - "version": "3.948.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso/-/client-sso-3.948.0.tgz", - "integrity": "sha512-iWjchXy8bIAVBUsKnbfKYXRwhLgRg3EqCQ5FTr3JbR+QR75rZm4ZOYXlvHGztVTmtAZ+PQVA1Y4zO7v7N87C0A==", - "license": "Apache-2.0", - "dependencies": { - "@aws-crypto/sha256-browser": "5.2.0", - "@aws-crypto/sha256-js": "5.2.0", - "@aws-sdk/core": "3.947.0", - "@aws-sdk/middleware-host-header": "3.936.0", - "@aws-sdk/middleware-logger": "3.936.0", - "@aws-sdk/middleware-recursion-detection": "3.948.0", - "@aws-sdk/middleware-user-agent": "3.947.0", - "@aws-sdk/region-config-resolver": "3.936.0", - "@aws-sdk/types": "3.936.0", - "@aws-sdk/util-endpoints": "3.936.0", - "@aws-sdk/util-user-agent-browser": "3.936.0", - "@aws-sdk/util-user-agent-node": "3.947.0", - "@smithy/config-resolver": "^4.4.3", - "@smithy/core": "^3.18.7", - "@smithy/fetch-http-handler": "^5.3.6", - 
"@smithy/hash-node": "^4.2.5", - "@smithy/invalid-dependency": "^4.2.5", - "@smithy/middleware-content-length": "^4.2.5", - "@smithy/middleware-endpoint": "^4.3.14", - "@smithy/middleware-retry": "^4.4.14", - "@smithy/middleware-serde": "^4.2.6", - "@smithy/middleware-stack": "^4.2.5", - "@smithy/node-config-provider": "^4.3.5", - "@smithy/node-http-handler": "^4.4.5", - "@smithy/protocol-http": "^5.3.5", - "@smithy/smithy-client": "^4.9.10", - "@smithy/types": "^4.9.0", - "@smithy/url-parser": "^4.2.5", - "@smithy/util-base64": "^4.3.0", - "@smithy/util-body-length-browser": "^4.2.0", - "@smithy/util-body-length-node": "^4.2.1", - "@smithy/util-defaults-mode-browser": "^4.3.13", - "@smithy/util-defaults-mode-node": "^4.2.16", - "@smithy/util-endpoints": "^3.2.5", - "@smithy/util-middleware": "^4.2.5", - "@smithy/util-retry": "^4.2.5", - "@smithy/util-utf8": "^4.2.0", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@aws-sdk/client-transcribe/node_modules/@aws-sdk/core": { - "version": "3.947.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/core/-/core-3.947.0.tgz", - "integrity": "sha512-Khq4zHhuAkvCFuFbgcy3GrZTzfSX7ZIjIcW1zRDxXRLZKRtuhnZdonqTUfaWi5K42/4OmxkYNpsO7X7trQOeHw==", - "license": "Apache-2.0", - "dependencies": { - "@aws-sdk/types": "3.936.0", - "@aws-sdk/xml-builder": "3.930.0", - "@smithy/core": "^3.18.7", - "@smithy/node-config-provider": "^4.3.5", - "@smithy/property-provider": "^4.2.5", - "@smithy/protocol-http": "^5.3.5", - "@smithy/signature-v4": "^5.3.5", - "@smithy/smithy-client": "^4.9.10", - "@smithy/types": "^4.9.0", - "@smithy/util-base64": "^4.3.0", - "@smithy/util-middleware": "^4.2.5", - "@smithy/util-utf8": "^4.2.0", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@aws-sdk/client-transcribe/node_modules/@aws-sdk/credential-provider-env": { - "version": "3.947.0", - "resolved": 
"https://registry.npmjs.org/@aws-sdk/credential-provider-env/-/credential-provider-env-3.947.0.tgz", - "integrity": "sha512-VR2V6dRELmzwAsCpK4GqxUi6UW5WNhAXS9F9AzWi5jvijwJo3nH92YNJUP4quMpgFZxJHEWyXLWgPjh9u0zYOA==", - "license": "Apache-2.0", - "dependencies": { - "@aws-sdk/core": "3.947.0", - "@aws-sdk/types": "3.936.0", - "@smithy/property-provider": "^4.2.5", - "@smithy/types": "^4.9.0", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@aws-sdk/client-transcribe/node_modules/@aws-sdk/credential-provider-http": { - "version": "3.947.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-http/-/credential-provider-http-3.947.0.tgz", - "integrity": "sha512-inF09lh9SlHj63Vmr5d+LmwPXZc2IbK8lAruhOr3KLsZAIHEgHgGPXWDC2ukTEMzg0pkexQ6FOhXXad6klK4RA==", - "license": "Apache-2.0", - "dependencies": { - "@aws-sdk/core": "3.947.0", - "@aws-sdk/types": "3.936.0", - "@smithy/fetch-http-handler": "^5.3.6", - "@smithy/node-http-handler": "^4.4.5", - "@smithy/property-provider": "^4.2.5", - "@smithy/protocol-http": "^5.3.5", - "@smithy/smithy-client": "^4.9.10", - "@smithy/types": "^4.9.0", - "@smithy/util-stream": "^4.5.6", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@aws-sdk/client-transcribe/node_modules/@aws-sdk/credential-provider-ini": { - "version": "3.948.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-ini/-/credential-provider-ini-3.948.0.tgz", - "integrity": "sha512-Cl//Qh88e8HBL7yYkJNpF5eq76IO6rq8GsatKcfVBm7RFVxCqYEPSSBtkHdbtNwQdRQqAMXc6E/lEB/CZUDxnA==", - "license": "Apache-2.0", - "dependencies": { - "@aws-sdk/core": "3.947.0", - "@aws-sdk/credential-provider-env": "3.947.0", - "@aws-sdk/credential-provider-http": "3.947.0", - "@aws-sdk/credential-provider-login": "3.948.0", - "@aws-sdk/credential-provider-process": "3.947.0", - "@aws-sdk/credential-provider-sso": "3.948.0", - "@aws-sdk/credential-provider-web-identity": "3.948.0", - 
"@aws-sdk/nested-clients": "3.948.0", - "@aws-sdk/types": "3.936.0", - "@smithy/credential-provider-imds": "^4.2.5", - "@smithy/property-provider": "^4.2.5", - "@smithy/shared-ini-file-loader": "^4.4.0", - "@smithy/types": "^4.9.0", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@aws-sdk/client-transcribe/node_modules/@aws-sdk/credential-provider-login": { - "version": "3.948.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-login/-/credential-provider-login-3.948.0.tgz", - "integrity": "sha512-gcKO2b6eeTuZGp3Vvgr/9OxajMrD3W+FZ2FCyJox363ZgMoYJsyNid1vuZrEuAGkx0jvveLXfwiVS0UXyPkgtw==", - "license": "Apache-2.0", - "dependencies": { - "@aws-sdk/core": "3.947.0", - "@aws-sdk/nested-clients": "3.948.0", - "@aws-sdk/types": "3.936.0", - "@smithy/property-provider": "^4.2.5", - "@smithy/protocol-http": "^5.3.5", - "@smithy/shared-ini-file-loader": "^4.4.0", - "@smithy/types": "^4.9.0", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@aws-sdk/client-transcribe/node_modules/@aws-sdk/credential-provider-node": { - "version": "3.948.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-node/-/credential-provider-node-3.948.0.tgz", - "integrity": "sha512-ep5vRLnrRdcsP17Ef31sNN4g8Nqk/4JBydcUJuFRbGuyQtrZZrVT81UeH2xhz6d0BK6ejafDB9+ZpBjXuWT5/Q==", - "license": "Apache-2.0", - "dependencies": { - "@aws-sdk/credential-provider-env": "3.947.0", - "@aws-sdk/credential-provider-http": "3.947.0", - "@aws-sdk/credential-provider-ini": "3.948.0", - "@aws-sdk/credential-provider-process": "3.947.0", - "@aws-sdk/credential-provider-sso": "3.948.0", - "@aws-sdk/credential-provider-web-identity": "3.948.0", - "@aws-sdk/types": "3.936.0", - "@smithy/credential-provider-imds": "^4.2.5", - "@smithy/property-provider": "^4.2.5", - "@smithy/shared-ini-file-loader": "^4.4.0", - "@smithy/types": "^4.9.0", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, - 
"node_modules/@aws-sdk/client-transcribe/node_modules/@aws-sdk/credential-provider-process": { - "version": "3.947.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-process/-/credential-provider-process-3.947.0.tgz", - "integrity": "sha512-WpanFbHe08SP1hAJNeDdBDVz9SGgMu/gc0XJ9u3uNpW99nKZjDpvPRAdW7WLA4K6essMjxWkguIGNOpij6Do2Q==", - "license": "Apache-2.0", - "dependencies": { - "@aws-sdk/core": "3.947.0", - "@aws-sdk/types": "3.936.0", - "@smithy/property-provider": "^4.2.5", - "@smithy/shared-ini-file-loader": "^4.4.0", - "@smithy/types": "^4.9.0", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@aws-sdk/client-transcribe/node_modules/@aws-sdk/credential-provider-sso": { - "version": "3.948.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-sso/-/credential-provider-sso-3.948.0.tgz", - "integrity": "sha512-gqLhX1L+zb/ZDnnYbILQqJ46j735StfWV5PbDjxRzBKS7GzsiYoaf6MyHseEopmWrez5zl5l6aWzig7UpzSeQQ==", - "license": "Apache-2.0", - "dependencies": { - "@aws-sdk/client-sso": "3.948.0", - "@aws-sdk/core": "3.947.0", - "@aws-sdk/token-providers": "3.948.0", - "@aws-sdk/types": "3.936.0", - "@smithy/property-provider": "^4.2.5", - "@smithy/shared-ini-file-loader": "^4.4.0", - "@smithy/types": "^4.9.0", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@aws-sdk/client-transcribe/node_modules/@aws-sdk/credential-provider-web-identity": { - "version": "3.948.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-web-identity/-/credential-provider-web-identity-3.948.0.tgz", - "integrity": "sha512-MvYQlXVoJyfF3/SmnNzOVEtANRAiJIObEUYYyjTqKZTmcRIVVky0tPuG26XnB8LmTYgtESwJIZJj/Eyyc9WURQ==", - "license": "Apache-2.0", - "dependencies": { - "@aws-sdk/core": "3.947.0", - "@aws-sdk/nested-clients": "3.948.0", - "@aws-sdk/types": "3.936.0", - "@smithy/property-provider": "^4.2.5", - "@smithy/shared-ini-file-loader": "^4.4.0", - "@smithy/types": 
"^4.9.0", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@aws-sdk/client-transcribe/node_modules/@aws-sdk/middleware-recursion-detection": { - "version": "3.948.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-recursion-detection/-/middleware-recursion-detection-3.948.0.tgz", - "integrity": "sha512-Qa8Zj+EAqA0VlAVvxpRnpBpIWJI9KUwaioY1vkeNVwXPlNaz9y9zCKVM9iU9OZ5HXpoUg6TnhATAHXHAE8+QsQ==", - "license": "Apache-2.0", - "dependencies": { - "@aws-sdk/types": "3.936.0", - "@aws/lambda-invoke-store": "^0.2.2", - "@smithy/protocol-http": "^5.3.5", - "@smithy/types": "^4.9.0", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@aws-sdk/client-transcribe/node_modules/@aws-sdk/middleware-user-agent": { - "version": "3.947.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-user-agent/-/middleware-user-agent-3.947.0.tgz", - "integrity": "sha512-7rpKV8YNgCP2R4F9RjWZFcD2R+SO/0R4VHIbY9iZJdH2MzzJ8ZG7h8dZ2m8QkQd1fjx4wrFJGGPJUTYXPV3baA==", - "license": "Apache-2.0", - "dependencies": { - "@aws-sdk/core": "3.947.0", - "@aws-sdk/types": "3.936.0", - "@aws-sdk/util-endpoints": "3.936.0", - "@smithy/core": "^3.18.7", - "@smithy/protocol-http": "^5.3.5", - "@smithy/types": "^4.9.0", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@aws-sdk/client-transcribe/node_modules/@aws-sdk/nested-clients": { - "version": "3.948.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/nested-clients/-/nested-clients-3.948.0.tgz", - "integrity": "sha512-zcbJfBsB6h254o3NuoEkf0+UY1GpE9ioiQdENWv7odo69s8iaGBEQ4BDpsIMqcuiiUXw1uKIVNxCB1gUGYz8lw==", - "license": "Apache-2.0", - "dependencies": { - "@aws-crypto/sha256-browser": "5.2.0", - "@aws-crypto/sha256-js": "5.2.0", - "@aws-sdk/core": "3.947.0", - "@aws-sdk/middleware-host-header": "3.936.0", - "@aws-sdk/middleware-logger": "3.936.0", - "@aws-sdk/middleware-recursion-detection": "3.948.0", - 
"@aws-sdk/middleware-user-agent": "3.947.0", - "@aws-sdk/region-config-resolver": "3.936.0", - "@aws-sdk/types": "3.936.0", - "@aws-sdk/util-endpoints": "3.936.0", - "@aws-sdk/util-user-agent-browser": "3.936.0", - "@aws-sdk/util-user-agent-node": "3.947.0", - "@smithy/config-resolver": "^4.4.3", - "@smithy/core": "^3.18.7", - "@smithy/fetch-http-handler": "^5.3.6", - "@smithy/hash-node": "^4.2.5", - "@smithy/invalid-dependency": "^4.2.5", - "@smithy/middleware-content-length": "^4.2.5", - "@smithy/middleware-endpoint": "^4.3.14", - "@smithy/middleware-retry": "^4.4.14", - "@smithy/middleware-serde": "^4.2.6", - "@smithy/middleware-stack": "^4.2.5", - "@smithy/node-config-provider": "^4.3.5", - "@smithy/node-http-handler": "^4.4.5", - "@smithy/protocol-http": "^5.3.5", - "@smithy/smithy-client": "^4.9.10", - "@smithy/types": "^4.9.0", - "@smithy/url-parser": "^4.2.5", - "@smithy/util-base64": "^4.3.0", - "@smithy/util-body-length-browser": "^4.2.0", - "@smithy/util-body-length-node": "^4.2.1", - "@smithy/util-defaults-mode-browser": "^4.3.13", - "@smithy/util-defaults-mode-node": "^4.2.16", - "@smithy/util-endpoints": "^3.2.5", - "@smithy/util-middleware": "^4.2.5", - "@smithy/util-retry": "^4.2.5", - "@smithy/util-utf8": "^4.2.0", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@aws-sdk/client-transcribe/node_modules/@aws-sdk/token-providers": { - "version": "3.948.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/token-providers/-/token-providers-3.948.0.tgz", - "integrity": "sha512-V487/kM4Teq5dcr1t5K6eoUKuqlGr9FRWL3MIMukMERJXHZvio6kox60FZ/YtciRHRI75u14YUqm2Dzddcu3+A==", - "license": "Apache-2.0", - "dependencies": { - "@aws-sdk/core": "3.947.0", - "@aws-sdk/nested-clients": "3.948.0", - "@aws-sdk/types": "3.936.0", - "@smithy/property-provider": "^4.2.5", - "@smithy/shared-ini-file-loader": "^4.4.0", - "@smithy/types": "^4.9.0", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, - 
"node_modules/@aws-sdk/client-transcribe/node_modules/@aws-sdk/util-user-agent-node": { - "version": "3.947.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-user-agent-node/-/util-user-agent-node-3.947.0.tgz", - "integrity": "sha512-+vhHoDrdbb+zerV4noQk1DHaUMNzWFWPpPYjVTwW2186k5BEJIecAMChYkghRrBVJ3KPWP1+JnZwOd72F3d4rQ==", - "license": "Apache-2.0", - "dependencies": { - "@aws-sdk/middleware-user-agent": "3.947.0", - "@aws-sdk/types": "3.936.0", - "@smithy/node-config-provider": "^4.3.5", - "@smithy/types": "^4.9.0", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - }, - "peerDependencies": { - "aws-crt": ">=1.0.0" - }, - "peerDependenciesMeta": { - "aws-crt": { - "optional": true - } - } - }, "node_modules/@aws-sdk/core": { "version": "3.940.0", "resolved": "https://registry.npmjs.org/@aws-sdk/core/-/core-3.940.0.tgz", diff --git a/package.json b/package.json index b89d886..5295b48 100644 --- a/package.json +++ b/package.json @@ -20,7 +20,6 @@ }, "dependencies": { "@aws-sdk/client-s3": "^3.936.0", - "@aws-sdk/client-transcribe": "^3.948.0", "@aws-sdk/s3-request-presigner": "^3.936.0", "@getbrevo/brevo": "^3.0.1", "@prisma/client": "^6.19.0", From 7b1815ebc1ab2582d94eacb12796da47967c0eb2 Mon Sep 17 00:00:00 2001 From: Sergio N Date: Mon, 29 Dec 2025 14:16:25 +0100 Subject: [PATCH 04/22] REMOVE transcribe-related functionality Will be handled by whisper-asr --- server/integrations/aws/Transcribe.js | 56 --------------------------- 1 file changed, 56 deletions(-) delete mode 100644 server/integrations/aws/Transcribe.js diff --git a/server/integrations/aws/Transcribe.js b/server/integrations/aws/Transcribe.js deleted file mode 100644 index 569f3ee..0000000 --- a/server/integrations/aws/Transcribe.js +++ /dev/null @@ -1,56 +0,0 @@ -import { - TranscribeClient, - StartTranscriptionJobCommand, - ListTranscriptionJobsCommand, - DeleteTranscriptionJobCommand, -} from '@aws-sdk/client-transcribe'; -import { getS3Object } from './s3.js'; -import { 
logInfo } from '../../config/loggerFunctions.js'; - -const transcribeClient = new TranscribeClient({ region: process.env.S3_REGION }); - -export const requestAnalysisEntryTranscriptionToAWSTranscribe = async (transcriptionRequest) => { - const command = new StartTranscriptionJobCommand({ - TranscriptionJobName: transcriptionRequest.analysisEntryId, - LanguageCode: transcriptionRequest.languageCode, - Media: { - MediaFileUri: `s3://${process.env.S3_BUCKET}/analysis/${transcriptionRequest.analysisId}/${transcriptionRequest.analysisEntryId}/recording.mp4`, - }, - OutputBucketName: process.env.S3_BUCKET, - OutputKey: `analysis/${transcriptionRequest.analysisId}/${transcriptionRequest.analysisEntryId}/transcription.json`, - }); - - await transcribeClient.send(command); -}; - -export const listCompletedTranscriptionJobsFromAWS = async () => { - const command = new ListTranscriptionJobsCommand({ - Status: 'COMPLETED', - MaxResults: 10, // Ensure memory is not hogged - if more are available, they will be processed in the next iteration - }); - - const completedTranscriptionJobs = await transcribeClient.send(command); - const completedTranscriptionJobsSummary = completedTranscriptionJobs.TranscriptionJobSummaries; // returns an array - - return completedTranscriptionJobsSummary; -}; - -export const fetchSingleTranscriptionJob = async (analysisId, analysisEntryId) => { - const key = `analysis/${analysisId}/${analysisEntryId}/transcription.json`; - - const transcriptionJobResult = await getS3Object(key); - - return transcriptionJobResult; -}; - -export const deleteCompletedTranscriptionJobFromAWS = async (transcriptionJobName) => { - logInfo(`deleting ${transcriptionJobName} from AWS Transcribe`); - const command = new DeleteTranscriptionJobCommand({ - TranscriptionJobName: transcriptionJobName, - }); - - const deletedTranscriptionJobs = await transcribeClient.send(command); - // returns an array - - return deletedTranscriptionJobs; -}; From 
1e5fe7964a76439b6c616b88c01b02f995217275 Mon Sep 17 00:00:00 2001 From: Sergio N Date: Mon, 29 Dec 2025 14:17:16 +0100 Subject: [PATCH 05/22] ADD logic & CRON for pending transcription Jobs from DB --- .../getPendingTranscriptionJobScheduler.js | 11 ++++++++++ server/cron/jobsContainer.js | 8 ++++--- server/models/transcriptionModel.js | 21 +++++++++++++++++-- 3 files changed, 35 insertions(+), 5 deletions(-) create mode 100644 server/cron/getPendingTranscriptionJobScheduler.js diff --git a/server/cron/getPendingTranscriptionJobScheduler.js b/server/cron/getPendingTranscriptionJobScheduler.js new file mode 100644 index 0000000..022a0e4 --- /dev/null +++ b/server/cron/getPendingTranscriptionJobScheduler.js @@ -0,0 +1,11 @@ +import { CronJob } from 'cron'; +import { processPendingTranscriptionJobs } from '../controllers/transcriptionController.js'; +import { logError } from '../config/loggerFunctions.js'; + +export const getPendingTranscriptionJobScheduler = new CronJob('* * * * *', async () => { + try { + await processPendingTranscriptionJobs(); + } catch (error) { + logError('Error processing transcription request', error); + } +}); diff --git a/server/cron/jobsContainer.js b/server/cron/jobsContainer.js index 83f9ac1..86cbc57 100644 --- a/server/cron/jobsContainer.js +++ b/server/cron/jobsContainer.js @@ -1,14 +1,16 @@ -import { logError } from '../config/loggerFunctions.js'; +import { logError, logInfo } from '../config/loggerFunctions.js'; import { deletePasswordResetTokensScheduler } from './deletePasswordResetTokensScheduler.js'; import { getCompletedTranscriptionJobsScheduler } from './getCompletedTranscriptionJobsScheduler.js'; +import { getPendingTranscriptionJobScheduler } from './getPendingTranscriptionJobScheduler.js'; import { markAnalysisEntriesAsCancelledScheduler } from './markAsCancelledAnalysisEntriesScheduler.js'; export const startCronJobs = () => { + logInfo('Starting cron jobs'); try { deletePasswordResetTokensScheduler.start(); - if 
(process.env.TRANSCRIPTION_ENABLED === true) { - getCompletedTranscriptionJobsScheduler.start(); + if (process.env.TRANSCRIPTION_ENABLED === 'true') { + getPendingTranscriptionJobScheduler.start(); } markAnalysisEntriesAsCancelledScheduler.start(); diff --git a/server/models/transcriptionModel.js b/server/models/transcriptionModel.js index e87cf76..272314b 100644 --- a/server/models/transcriptionModel.js +++ b/server/models/transcriptionModel.js @@ -2,7 +2,7 @@ import { PrismaClient } from '../config/generated/prisma/client/index.js'; const prisma = new PrismaClient(); -export const insertTranscriptionRequestInDb = async (transcriptionRequest) => { +export const insertTranscriptionJobInDb = async (transcriptionRequest) => { await prisma.transcriptionJob.create({ data: { analysis_entry_id: transcriptionRequest.analysisEntryId, @@ -12,7 +12,7 @@ export const insertTranscriptionRequestInDb = async (transcriptionRequest) => { }); }; -export const updateSingleTranscriptionRequestInDb = async (analysisEntryId) => { +export const markInProgressSingleTranscriptionJobInDb = async (analysisEntryId) => { const whereClause = { analysis_entry_id: analysisEntryId, }; @@ -63,3 +63,20 @@ export const storeNormalizedTranscriptionInDb = async (transcriptionJobName, nor }, }); }; + +export const getPendingTranscriptionJobsFromDb = async () => { + const whereClause = { + status: 'PENDING', + }; + + const pendingTranscriptionJobs = await prisma.transcriptionJob.findMany({ + where: whereClause, + select: { + analysis_entry_id: true, + language_code: true, + }, + take: 10, + }); + + return pendingTranscriptionJobs; +}; From 423fa3af21181815703bceb86aa5325c43770758 Mon Sep 17 00:00:00 2001 From: Sergio N Date: Mon, 29 Dec 2025 14:55:21 +0100 Subject: [PATCH 06/22] RENAME aws directory to s3-client --- server/controllers/analysisController.js | 2 +- server/controllers/analysisEntryController.js | 1 + server/integrations/{aws => s3-client}/s3.js | 0 3 files changed, 2 insertions(+), 1 
deletion(-) rename server/integrations/{aws => s3-client}/s3.js (100%) diff --git a/server/controllers/analysisController.js b/server/controllers/analysisController.js index 3f21a91..49ec4e1 100644 --- a/server/controllers/analysisController.js +++ b/server/controllers/analysisController.js @@ -7,7 +7,7 @@ import { from '../models/analysisModel.js'; import { createAnalysisEntryInDb } from '../models/analysisEntryModel.js'; -import { generateS3PutPresignedUrl } from '../integrations/aws/s3.js'; +import { generateS3PutPresignedUrl } from '../integrations/s3-client/s3.js'; export const createAnalysis = async (req, res) => { if (req.sanitizedErrors) { diff --git a/server/controllers/analysisEntryController.js b/server/controllers/analysisEntryController.js index 3fa70a5..6c1c4b1 100644 --- a/server/controllers/analysisEntryController.js +++ b/server/controllers/analysisEntryController.js @@ -1,4 +1,5 @@ import { generateS3GetPresignedUrl } from '../integrations/aws/s3.js'; +import { generateS3GetPresignedUrl } from '../integrations/s3-client/s3.js'; import { createAnalysisEntryInDb, getAnalysisEntryDetailsById, markAnalysisEntryAsSubmitted } from '../models/analysisEntryModel.js'; import { processTranscriptionRequest } from '../services/analysisService.js'; diff --git a/server/integrations/aws/s3.js b/server/integrations/s3-client/s3.js similarity index 100% rename from server/integrations/aws/s3.js rename to server/integrations/s3-client/s3.js From 2ecca57b54711aff7146ebd3b0552bdac7e03457 Mon Sep 17 00:00:00 2001 From: Sergio N Date: Mon, 29 Dec 2025 14:55:30 +0100 Subject: [PATCH 07/22] ADD npm package form-data --- package-lock.json | 1 + package.json | 1 + 2 files changed, 2 insertions(+) diff --git a/package-lock.json b/package-lock.json index 7b49763..7e1b230 100644 --- a/package-lock.json +++ b/package-lock.json @@ -24,6 +24,7 @@ "express-session": "^1.18.2", "express-slow-down": "^3.0.1", "express-validator": "^7.3.1", + "form-data": "^4.0.5", "helmet": 
"^8.1.0", "npm": "^11.6.3", "passport": "^0.7.0", diff --git a/package.json b/package.json index 5295b48..208ebcf 100644 --- a/package.json +++ b/package.json @@ -34,6 +34,7 @@ "express-session": "^1.18.2", "express-slow-down": "^3.0.1", "express-validator": "^7.3.1", + "form-data": "^4.0.5", "helmet": "^8.1.0", "npm": "^11.6.3", "passport": "^0.7.0", From 3978d28afa523f55fa88150e9756a9ce1b33fbbe Mon Sep 17 00:00:00 2001 From: Sergio N Date: Mon, 29 Dec 2025 14:56:01 +0100 Subject: [PATCH 08/22] WIP transcription logic --- .../whisper-asr-webservice/transcribe.js | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 server/integrations/whisper-asr-webservice/transcribe.js diff --git a/server/integrations/whisper-asr-webservice/transcribe.js b/server/integrations/whisper-asr-webservice/transcribe.js new file mode 100644 index 0000000..853ef5f --- /dev/null +++ b/server/integrations/whisper-asr-webservice/transcribe.js @@ -0,0 +1,29 @@ +// Inside your Node.js monolith container + +import axios from 'axios'; + +import fs from 'node:fs'; + +import path from 'node:path'; + +const FormData = require('form-data'); + +const MINIO_VOLUME_PATH = '/Users/martaperezsanchez/repos/minio'; // Shared bind mount +const bucket = process.env.S3_BUCKET + +export const transcribeRecording = async (objectName) => { + const filePath = path.join(MINIO_VOLUME_PATH, bucket, objectName); + + const form = new FormData(); + form.append('audio_file', fs.createReadStream(filePath), 'recording.mp4'); + form.append('task', 'transcribe'); + form.append('output', 'json'); + form.append('word_timestamps', 'true'); // Works with video audio + + const response = await axios.post('http://whisper-asr:9000/asr', form, { + headers: { ...form.getHeaders() }, + timeout: 1200000, // 20min + }); + + return response.data; +}; From d046217a3f73db420f9edc53d5a4bc05e5f7d47a Mon Sep 17 00:00:00 2001 From: Sergio N Date: Mon, 29 Dec 2025 14:59:09 +0100 Subject: [PATCH 09/22] REMOVE deprecated 
logic relating to AWS Transcribe --- server/controllers/transcriptionController.js | 35 ++++++++++ .../getCompletedTranscriptionJobsScheduler.js | 12 ---- server/cron/jobsContainer.js | 4 -- server/services/analysisService.js | 64 +------------------ 4 files changed, 37 insertions(+), 78 deletions(-) create mode 100644 server/controllers/transcriptionController.js delete mode 100644 server/cron/getCompletedTranscriptionJobsScheduler.js diff --git a/server/controllers/transcriptionController.js b/server/controllers/transcriptionController.js new file mode 100644 index 0000000..6764be1 --- /dev/null +++ b/server/controllers/transcriptionController.js @@ -0,0 +1,35 @@ +import { logError, logInfo } from '../config/loggerFunctions.js'; +import { transcribeRecording } from '../integrations/whisper-asr-webservice/transcribe.js'; +import { getPendingTranscriptionJobsFromDb, markInProgressSingleTranscriptionJobInDb } from '../models/transcriptionModel.js'; + +export const processPendingTranscriptionJobs = async (transcriptionRequest) => { + try { + const pendingTranscriptionJobs = await getPendingTranscriptionJobsFromDb(); + + if (pendingTranscriptionJobs.length === 0) { + logInfo('No pending transcription jobs found'); + return; + } + + console.log('pendingTranscriptionJobs', pendingTranscriptionJobs); + + for (const transcriptionJob of pendingTranscriptionJobs) { + logInfo('Processing transcription job', transcriptionJob); + // await requestAnalysisEntryTranscription(transcriptionJob); + // logInfo('Transcription request sent to AWS Transcribe', transcriptionJob); + + const transcriptionResult = transcribeRecording(transcriptionJob.analysis_entry_id); + + await markInProgressSingleTranscriptionJobInDb(transcriptionJob.analysis_entry_id); + logInfo('Transcription request updated in DB', transcriptionJob); + + // store transcriptionResult & mark job as completed + + logInfo('Transcription result', transcriptionResult); + } + + logInfo('Transcription request updated in DB', 
transcriptionRequest); + } catch (error) { + logError('Error requesting analysisEntry transcription', error); + } +}; diff --git a/server/cron/getCompletedTranscriptionJobsScheduler.js b/server/cron/getCompletedTranscriptionJobsScheduler.js deleted file mode 100644 index 36a2f44..0000000 --- a/server/cron/getCompletedTranscriptionJobsScheduler.js +++ /dev/null @@ -1,12 +0,0 @@ -import { CronJob } from 'cron'; -import { handleCompletedVideoTranscriptionJobs } from '../services/analysisService.js'; -import { logError, logInfo } from '../config/loggerFunctions.js'; - -export const getCompletedTranscriptionJobsScheduler = new CronJob('15 * * * *', async () => { - try { - logInfo('Checking transcription job status'); - await handleCompletedVideoTranscriptionJobs(); - } catch (error) { - logError('Error checking transcription job status', error); - } -}); diff --git a/server/cron/jobsContainer.js b/server/cron/jobsContainer.js index 86cbc57..9212031 100644 --- a/server/cron/jobsContainer.js +++ b/server/cron/jobsContainer.js @@ -1,6 +1,5 @@ import { logError, logInfo } from '../config/loggerFunctions.js'; import { deletePasswordResetTokensScheduler } from './deletePasswordResetTokensScheduler.js'; -import { getCompletedTranscriptionJobsScheduler } from './getCompletedTranscriptionJobsScheduler.js'; import { getPendingTranscriptionJobScheduler } from './getPendingTranscriptionJobScheduler.js'; import { markAnalysisEntriesAsCancelledScheduler } from './markAsCancelledAnalysisEntriesScheduler.js'; @@ -14,9 +13,6 @@ export const startCronJobs = () => { } markAnalysisEntriesAsCancelledScheduler.start(); - - - console.log('Cron jobs started'); } catch (error) { logError('error on startCronJobs', error); } diff --git a/server/services/analysisService.js b/server/services/analysisService.js index 4ce2f68..dd2c1a4 100644 --- a/server/services/analysisService.js +++ b/server/services/analysisService.js @@ -1,36 +1,12 @@ import { logError, logInfo } from 
'../config/loggerFunctions.js'; import { - insertTranscriptionRequestInDb, - updateSingleTranscriptionRequestInDb, + insertTranscriptionJobInDb, + markInProgressSingleTranscriptionJobInDb, getSingleTranscriptionJobDetailsFromDb, storeNormalizedTranscriptionInDb, } from '../models/transcriptionModel.js'; -import { - requestAnalysisEntryTranscriptionToAWSTranscribe, deleteCompletedTranscriptionJobFromAWS, - fetchSingleTranscriptionJob, - listCompletedTranscriptionJobsFromAWS, -} from '../integrations/aws/Transcribe.js'; import { normalizeTranscript } from '../utils/transcription/transcriptionNormalizer.js'; -export const processTranscriptionRequest = async (transcriptionRequest) => { - try { - await insertTranscriptionRequestInDb(transcriptionRequest); - logInfo('Transcription request stored in DB', transcriptionRequest); - - try { // Handle errors gracefully - errors will be picked up by a cron job if failed - await requestAnalysisEntryTranscriptionToAWSTranscribe(transcriptionRequest); - logInfo('Transcription request sent to AWS Transcribe', transcriptionRequest); - - await updateSingleTranscriptionRequestInDb(transcriptionRequest.analysisEntryId); - logInfo('Transcription request updated in DB', transcriptionRequest); - } catch (error) { - logError('Error requesting transcription to AWS Transcribe', error); - } - } catch (error) { - logError('Error storing transcription request in DB', error); - } -}; - const processSingleCompletedTranscriptionJob = async (transcriptionJob) => { logInfo(`Processing completed transcription job: ${transcriptionJob.TranscriptionJobName}`); @@ -38,16 +14,6 @@ const processSingleCompletedTranscriptionJob = async (transcriptionJob) => { // 1. 
Get transcription job details from database const transcriptionJobDetails = await getSingleTranscriptionJobDetailsFromDb(transcriptionJob.TranscriptionJobName); - // Delete from AWS Transcribe if already processed - Shouldnt happen if AWS Transcribe job deletion is working properly - - if (transcriptionJobDetails.status === 'COMPLETED') { // Handle duplicate entries to avoid normalization reprocessing - logInfo(`Deleting already processed job: ${transcriptionJob.TranscriptionJobName}`); - await deleteCompletedTranscriptionJobFromAWS(transcriptionJob.TranscriptionJobName); - } - - // 2. Construct S3 key and fetch transcription file from AWS - const transcriptionJobResultString = await fetchSingleTranscriptionJob(transcriptionJobDetails.AnalysisEntry.analysis_id, transcriptionJob.TranscriptionJobName); - // 3. Parse the transcription job result (JSON string to object) const transcriptionJobResult = JSON.parse(transcriptionJobResultString); @@ -57,34 +23,8 @@ const processSingleCompletedTranscriptionJob = async (transcriptionJob) => { // 5. 
Store normalized transcript in DB and update status to COMPLETED await storeNormalizedTranscriptionInDb(transcriptionJob.TranscriptionJobName, normalizedTranscriptionJob, transcriptionJobResult); - try { - await deleteCompletedTranscriptionJobFromAWS(transcriptionJob.TranscriptionJobName); - } catch (error) { - logError(`Error deleting transcription job ${transcriptionJob.TranscriptionJobName} from AWS Transcribe`, error); - // AWS Transcribe deletion failing is not an issue since it will be caught by a CRON-based retry mechanism - } - logInfo(`Successfully processed transcription job: ${transcriptionJob.TranscriptionJobName}`); } catch (error) { logError(`Error processing transcription job ${transcriptionJob.TranscriptionJobName}`, error); } }; - -export const handleCompletedVideoTranscriptionJobs = async () => { - try { - // Get the completed Jobs from AWS Transcribe - logInfo('Retrieving completed transcription jobs'); - const completedTranscriptionJobsSummary = await listCompletedTranscriptionJobsFromAWS(); - - if (completedTranscriptionJobsSummary.length === 0) { - logInfo('no completed transcription jobs available to process'); - return; - } - - for (const transcriptionJob of completedTranscriptionJobsSummary) { - await processSingleCompletedTranscriptionJob(transcriptionJob); - } - } catch (error) { - logError('Error processing transcription jobs', error); - } -}; From 98229f3204737142c0f0a001372ae7894320856b Mon Sep 17 00:00:00 2001 From: Sergio N Date: Mon, 29 Dec 2025 15:00:13 +0100 Subject: [PATCH 10/22] REFACTOR transcription job request flow --- server/controllers/analysisEntryController.js | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/server/controllers/analysisEntryController.js b/server/controllers/analysisEntryController.js index 6c1c4b1..33ab70b 100644 --- a/server/controllers/analysisEntryController.js +++ b/server/controllers/analysisEntryController.js @@ -1,7 +1,7 @@ -import { generateS3GetPresignedUrl } from 
'../integrations/aws/s3.js'; +import { logError, logInfo } from '../config/loggerFunctions.js'; import { generateS3GetPresignedUrl } from '../integrations/s3-client/s3.js'; import { createAnalysisEntryInDb, getAnalysisEntryDetailsById, markAnalysisEntryAsSubmitted } from '../models/analysisEntryModel.js'; -import { processTranscriptionRequest } from '../services/analysisService.js'; +import { insertTranscriptionJobInDb } from '../models/transcriptionModel.js'; export const createAnalysisEntry = async (req, res) => { const { analysisId } = req.body; @@ -27,7 +27,12 @@ export const updateAnalysisEntry = async (req, res) => { }; if (process.env.TRANSCRIPTION_ENABLED === 'true') { - processTranscriptionRequest(transcriptionRequest); // Fire-and-forget + try { + await insertTranscriptionJobInDb(transcriptionRequest); + logInfo('Transcription request stored in DB', transcriptionRequest); + } catch (error) { + logError(`error inserting ${transcriptionRequest.analysisEntryId} analysisEntry's transcription request`); + } } return res.status(200).json({ From 65d2ac3cc99dde161b5d7067f003e2bd8d4ac84a Mon Sep 17 00:00:00 2001 From: Sergio N Date: Tue, 30 Dec 2025 16:20:29 +0100 Subject: [PATCH 11/22] UPDATE transcribe attributes in compose.yaml --- compose.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compose.yaml b/compose.yaml index 8d66175..e974746 100644 --- a/compose.yaml +++ b/compose.yaml @@ -104,13 +104,13 @@ services: - 9006:9000 volumes: - whisper-transcription-models:/root/.cache # Model persistence (~2-5GB) - - /Users/martaperezsanchez/repos/minio:ro # Mount your video storage environment: - ASR_MODEL=medium # Or base/tiny for speed - ASR_DEVICE=cpu - ASR_ENGINE=openai_whisper - MAX_REQUEST_SIZE=5000 # MB for large videos - - MODEL_IDLE_TIMEOUT=900 # Keep model loaded 15min + - MODEL_IDLE_TIMEOUT=900 # in Seconds - Keep model loaded 15min+ + - WORKERS= 2 # handle 2 concurrent requests - be extra-safe to avoid ram overload restart: 
unless-stopped depends_on: - minio From 6ed84d873cb477710f48b1fc77ad5f5da212d2c9 Mon Sep 17 00:00:00 2001 From: Sergio N Date: Wed, 31 Dec 2025 17:48:59 +0100 Subject: [PATCH 12/22] WIP in transcribe --- compose.yaml | 43 +++++++++++-------- .../whisper-asr-webservice/transcribe.js | 42 ++++++++++++------ server/models/transcriptionModel.js | 2 +- 3 files changed, 55 insertions(+), 32 deletions(-) diff --git a/compose.yaml b/compose.yaml index e974746..ea3a29b 100644 --- a/compose.yaml +++ b/compose.yaml @@ -97,25 +97,30 @@ services: networks: - uxcaptain-network - whisper-transcribe: - image: onerahmet/openai-whisper-asr-webservice:latest - container_name: whisper-asr - ports: - - 9006:9000 - volumes: - - whisper-transcription-models:/root/.cache # Model persistence (~2-5GB) - environment: - - ASR_MODEL=medium # Or base/tiny for speed - - ASR_DEVICE=cpu - - ASR_ENGINE=openai_whisper - - MAX_REQUEST_SIZE=5000 # MB for large videos - - MODEL_IDLE_TIMEOUT=900 # in Seconds - Keep model loaded 15min+ - - WORKERS= 2 # handle 2 concurrent requests - be extra-safe to avoid ram overload - restart: unless-stopped - depends_on: - - minio - networks: - - uxcaptain-network # Same as MinIO/monolith + faster-whisper-transcribe: + image: onerahmet/openai-whisper-asr-webservice:latest + container_name: faster-whisper-asr + ports: + - 9007:9000 + volumes: + - whisper-transcription-models:/root/.cache # Model persistence (~2-5GB) + environment: + - ASR_MODEL=medium # tiny,base,small,medium,large https://ahmetoner.com/whisper-asr-webservice/environmental-variables/#configuring-the-model + - ASR_DEVICE=cpu # https://ahmetoner.com/whisper-asr-webservice/environmental-variables/#configuring-device-and-quantization + - ASR_ENGINE=faster_whisper # openai_whisper, faster_whisper, whisperx -- https://ahmetoner.com/whisper-asr-webservice/environmental-variables/#whisperx + - MODEL_IDLE_TIMEOUT=0 # in Seconds - Keep model loaded - 
https://ahmetoner.com/whisper-asr-webservice/environmental-variables/#configuring-the-model-unloading-timeout + - ASR_QUANTIZATION=int8 # https://ahmetoner.com/whisper-asr-webservice/environmental-variables/#configuring-device-and-quantization + restart: unless-stopped + depends_on: + - minio + networks: + - uxcaptain-network # Same as MinIO/monolith + # deploy: + # resources: + # limits: + # cpus: '4.0' + # memory: 5000M + diff --git a/server/integrations/whisper-asr-webservice/transcribe.js b/server/integrations/whisper-asr-webservice/transcribe.js index 853ef5f..341afd0 100644 --- a/server/integrations/whisper-asr-webservice/transcribe.js +++ b/server/integrations/whisper-asr-webservice/transcribe.js @@ -1,26 +1,44 @@ -// Inside your Node.js monolith container - import axios from 'axios'; - import fs from 'node:fs'; - import path from 'node:path'; +import FormData from 'form-data'; +import { getS3Object } from '../s3-client/s3'; -const FormData = require('form-data'); const MINIO_VOLUME_PATH = '/Users/martaperezsanchez/repos/minio'; // Shared bind mount -const bucket = process.env.S3_BUCKET +const bucket = process.env.S3_BUCKET; export const transcribeRecording = async (objectName) => { - const filePath = path.join(MINIO_VOLUME_PATH, bucket, objectName); + + // need to use S3 because S3 client (minIO) stores data in a compressed format and cant be accesed via bind mount + + // const fileBuffer = await getS3Object(); + const testFile = '../../../recording3.mp4' + console.log(testFile); // TODO - test this generates a decent transcription before moving on + /* full file path is: + 1- docker service address + 2- bucket -- dev-analysis-entry-storage + 3- analysis -- /analysis + 4- analysis ID - /5a39b038-1c45-4937-9a62-0bede82b69df + 5- analysis entry id -- /52b947ad-456d-4ccf-9bfc-c435ef4c1c6d + 6. 
file name -- recording.mp4 + + eg: dev-analysis-entry-storage/analysis/5a39b038-1c45-4937-9a62-0bede82b69df/52b947ad-456d-4ccf-9bfc-c435ef4c1c6d + */ + + // Create query parameters + const params = new URLSearchParams({ + task: 'transcribe', + output: 'json', + word_timestamps: 'false', // Works with video audio + }); + // Create form data with only the audio file const form = new FormData(); - form.append('audio_file', fs.createReadStream(filePath), 'recording.mp4'); - form.append('task', 'transcribe'); - form.append('output', 'json'); - form.append('word_timestamps', 'true'); // Works with video audio + form.append('audio_file', fs.createReadStream(fileBuffer), objectName); - const response = await axios.post('http://whisper-asr:9000/asr', form, { + // Make request with query parameters + const response = await axios.post(`http://localhost:9006/asr?${params.toString()}`, form, { headers: { ...form.getHeaders() }, timeout: 1200000, // 20min }); diff --git a/server/models/transcriptionModel.js b/server/models/transcriptionModel.js index 272314b..e820c81 100644 --- a/server/models/transcriptionModel.js +++ b/server/models/transcriptionModel.js @@ -25,7 +25,7 @@ export const markInProgressSingleTranscriptionJobInDb = async (analysisEntryId) }); }; -export const getSingleTranscriptionJobDetailsFromDb = async (transcriptionJobName) => { +export const getSingleTranscriptionJobDetailsFromDb = async (transcriptionJobName) => { // TODO MARKED FOR DELETION const whereClause = { analysis_entry_id: transcriptionJobName, }; From 3317dc09671f4dde69c0f80f3e4b7c8515446f08 Mon Sep 17 00:00:00 2001 From: Sergio N Date: Thu, 1 Jan 2026 21:23:49 +0100 Subject: [PATCH 13/22] UPDATE logging & update transcription request --> transcription job --- server/controllers/analysisEntryController.js | 8 ++++---- server/controllers/transcriptionController.js | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/server/controllers/analysisEntryController.js 
b/server/controllers/analysisEntryController.js index 33ab70b..9e27d95 100644 --- a/server/controllers/analysisEntryController.js +++ b/server/controllers/analysisEntryController.js @@ -20,7 +20,7 @@ export const updateAnalysisEntry = async (req, res) => { const updatedAnalysisEntry = await markAnalysisEntryAsSubmitted(analysisEntryId); - const transcriptionRequest = { + const transcriptionJob = { analysisEntryId: analysisEntryId, analysisId: updatedAnalysisEntry.analysis_id, languageCode: 'es-ES', @@ -28,10 +28,10 @@ export const updateAnalysisEntry = async (req, res) => { if (process.env.TRANSCRIPTION_ENABLED === 'true') { try { - await insertTranscriptionJobInDb(transcriptionRequest); - logInfo('Transcription request stored in DB', transcriptionRequest); + await insertTranscriptionJobInDb(transcriptionJob); + logInfo(`Transcription job for ${transcriptionJob.analysisEntryId} stored in DB`, transcriptionJob); } catch (error) { - logError(`error inserting ${transcriptionRequest.analysisEntryId} analysisEntry's transcription request`); + logError(`error inserting ${transcriptionJob.analysisEntryId} analysisEntry's transcription request`); } } diff --git a/server/controllers/transcriptionController.js b/server/controllers/transcriptionController.js index 6764be1..365e09a 100644 --- a/server/controllers/transcriptionController.js +++ b/server/controllers/transcriptionController.js @@ -14,7 +14,7 @@ export const processPendingTranscriptionJobs = async (transcriptionRequest) => { console.log('pendingTranscriptionJobs', pendingTranscriptionJobs); for (const transcriptionJob of pendingTranscriptionJobs) { - logInfo('Processing transcription job', transcriptionJob); + logInfo(`Processing transcription job for analysis entry ID: ${transcriptionJob.analysis_entry_id}`, transcriptionJob); // await requestAnalysisEntryTranscription(transcriptionJob); // logInfo('Transcription request sent to AWS Transcribe', transcriptionJob); From b2cc138ab8a24a1a36c33463101bd9301f3a1660 Mon 
Sep 17 00:00:00 2001 From: Sergio N Date: Thu, 1 Jan 2026 21:25:15 +0100 Subject: [PATCH 14/22] WIP in transcription to send file to transcription service --- .../whisper-asr-webservice/transcribe.js | 32 ++++++------------- server/models/transcriptionModel.js | 5 +++ 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/server/integrations/whisper-asr-webservice/transcribe.js b/server/integrations/whisper-asr-webservice/transcribe.js index 341afd0..537f3ab 100644 --- a/server/integrations/whisper-asr-webservice/transcribe.js +++ b/server/integrations/whisper-asr-webservice/transcribe.js @@ -1,44 +1,32 @@ import axios from 'axios'; import fs from 'node:fs'; -import path from 'node:path'; import FormData from 'form-data'; -import { getS3Object } from '../s3-client/s3'; +import { getS3Object } from '../s3-client/s3.js'; +export const transcribeRecording = async (transcriptionJob) => { + // need to use S3 because S3 client (minIO) stores data in a compressed format and cant be accesed via bind mount -const MINIO_VOLUME_PATH = '/Users/martaperezsanchez/repos/minio'; // Shared bind mount -const bucket = process.env.S3_BUCKET; + const key = `analysis/${transcriptionJob.AnalysisEntry.analysis_id}/${transcriptionJob.analysis_entry_id}/recording.mp4`; -export const transcribeRecording = async (objectName) => { - - // need to use S3 because S3 client (minIO) stores data in a compressed format and cant be accesed via bind mount + console.log('key', key); - // const fileBuffer = await getS3Object(); - const testFile = '../../../recording3.mp4' - console.log(testFile); // TODO - test this generates a decent transcription before moving on - /* full file path is: - 1- docker service address - 2- bucket -- dev-analysis-entry-storage - 3- analysis -- /analysis - 4- analysis ID - /5a39b038-1c45-4937-9a62-0bede82b69df - 5- analysis entry id -- /52b947ad-456d-4ccf-9bfc-c435ef4c1c6d - 6. 
file name -- recording.mp4 - - eg: dev-analysis-entry-storage/analysis/5a39b038-1c45-4937-9a62-0bede82b69df/52b947ad-456d-4ccf-9bfc-c435ef4c1c6d - */ + const fileBuffer = await getS3Object(key); // Create query parameters const params = new URLSearchParams({ task: 'transcribe', output: 'json', word_timestamps: 'false', // Works with video audio + language: 'es', + vad_filter: 'true', }); // Create form data with only the audio file const form = new FormData(); - form.append('audio_file', fs.createReadStream(fileBuffer), objectName); + form.append('audio_file', fs.createReadStream(fileBuffer)); // Make request with query parameters - const response = await axios.post(`http://localhost:9006/asr?${params.toString()}`, form, { + const response = await axios.post(`http://localhost:9007/asr?${params.toString()}`, form, { headers: { ...form.getHeaders() }, timeout: 1200000, // 20min }); diff --git a/server/models/transcriptionModel.js b/server/models/transcriptionModel.js index e820c81..136df8f 100644 --- a/server/models/transcriptionModel.js +++ b/server/models/transcriptionModel.js @@ -74,6 +74,11 @@ export const getPendingTranscriptionJobsFromDb = async () => { select: { analysis_entry_id: true, language_code: true, + AnalysisEntry: { + select: { + analysis_id: true, + }, + }, }, take: 10, }); From 9566f7b37f62c624a8a8bf86a89d06fa25ffa782 Mon Sep 17 00:00:00 2001 From: Sergio N Date: Fri, 2 Jan 2026 10:23:55 +0100 Subject: [PATCH 15/22] WIP in transcriptionJob processing --- server/controllers/transcriptionController.js | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/server/controllers/transcriptionController.js b/server/controllers/transcriptionController.js index 365e09a..94de0b6 100644 --- a/server/controllers/transcriptionController.js +++ b/server/controllers/transcriptionController.js @@ -1,6 +1,7 @@ import { logError, logInfo } from '../config/loggerFunctions.js'; import { transcribeRecording } from 
'../integrations/whisper-asr-webservice/transcribe.js'; -import { getPendingTranscriptionJobsFromDb, markInProgressSingleTranscriptionJobInDb } from '../models/transcriptionModel.js'; +import { getPendingTranscriptionJobsFromDb, storeNormalizedTranscriptionInDb } from '../models/transcriptionModel.js'; +import { normalizeTranscript } from '../utils/transcription/transcriptionNormalizer.js'; export const processPendingTranscriptionJobs = async (transcriptionRequest) => { try { @@ -11,21 +12,27 @@ export const processPendingTranscriptionJobs = async (transcriptionRequest) => { return; } - console.log('pendingTranscriptionJobs', pendingTranscriptionJobs); - for (const transcriptionJob of pendingTranscriptionJobs) { logInfo(`Processing transcription job for analysis entry ID: ${transcriptionJob.analysis_entry_id}`, transcriptionJob); - // await requestAnalysisEntryTranscription(transcriptionJob); - // logInfo('Transcription request sent to AWS Transcribe', transcriptionJob); - const transcriptionResult = transcribeRecording(transcriptionJob.analysis_entry_id); + try { + const { analysis_entry_id: analysisEntryId } = transcriptionJob; + + const transcriptionJobResult = await transcribeRecording(transcriptionJob); + + const { segments, text: fullText } = transcriptionJobResult; + // 4. 
Normalize transcription job result + const normalizedSegments = await normalizeTranscript(segments); - await markInProgressSingleTranscriptionJobInDb(transcriptionJob.analysis_entry_id); - logInfo('Transcription request updated in DB', transcriptionJob); + console.log('normalizedNONparsed', normalizedSegments); - // store transcriptionResult & mark job as completed + // store transcriptionResult & mark job as completed + // await storeNormalizedTranscriptionInDb(analysisEntryId, fullText, normalizedSegments); - logInfo('Transcription result', transcriptionResult); + // logInfo(`Transcription job ${transcriptionJob.analysis_entry_id} completed`, transcriptionJobResult); + } catch (error) { + logError(`Error processing transcription job, ${transcriptionJob.analysis_entry_id}`, error); + } } logInfo('Transcription request updated in DB', transcriptionRequest); From 8e9838d933cf51e1a96b4d53ed23f748236ce54b Mon Sep 17 00:00:00 2001 From: Sergio N Date: Fri, 2 Jan 2026 10:24:38 +0100 Subject: [PATCH 16/22] REFACTOR logic for to download recording from S3 and sending to transcription container --- server/controllers/transcriptionController.js | 45 ++--- server/integrations/s3-client/s3.js | 6 +- .../whisper-asr-webservice/transcribe.js | 13 +- server/models/transcriptionModel.js | 8 +- .../transcription/transcriptionNormalizer.js | 184 +++--------------- 5 files changed, 59 insertions(+), 197 deletions(-) diff --git a/server/controllers/transcriptionController.js b/server/controllers/transcriptionController.js index 94de0b6..b1e2fd4 100644 --- a/server/controllers/transcriptionController.js +++ b/server/controllers/transcriptionController.js @@ -1,42 +1,33 @@ import { logError, logInfo } from '../config/loggerFunctions.js'; import { transcribeRecording } from '../integrations/whisper-asr-webservice/transcribe.js'; import { getPendingTranscriptionJobsFromDb, storeNormalizedTranscriptionInDb } from '../models/transcriptionModel.js'; -import { normalizeTranscript } from 
'../utils/transcription/transcriptionNormalizer.js'; +import { cleanUpTranscriptSegments } from '../utils/transcription/transcriptionNormalizer.js'; -export const processPendingTranscriptionJobs = async (transcriptionRequest) => { - try { - const pendingTranscriptionJobs = await getPendingTranscriptionJobsFromDb(); +export const processPendingTranscriptionJobs = async () => { + const pendingTranscriptionJobs = await getPendingTranscriptionJobsFromDb(); - if (pendingTranscriptionJobs.length === 0) { - logInfo('No pending transcription jobs found'); - return; - } + if (pendingTranscriptionJobs.length === 0) { + logInfo('No pending transcription jobs found'); + return; + } - for (const transcriptionJob of pendingTranscriptionJobs) { - logInfo(`Processing transcription job for analysis entry ID: ${transcriptionJob.analysis_entry_id}`, transcriptionJob); + for (const transcriptionJob of pendingTranscriptionJobs) { + logInfo(`Processing transcription job for analysis entry ID: ${transcriptionJob.analysis_entry_id}`); - try { - const { analysis_entry_id: analysisEntryId } = transcriptionJob; + try { + const { analysis_entry_id: analysisEntryId } = transcriptionJob; - const transcriptionJobResult = await transcribeRecording(transcriptionJob); + const transcriptionJobResult = await transcribeRecording(transcriptionJob); - const { segments, text: fullText } = transcriptionJobResult; - // 4. 
Normalize transcription job result - const normalizedSegments = await normalizeTranscript(segments); + const { segments, text: fullText } = transcriptionJobResult; - console.log('normalizedNONparsed', normalizedSegments); + const cleanedUpSegments = await cleanUpTranscriptSegments(segments); - // store transcriptionResult & mark job as completed - // await storeNormalizedTranscriptionInDb(analysisEntryId, fullText, normalizedSegments); + await storeNormalizedTranscriptionInDb(analysisEntryId, fullText, cleanedUpSegments); - // logInfo(`Transcription job ${transcriptionJob.analysis_entry_id} completed`, transcriptionJobResult); - } catch (error) { - logError(`Error processing transcription job, ${transcriptionJob.analysis_entry_id}`, error); - } + logInfo(`Transcription job ${transcriptionJob.analysis_entry_id} completed successfully`); + } catch (error) { + logError(`Error processing transcription job, ${transcriptionJob.analysis_entry_id}`, error); } - - logInfo('Transcription request updated in DB', transcriptionRequest); - } catch (error) { - logError('Error requesting analysisEntry transcription', error); } }; diff --git a/server/integrations/s3-client/s3.js b/server/integrations/s3-client/s3.js index 94c1168..042b1f2 100644 --- a/server/integrations/s3-client/s3.js +++ b/server/integrations/s3-client/s3.js @@ -44,8 +44,8 @@ export const getS3Object = async (key) => { const s3Object = await s3client.send(command); - // Read the response body as a stream and convert to string so it is workable - const responseBody = await s3Object.Body.transformToString(); + // Read the response body as a buffer for binary file handling + const responseBody = await s3Object.Body.transformToByteArray(); - return responseBody; + return Buffer.from(responseBody); }; diff --git a/server/integrations/whisper-asr-webservice/transcribe.js b/server/integrations/whisper-asr-webservice/transcribe.js index 537f3ab..df7e895 100644 --- 
a/server/integrations/whisper-asr-webservice/transcribe.js +++ b/server/integrations/whisper-asr-webservice/transcribe.js @@ -1,7 +1,8 @@ import axios from 'axios'; -import fs from 'node:fs'; import FormData from 'form-data'; import { getS3Object } from '../s3-client/s3.js'; +import { logError, logInfo } from '../../config/loggerFunctions.js'; +import { markInProgressSingleTranscriptionJobInDb } from '../../models/transcriptionModel.js'; export const transcribeRecording = async (transcriptionJob) => { // need to use S3 because S3 client (minIO) stores data in a compressed format and cant be accesed via bind mount @@ -21,9 +22,12 @@ export const transcribeRecording = async (transcriptionJob) => { vad_filter: 'true', }); - // Create form data with only the audio file + // Create form data with the buffer directly const form = new FormData(); - form.append('audio_file', fs.createReadStream(fileBuffer)); + form.append('audio_file', fileBuffer, { + filename: 'recording.mp4', + contentType: 'video/mp4', + }); // Make request with query parameters const response = await axios.post(`http://localhost:9007/asr?${params.toString()}`, form, { @@ -31,5 +35,8 @@ export const transcribeRecording = async (transcriptionJob) => { timeout: 1200000, // 20min }); + await markInProgressSingleTranscriptionJobInDb(transcriptionJob.analysis_entry_id); // ! unsure how to set this, because if i trigger the transcription, the function will not advance to in progress - maybe handle in progress in transcribe.js? 
+ logInfo('Transcription Job updated in DB', transcriptionJob); + return response.data; }; diff --git a/server/models/transcriptionModel.js b/server/models/transcriptionModel.js index 136df8f..836fc25 100644 --- a/server/models/transcriptionModel.js +++ b/server/models/transcriptionModel.js @@ -45,16 +45,16 @@ export const getSingleTranscriptionJobDetailsFromDb = async (transcriptionJobNam return transcriptionJobDetailsResult; }; -export const storeNormalizedTranscriptionInDb = async (transcriptionJobName, normalizedTranscriptionJob, transcriptionJobResult) => { +export const storeNormalizedTranscriptionInDb = async (analysisEntryId, fullText, normalizedSegments) => { const whereClause = { - id: transcriptionJobName, + id: analysisEntryId, }; await prisma.analysisEntry.update({ where: whereClause, data: { - full_transcript: transcriptionJobResult.results.transcripts[0].transcript, - transcription_segments: normalizedTranscriptionJob.results.segments, + full_transcript: fullText, + transcription_segments: normalizedSegments, transcriptionJob: { update: { status: 'COMPLETED', diff --git a/server/utils/transcription/transcriptionNormalizer.js b/server/utils/transcription/transcriptionNormalizer.js index 500fd11..b5dea7d 100644 --- a/server/utils/transcription/transcriptionNormalizer.js +++ b/server/utils/transcription/transcriptionNormalizer.js @@ -1,10 +1,3 @@ -import { logError } from '../../config/loggerFunctions.js'; - -/** - * Converts string numbers to actual numbers - * @param {string|number} value - Value to convert - * @returns {number} Converted number or 0 if invalid - */ const convertToNumber = (value) => { if (value === undefined || value === null) { return 0; @@ -14,170 +7,41 @@ const convertToNumber = (value) => { return Number.isNaN(num) ? 
0 : num; }; -/** - * Groups transcription items into meaningful segments with reduced cluttering - * @param {Array} items - Array of transcription items from AWS Transcribe - * @returns {Array} Array of segments with start_time, end_time, and transcript - */ -const createSegmentsFromItems = (items) => { - if (!items || items.length === 0) { +const createSegmentsFromItems = (segments) => { + if (!segments || segments.length === 0) { return []; } - const segments = []; - let currentSegment = null; - const SEGMENT_GAP_THRESHOLD = 2.0; // seconds - gap for natural pauses to start new segment - const SEGMENT_MAX_DURATION = 25.0; // seconds - maximum duration to avoid overly long segments - - for (let i = 0; i < items.length; i += 1) { - const item = items[i]; - - // Skip items without alternatives - if (!item.alternatives || item.alternatives.length === 0) { - // Skip this iteration - } else { - const alternative = item.alternatives[0]; - const content = alternative.content || ''; - - // Handle items without timing information (like some punctuation) - if (!item.start_time || !item.end_time) { - if (currentSegment && item.type === 'punctuation') { - currentSegment.transcript += content; - } - } else { - const startTime = convertToNumber(item.start_time); - const endTime = convertToNumber(item.end_time); - - // Start a new segment or continue current segment - if (!currentSegment) { - // Start first segment - currentSegment = { - start_time: startTime, - end_time: endTime, - transcript: content, - }; - } else { - // Check if we should start a new segment based on time gap or max duration - const timeGap = startTime - currentSegment.end_time; - const segmentDuration = startTime - currentSegment.start_time; - - // Start new segment if there's a significant gap OR if we've reached max duration - if (timeGap > SEGMENT_GAP_THRESHOLD || segmentDuration >= SEGMENT_MAX_DURATION) { - // Significant gap or max duration reached - finalize current segment and start new one - 
segments.push(currentSegment); - currentSegment = { - start_time: startTime, - end_time: endTime, - transcript: content, - }; - } else { - // Continue current segment - // Improved logic for adding spaces around punctuation - const currentText = currentSegment.transcript; - const newContent = content.trim(); - - // Don't add space if current text is empty - if (currentText.length === 0) { - currentSegment.transcript += newContent; - } else { - // Get the last character of current text and first character of new content - const lastChar = currentText[currentText.length - 1]; - const firstChar = newContent[0]; - - // Determine if we need to add space - let shouldAddSpace = false; - - // Add space if: - // 1. Current text doesn't end with punctuation and new content doesn't start with punctuation - // 2. Current text ends with punctuation (except quotes/brackets) and new content starts with a letter/number - // 3. Current text ends with letter/number and new content starts with punctuation (.,!?;:) - if (!/[.,!?;:)\]}'"]$/.test(lastChar) && !/^[.,!?;:([{'"]/.test(firstChar)) { - // Neither ends nor starts with punctuation - add space - shouldAddSpace = true; - } else if (/[.,!?;:)]'?]*$/.test(lastChar) && /^[A-Za-zÁÉÍÓÚÑÜáéíóúñü0-9]/.test(firstChar)) { - // Ends with punctuation and starts with letter/number - add space - shouldAddSpace = true; - } else if (/[A-Za-zÁÉÍÓÚÑÜáéíóúñü0-9]$/.test(lastChar) && /^[.,!?;:]/.test(firstChar)) { - // Ends with letter/number and starts with punctuation - don't add space - shouldAddSpace = false; - } else if (/[)\]}'"]$/.test(lastChar) && /^[A-Za-zÁÉÍÓÚÑÜáéíóúñü0-9]/.test(firstChar)) { - // Ends with closing bracket/quote and starts with letter/number - add space - shouldAddSpace = true; - } - - // Special handling for common Spanish patterns - // Add space after periods followed by capital letters (sentence boundaries) - if (/[.] 
$/.test(lastChar) && /^[A-ZÁÉÍÓÚÑÜ]/.test(firstChar)) { - shouldAddSpace = true; - } + const cleanedSegments = []; - // Add space after commas, semicolons, and colons - if (/[,;:]$/.test(lastChar)) { - shouldAddSpace = true; - } + for (let i = 0; i < segments.length; i += 1) { + const item = segments[i]; - if (shouldAddSpace) { - currentSegment.transcript += ' '; - } + // Process items with required fields + if (item.text && item.start !== undefined && item.end !== undefined) { + // Clean up the segment by removing unnecessary fields and renaming timing fields + const segment = { + start_time: convertToNumber(item.start), + end_time: convertToNumber(item.end), + transcript: item.text.trim(), + }; - currentSegment.transcript += newContent; - } - currentSegment.end_time = endTime; - } - } - } + segments.push(segment); } } - // Add the last segment if it exists - if (currentSegment) { - segments.push(currentSegment); - } - - return segments; + return cleanedSegments; }; -/** - * Normalizes AWS Transcribe output to the required format - * @param {object} transcript - AWS Transcribe output as parsed object - * @returns {Promise} Normalized transcription data - */ -export const normalizeTranscript = async (transcript) => { - try { - // Handle missing or malformed data - if (!transcript) { - return { - status: 'FAILED', - results: { - segments: [], - }, - }; - } - - // Extract status - const status = transcript.status || 'UNKNOWN'; - - // Extract results section - const results = transcript.results || {}; +export const cleanUpTranscriptSegments = async (segments) => { + // Handle missing or malformed data + if (!segments || !Array.isArray(segments)) { + throw new Error('Invalid segments data'); + } - // Extract items array and create segments - const items = results.items || []; - const segments = createSegmentsFromItems(items); + // Create segments from items + const cleanedSegments = createSegmentsFromItems(segments); - // Return normalized structure matching the 
required format - return { - status, - results: { - segments, - }, - }; - } catch (error) { - logError(`Error normalizing transcript: ${error.message}`, error); - return { - status: 'FAILED', - results: { - segments: [], - }, - }; - } + // Return normalized structure matching the required format + return cleanedSegments; }; From 47b4f1b5562c21f58f04b792481e6952f926c550 Mon Sep 17 00:00:00 2001 From: Sergio N Date: Fri, 2 Jan 2026 11:35:54 +0100 Subject: [PATCH 17/22] FIX transcription logic to return proper segments --- server/integrations/whisper-asr-webservice/transcribe.js | 6 ++---- server/utils/transcription/transcriptionNormalizer.js | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/server/integrations/whisper-asr-webservice/transcribe.js b/server/integrations/whisper-asr-webservice/transcribe.js index df7e895..95fc9a3 100644 --- a/server/integrations/whisper-asr-webservice/transcribe.js +++ b/server/integrations/whisper-asr-webservice/transcribe.js @@ -1,7 +1,7 @@ import axios from 'axios'; import FormData from 'form-data'; import { getS3Object } from '../s3-client/s3.js'; -import { logError, logInfo } from '../../config/loggerFunctions.js'; +import { logInfo } from '../../config/loggerFunctions.js'; import { markInProgressSingleTranscriptionJobInDb } from '../../models/transcriptionModel.js'; export const transcribeRecording = async (transcriptionJob) => { @@ -9,8 +9,6 @@ export const transcribeRecording = async (transcriptionJob) => { const key = `analysis/${transcriptionJob.AnalysisEntry.analysis_id}/${transcriptionJob.analysis_entry_id}/recording.mp4`; - console.log('key', key); - const fileBuffer = await getS3Object(key); // Create query parameters @@ -36,7 +34,7 @@ export const transcribeRecording = async (transcriptionJob) => { }); await markInProgressSingleTranscriptionJobInDb(transcriptionJob.analysis_entry_id); // ! 
unsure how to set this, because if i trigger the transcription, the function will not advance to in progress - maybe handle in progress in transcribe.js? - logInfo('Transcription Job updated in DB', transcriptionJob); + logInfo(`Transcription marked as in progress for analysis entry ${transcriptionJob.analysis_entry_id} updated in DB`); return response.data; }; diff --git a/server/utils/transcription/transcriptionNormalizer.js b/server/utils/transcription/transcriptionNormalizer.js index b5dea7d..a6e4c8d 100644 --- a/server/utils/transcription/transcriptionNormalizer.js +++ b/server/utils/transcription/transcriptionNormalizer.js @@ -26,7 +26,7 @@ const createSegmentsFromItems = (segments) => { transcript: item.text.trim(), }; - segments.push(segment); + cleanedSegments.push(segment); } } From 0d263719fcb601f16483e97ee2bd797713d59cea Mon Sep 17 00:00:00 2001 From: Sergio N Date: Fri, 2 Jan 2026 11:42:01 +0100 Subject: [PATCH 18/22] REMOVE unused code --- server/models/transcriptionModel.js | 20 ------------------- server/services/analysisService.js | 30 ----------------------------- 2 files changed, 50 deletions(-) delete mode 100644 server/services/analysisService.js diff --git a/server/models/transcriptionModel.js b/server/models/transcriptionModel.js index 836fc25..bbb6087 100644 --- a/server/models/transcriptionModel.js +++ b/server/models/transcriptionModel.js @@ -25,26 +25,6 @@ export const markInProgressSingleTranscriptionJobInDb = async (analysisEntryId) }); }; -export const getSingleTranscriptionJobDetailsFromDb = async (transcriptionJobName) => { // TODO MARKED FOR DELETION - const whereClause = { - analysis_entry_id: transcriptionJobName, - }; - - const transcriptionJobDetailsResult = await prisma.transcriptionJob.findUnique({ - where: whereClause, - select: { - status: true, - AnalysisEntry: { - select: { - analysis_id: true, - }, - }, - }, - }); - - return transcriptionJobDetailsResult; -}; - export const storeNormalizedTranscriptionInDb = async 
(analysisEntryId, fullText, normalizedSegments) => { const whereClause = { id: analysisEntryId, diff --git a/server/services/analysisService.js b/server/services/analysisService.js deleted file mode 100644 index dd2c1a4..0000000 --- a/server/services/analysisService.js +++ /dev/null @@ -1,30 +0,0 @@ -import { logError, logInfo } from '../config/loggerFunctions.js'; -import { - insertTranscriptionJobInDb, - markInProgressSingleTranscriptionJobInDb, - getSingleTranscriptionJobDetailsFromDb, - storeNormalizedTranscriptionInDb, -} from '../models/transcriptionModel.js'; -import { normalizeTranscript } from '../utils/transcription/transcriptionNormalizer.js'; - -const processSingleCompletedTranscriptionJob = async (transcriptionJob) => { - logInfo(`Processing completed transcription job: ${transcriptionJob.TranscriptionJobName}`); - - try { - // 1. Get transcription job details from database - const transcriptionJobDetails = await getSingleTranscriptionJobDetailsFromDb(transcriptionJob.TranscriptionJobName); - - // 3. Parse the transcription job result (JSON string to object) - const transcriptionJobResult = JSON.parse(transcriptionJobResultString); - - // 4. Normalize transcription job result - const normalizedTranscriptionJob = await normalizeTranscript(transcriptionJobResult); - - // 5. 
Store normalized transcript in DB and update status to COMPLETED - await storeNormalizedTranscriptionInDb(transcriptionJob.TranscriptionJobName, normalizedTranscriptionJob, transcriptionJobResult); - - logInfo(`Successfully processed transcription job: ${transcriptionJob.TranscriptionJobName}`); - } catch (error) { - logError(`Error processing transcription job ${transcriptionJob.TranscriptionJobName}`, error); - } -}; From 5b9cba0359260f5241c13be2ea299c81ab9c4900 Mon Sep 17 00:00:00 2001 From: Sergio N Date: Fri, 2 Jan 2026 11:44:25 +0100 Subject: [PATCH 19/22] REMOVED transcription_enabled env - not needed anymore --- .env.example | 2 -- server/controllers/analysisEntryController.js | 12 +++++------- server/cron/jobsContainer.js | 4 +--- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/.env.example b/.env.example index c098c48..382e8a1 100644 --- a/.env.example +++ b/.env.example @@ -25,7 +25,5 @@ S3_REGION= S3_BUCKET= S3_ENDPOINT= -TRANSCRIPTION_ENABLED= - MINIO_ROOT_USER= MINIO_ROOT_PASSWORD= \ No newline at end of file diff --git a/server/controllers/analysisEntryController.js b/server/controllers/analysisEntryController.js index 9e27d95..0dc4b93 100644 --- a/server/controllers/analysisEntryController.js +++ b/server/controllers/analysisEntryController.js @@ -26,13 +26,11 @@ export const updateAnalysisEntry = async (req, res) => { languageCode: 'es-ES', }; - if (process.env.TRANSCRIPTION_ENABLED === 'true') { - try { - await insertTranscriptionJobInDb(transcriptionJob); - logInfo(`Transcription job for ${transcriptionJob.analysisEntryId} stored in DB`, transcriptionJob); - } catch (error) { - logError(`error inserting ${transcriptionJob.analysisEntryId} analysisEntry's transcription request`); - } + try { + await insertTranscriptionJobInDb(transcriptionJob); + logInfo(`Transcription job for ${transcriptionJob.analysisEntryId} stored in DB`, transcriptionJob); + } catch (error) { + logError(`error inserting ${transcriptionJob.analysisEntryId} 
analysisEntry's transcription request`); } return res.status(200).json({ diff --git a/server/cron/jobsContainer.js b/server/cron/jobsContainer.js index 9212031..eda5b71 100644 --- a/server/cron/jobsContainer.js +++ b/server/cron/jobsContainer.js @@ -8,9 +8,7 @@ export const startCronJobs = () => { try { deletePasswordResetTokensScheduler.start(); - if (process.env.TRANSCRIPTION_ENABLED === 'true') { - getPendingTranscriptionJobScheduler.start(); - } + getPendingTranscriptionJobScheduler.start(); markAnalysisEntriesAsCancelledScheduler.start(); } catch (error) { From 0e64562d43adae97b801deed1d4345d01616d4a2 Mon Sep 17 00:00:00 2001 From: Sergio N Date: Fri, 2 Jan 2026 11:56:58 +0100 Subject: [PATCH 20/22] ADD transcription_endpoint env variable --- compose.yaml | 13 ++++++------- server/controllers/analysisEntryController.js | 2 +- .../whisper-asr-webservice/transcribe.js | 5 +++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/compose.yaml b/compose.yaml index ea3a29b..5f8375f 100644 --- a/compose.yaml +++ b/compose.yaml @@ -32,13 +32,12 @@ services: # STRIPE_API_KEY: ${STRIPE_API_KEY} # STRIPE_WEBHOOK_SECRET: ${STRIPE_WEBHOOK_SECRET} # PORT: ${PORT} - # AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID} - # AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY} # S3_BUCKET: dev-analysis-entry-storage # S3_REGION: ${S3_REGION} # S3_ENDPOINT: ${S3_ENDPOINT} # MINIO_ROOT_USER=${MINIO_ROOT_USER} # MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD} + # TRANSCRIPTION_ENDPOINT= ${TRANSCRIPTION_ENDPOINT} # ports: # - 3000:3000 @@ -115,11 +114,11 @@ services: - minio networks: - uxcaptain-network # Same as MinIO/monolith - # deploy: - # resources: - # limits: - # cpus: '4.0' - # memory: 5000M + deploy: + resources: + limits: + cpus: '4.0' + memory: 5000M diff --git a/server/controllers/analysisEntryController.js b/server/controllers/analysisEntryController.js index 0dc4b93..935b19e 100644 --- a/server/controllers/analysisEntryController.js +++ 
b/server/controllers/analysisEntryController.js @@ -23,7 +23,7 @@ export const updateAnalysisEntry = async (req, res) => { const transcriptionJob = { analysisEntryId: analysisEntryId, analysisId: updatedAnalysisEntry.analysis_id, - languageCode: 'es-ES', + languageCode: 'es', }; try { diff --git a/server/integrations/whisper-asr-webservice/transcribe.js b/server/integrations/whisper-asr-webservice/transcribe.js index 95fc9a3..4bd6334 100644 --- a/server/integrations/whisper-asr-webservice/transcribe.js +++ b/server/integrations/whisper-asr-webservice/transcribe.js @@ -28,12 +28,13 @@ export const transcribeRecording = async (transcriptionJob) => { }); // Make request with query parameters - const response = await axios.post(`http://localhost:9007/asr?${params.toString()}`, form, { + const response = await axios.post(`${process.env.TRANSCRIPTION_ENDPOINT}/asr?${params.toString()}`, form, { headers: { ...form.getHeaders() }, timeout: 1200000, // 20min }); - await markInProgressSingleTranscriptionJobInDb(transcriptionJob.analysis_entry_id); // ! unsure how to set this, because if i trigger the transcription, the function will not advance to in progress - maybe handle in progress in transcribe.js? 
+ markInProgressSingleTranscriptionJobInDb(transcriptionJob.analysis_entry_id); + logInfo(`Transcription marked as in progress for analysis entry ${transcriptionJob.analysis_entry_id} updated in DB`); return response.data; From a5b342d0532445bf8c57eb25830537bee2122fe9 Mon Sep 17 00:00:00 2001 From: Sergio N Date: Fri, 2 Jan 2026 13:50:01 +0100 Subject: [PATCH 21/22] UPDATE logic to update transcriptionJob statutes --- server/controllers/transcriptionController.js | 18 ++++++++++++++---- .../whisper-asr-webservice/transcribe.js | 6 ------ server/models/transcriptionModel.js | 4 ++-- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/server/controllers/transcriptionController.js b/server/controllers/transcriptionController.js index b1e2fd4..2a6330e 100644 --- a/server/controllers/transcriptionController.js +++ b/server/controllers/transcriptionController.js @@ -1,6 +1,10 @@ import { logError, logInfo } from '../config/loggerFunctions.js'; import { transcribeRecording } from '../integrations/whisper-asr-webservice/transcribe.js'; -import { getPendingTranscriptionJobsFromDb, storeNormalizedTranscriptionInDb } from '../models/transcriptionModel.js'; +import { + getPendingTranscriptionJobsFromDb, + storeNormalizedTranscriptionInDb, + updateStatusSingleTranscriptionJobInDb, +} from '../models/transcriptionModel.js'; import { cleanUpTranscriptSegments } from '../utils/transcription/transcriptionNormalizer.js'; export const processPendingTranscriptionJobs = async () => { @@ -14,8 +18,12 @@ export const processPendingTranscriptionJobs = async () => { for (const transcriptionJob of pendingTranscriptionJobs) { logInfo(`Processing transcription job for analysis entry ID: ${transcriptionJob.analysis_entry_id}`); + const { analysis_entry_id: analysisEntryId } = transcriptionJob; + try { - const { analysis_entry_id: analysisEntryId } = transcriptionJob; + // Mark job as IN_PROGRESS before making async call to prevent re-queuing + await 
updateStatusSingleTranscriptionJobInDb(analysisEntryId, 'IN_PROGRESS'); + logInfo(`Marked transcription job ${analysisEntryId} as IN_PROGRESS`); const transcriptionJobResult = await transcribeRecording(transcriptionJob); @@ -25,9 +33,11 @@ export const processPendingTranscriptionJobs = async () => { await storeNormalizedTranscriptionInDb(analysisEntryId, fullText, cleanedUpSegments); - logInfo(`Transcription job ${transcriptionJob.analysis_entry_id} completed successfully`); + logInfo(`Transcription job ${analysisEntryId} completed successfully`); } catch (error) { - logError(`Error processing transcription job, ${transcriptionJob.analysis_entry_id}`, error); + // Mark job back as PENDING to allow retry + await updateStatusSingleTranscriptionJobInDb(analysisEntryId, 'PENDING'); + logError(`Error processing transcription job, ${analysisEntryId}`, error); } } }; diff --git a/server/integrations/whisper-asr-webservice/transcribe.js b/server/integrations/whisper-asr-webservice/transcribe.js index 4bd6334..ff2e09d 100644 --- a/server/integrations/whisper-asr-webservice/transcribe.js +++ b/server/integrations/whisper-asr-webservice/transcribe.js @@ -1,8 +1,6 @@ import axios from 'axios'; import FormData from 'form-data'; import { getS3Object } from '../s3-client/s3.js'; -import { logInfo } from '../../config/loggerFunctions.js'; -import { markInProgressSingleTranscriptionJobInDb } from '../../models/transcriptionModel.js'; export const transcribeRecording = async (transcriptionJob) => { // need to use S3 because S3 client (minIO) stores data in a compressed format and cant be accesed via bind mount @@ -33,9 +31,5 @@ export const transcribeRecording = async (transcriptionJob) => { timeout: 1200000, // 20min }); - markInProgressSingleTranscriptionJobInDb(transcriptionJob.analysis_entry_id); - - logInfo(`Transcription marked as in progress for analysis entry ${transcriptionJob.analysis_entry_id} updated in DB`); - return response.data; }; diff --git 
a/server/models/transcriptionModel.js b/server/models/transcriptionModel.js index bbb6087..ba52834 100644 --- a/server/models/transcriptionModel.js +++ b/server/models/transcriptionModel.js @@ -12,7 +12,7 @@ export const insertTranscriptionJobInDb = async (transcriptionRequest) => { }); }; -export const markInProgressSingleTranscriptionJobInDb = async (analysisEntryId) => { +export const updateStatusSingleTranscriptionJobInDb = async (analysisEntryId, status) => { const whereClause = { analysis_entry_id: analysisEntryId, }; @@ -20,7 +20,7 @@ export const markInProgressSingleTranscriptionJobInDb = async (analysisEntryId) await prisma.transcriptionJob.update({ where: whereClause, data: { - status: 'IN_PROGRESS', + status: status, }, }); }; From 480ff0c374127c346460068a6ea746b5585ae320 Mon Sep 17 00:00:00 2001 From: Sergio N Date: Fri, 2 Jan 2026 13:56:31 +0100 Subject: [PATCH 22/22] UPDATE cron job scheduler to run hourly at minute 15 for prod replica (NOTE(review): '15 * * * *' fires once per hour; if every 15 minutes was intended, use '*/15 * * * *') --- server/cron/getPendingTranscriptionJobScheduler.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/cron/getPendingTranscriptionJobScheduler.js b/server/cron/getPendingTranscriptionJobScheduler.js index 022a0e4..23ddb09 100644 --- a/server/cron/getPendingTranscriptionJobScheduler.js +++ b/server/cron/getPendingTranscriptionJobScheduler.js @@ -2,7 +2,7 @@ import { CronJob } from 'cron'; import { processPendingTranscriptionJobs } from '../controllers/transcriptionController.js'; import { logError } from '../config/loggerFunctions.js'; -export const getPendingTranscriptionJobScheduler = new CronJob('* * * * *', async () => { +export const getPendingTranscriptionJobScheduler = new CronJob('15 * * * *', async () => { try { await processPendingTranscriptionJobs(); } catch (error) {