Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ python_test_code/
!python_test_code/pyproject.toml
!python_test_code/uv.lock
!python_test_code/test_unified_suite.py
!python_test_code/test_metrics.py
47 changes: 47 additions & 0 deletions backend/src/api/metrics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import { Elysia } from "elysia";
import { generatePrometheusMetrics } from "@/services/prometheus";

/**
* Prometheus metrics endpoint
* Exposes operational metrics in Prometheus exposition format
*
* SECURITY NOTE: This endpoint is intentionally public (no authentication required).
* This is a deliberate design choice because:
*
* 1. Standard Practice: Prometheus metrics endpoints are typically unauthenticated
* to allow easy scraping by monitoring systems.
*
* 2. Operational Data Only: The metrics expose only aggregated operational data
* (request counts, latencies, token usage, error rates). No sensitive data
* like API keys, request/response content, or user data is exposed.
*
* 3. API Key Privacy: The `api_key_comment` label is used instead of the actual
* API key value, providing meaningful aggregation without exposing secrets.
*
* 4. Network Security: In production deployments, network-level security (firewall
* rules, VPC, ingress policies) should restrict access to the metrics endpoint
* to authorized monitoring systems only.
*
* If stricter security is required, consider:
* - Using network policies to restrict access to Prometheus scrapers
* - Deploying a metrics proxy with authentication
* - Adding optional bearer token authentication via environment variable
*/
export const metricsApi = new Elysia().get(
"/metrics",
async () => {
const metrics = await generatePrometheusMetrics();
return new Response(metrics, {
headers: {
"Content-Type": "text/plain; version=0.0.4; charset=utf-8",
},
});
},
Comment thread
pescn marked this conversation as resolved.
{
detail: {
description:
"Prometheus metrics endpoint. Returns operational metrics in Prometheus exposition format.",
tags: ["Metrics"],
},
},
);
178 changes: 178 additions & 0 deletions backend/src/db/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1347,3 +1347,181 @@ export async function updateCompletion(
const [first] = r;
return first ?? null;
}

// ============================================
// Prometheus Metrics Operations
// ============================================

/**
* Get completion metrics grouped by model, status, and api_format
* Returns all-time totals for Prometheus counters
* Joins with api_keys table to get api_key_comment for meaningful aggregation
*/
/**
 * Aggregate all-time completion totals for Prometheus counters.
 *
 * Rows are grouped by model, status, api_format, and the owning API key's
 * comment (LEFT JOIN, so completions with no matching key fall back to
 * 'unknown'). Only non-deleted completions are counted; negative token
 * values are clamped to zero before summing.
 *
 * @returns raw rows; numeric aggregates arrive as strings from the driver
 */
export async function getCompletionMetricsByModelAndStatus() {
  logger.debug("getCompletionMetricsByModelAndStatus");

  // Shape of each aggregated row (counts/sums are stringified by the driver).
  type CompletionMetricsRow = {
    model: string;
    status: string;
    api_format: string | null;
    api_key_comment: string;
    count: string;
    prompt_tokens: string;
    completion_tokens: string;
  };

  const rows = await db.execute(sql`
    SELECT
      c.model,
      c.status,
      c.api_format,
      COALESCE(ak.comment, 'unknown') AS api_key_comment,
      COUNT(*) AS count,
      COALESCE(SUM(CASE WHEN c.prompt_tokens > 0 THEN c.prompt_tokens ELSE 0 END), 0) AS prompt_tokens,
      COALESCE(SUM(CASE WHEN c.completion_tokens > 0 THEN c.completion_tokens ELSE 0 END), 0) AS completion_tokens
    FROM completions c
    LEFT JOIN api_keys ak ON c.api_key_id = ak.id
    WHERE c.deleted = false
    GROUP BY c.model, c.status, c.api_format, ak.comment
  `);
  return rows as unknown as CompletionMetricsRow[];
}

/**
* Get embedding metrics grouped by model and status
* Returns all-time totals for Prometheus counters
* Joins with api_keys table to get api_key_comment for meaningful aggregation
*/
/**
 * Aggregate all-time embedding totals for Prometheus counters.
 *
 * Rows are grouped by model, status, and the owning API key's comment
 * (LEFT JOIN, 'unknown' when no key matches). Only non-deleted embeddings
 * are counted; negative token values are clamped to zero before summing.
 *
 * @returns raw rows; numeric aggregates arrive as strings from the driver
 */
export async function getEmbeddingMetricsByModelAndStatus() {
  logger.debug("getEmbeddingMetricsByModelAndStatus");

  // Shape of each aggregated row (counts/sums are stringified by the driver).
  type EmbeddingMetricsRow = {
    model: string;
    status: string;
    api_key_comment: string;
    count: string;
    input_tokens: string;
  };

  const rows = await db.execute(sql`
    SELECT
      e.model,
      e.status,
      COALESCE(ak.comment, 'unknown') AS api_key_comment,
      COUNT(*) AS count,
      COALESCE(SUM(CASE WHEN e.input_tokens > 0 THEN e.input_tokens ELSE 0 END), 0) AS input_tokens
    FROM embeddings e
    LEFT JOIN api_keys ak ON e.api_key_id = ak.id
    WHERE e.deleted = false
    GROUP BY e.model, e.status, ak.comment
  `);
  return rows as unknown as EmbeddingMetricsRow[];
}

// Histogram bucket boundaries in milliseconds (for LLM latency).
// These are cumulative "le"-style buckets; the +Inf bucket is derived from
// the query's total_count downstream.
export const LATENCY_BUCKETS_MS = [100, 250, 500, 1000, 2500, 5000, 10000, 30000, 60000, 120000];

/**
 * Build the cumulative-bucket SELECT fragments for one latency column.
 *
 * Produces one `SUM(CASE WHEN <column> <= <bound> ...) AS bucket_<bound>`
 * fragment per entry in LATENCY_BUCKETS_MS, joined for direct splicing into
 * a SELECT list.
 *
 * The column name is interpolated into raw SQL, so the parameter is
 * restricted to the two known-safe column literals — never widen it to
 * accept arbitrary strings.
 *
 * @param column latency column to bucket ("duration" or "ttft")
 * @returns comma-separated SQL fragments, one per bucket boundary
 */
function buildBucketCases(column: "duration" | "ttft"): string {
  return LATENCY_BUCKETS_MS.map(
    (b) => `SUM(CASE WHEN ${column} <= ${b} THEN 1 ELSE 0 END) AS bucket_${b}`,
  ).join(",\n      ");
}

// Pre-computed bucket case SQL fragments (constant, computed once at module load)
const DURATION_BUCKET_CASES = buildBucketCases("duration");
const TTFT_BUCKET_CASES = buildBucketCases("ttft");

/**
* Get completion duration histogram data grouped by model
* Duration is stored in milliseconds in the database
*
* Note: We use SUM(duration) not AVG because Prometheus histogram format requires
* the total sum of all observations (_sum metric). Average can be computed by
* Prometheus as sum/count when needed.
*/
/**
 * Per-model duration histogram rows for completions.
 *
 * Duration is stored in milliseconds. Emits one cumulative bucket column per
 * LATENCY_BUCKETS_MS boundary plus total_count and duration_sum, matching the
 * Prometheus histogram convention (_bucket/_count/_sum); the sum — not the
 * average — is returned so Prometheus can derive averages as sum/count.
 *
 * sql.raw is safe here: the interpolated fragments come from the module-level
 * constant DURATION_BUCKET_CASES, never from user input.
 */
export async function getCompletionDurationHistogram() {
  logger.debug("getCompletionDurationHistogram");
  const query = `
    SELECT
      model,
      ${DURATION_BUCKET_CASES},
      COUNT(*) AS total_count,
      COALESCE(SUM(duration), 0) AS duration_sum
    FROM completions
    WHERE deleted = false AND duration > 0
    GROUP BY model
  `;
  const rows = await db.execute(sql.raw(query));
  return rows as unknown as Record<string, string>[];
}

/**
* Get completion TTFT (Time To First Token) histogram data grouped by model
* TTFT is stored in milliseconds in the database
*/
/**
 * Per-model TTFT (Time To First Token) histogram rows for completions.
 *
 * TTFT is stored in milliseconds. Only completed requests with a positive
 * TTFT contribute, so partial or failed streams do not skew the histogram.
 *
 * sql.raw is safe here: the interpolated fragments come from the module-level
 * constant TTFT_BUCKET_CASES, never from user input.
 */
export async function getCompletionTTFTHistogram() {
  logger.debug("getCompletionTTFTHistogram");
  const query = `
    SELECT
      model,
      ${TTFT_BUCKET_CASES},
      COUNT(*) AS total_count,
      COALESCE(SUM(ttft), 0) AS ttft_sum
    FROM completions
    WHERE deleted = false AND ttft > 0 AND status = 'completed'
    GROUP BY model
  `;
  const rows = await db.execute(sql.raw(query));
  return rows as unknown as Record<string, string>[];
}

/**
* Get embedding duration histogram data grouped by model
* Duration is stored in milliseconds in the database
*/
/**
 * Per-model duration histogram rows for embeddings.
 *
 * Duration is stored in milliseconds. Emits one cumulative bucket column per
 * LATENCY_BUCKETS_MS boundary plus total_count and duration_sum, matching the
 * Prometheus histogram convention (_bucket/_count/_sum).
 *
 * sql.raw is safe here: the interpolated fragments come from the module-level
 * constant DURATION_BUCKET_CASES, never from user input.
 */
export async function getEmbeddingDurationHistogram() {
  logger.debug("getEmbeddingDurationHistogram");
  const query = `
    SELECT
      model,
      ${DURATION_BUCKET_CASES},
      COUNT(*) AS total_count,
      COALESCE(SUM(duration), 0) AS duration_sum
    FROM embeddings
    WHERE deleted = false AND duration > 0
    GROUP BY model
  `;
  const rows = await db.execute(sql.raw(query));
  return rows as unknown as Record<string, string>[];
}

/**
* Get API key rate limit configuration for Prometheus metrics
* Returns all active (non-revoked) API keys with their rate limits
*/
/**
 * Fetch rate-limit configuration for every active (non-revoked) API key.
 *
 * Used to export the configured RPM/TPM limits as Prometheus gauges, keyed
 * by the key's comment rather than its secret value.
 *
 * @returns id, comment, rpmLimit, and tpmLimit for each active key
 */
export async function getApiKeyRateLimitConfig() {
  logger.debug("getApiKeyRateLimitConfig");
  const keys = schema.ApiKeysTable;
  return await db
    .select({
      id: keys.id,
      comment: keys.comment,
      rpmLimit: keys.rpmLimit,
      tpmLimit: keys.tpmLimit,
    })
    .from(keys)
    .where(not(keys.revoked));
}

/**
* Get counts of active entities for Prometheus gauges
* Uses a single query with subqueries for efficiency (one DB round-trip)
*/
/**
 * Count active entities for Prometheus gauges in one DB round-trip.
 *
 * A single SELECT with scalar subqueries returns the number of non-revoked
 * API keys, non-deleted providers, and non-deleted chat/embedding models.
 *
 * @returns numeric counts, each defaulting to 0 if the row is missing
 */
export async function getActiveEntityCounts() {
  logger.debug("getActiveEntityCounts");

  const result = await db.execute(sql`
    SELECT
      (SELECT COUNT(*) FROM api_keys WHERE NOT revoked) AS api_keys,
      (SELECT COUNT(*) FROM providers WHERE NOT deleted) AS providers,
      (SELECT COUNT(*) FROM models WHERE NOT deleted AND model_type = 'chat') AS chat_models,
      (SELECT COUNT(*) FROM models WHERE NOT deleted AND model_type = 'embedding') AS embedding_models
  `);

  // Counts arrive as strings from the driver; coerce with a 0 fallback.
  const first = (result as unknown as Record<string, string>[])[0];
  const toCount = (value: string | undefined) => Number(value ?? 0);

  return {
    apiKeys: toCount(first?.api_keys),
    providers: toCount(first?.providers),
    chatModels: toCount(first?.chat_models),
    embeddingModels: toCount(first?.embedding_models),
  };
}
6 changes: 4 additions & 2 deletions backend/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ async function exists(path: string): Promise<boolean> {
}
import { join } from "node:path";
import { routes } from "@/api";
import { metricsApi } from "@/api/metrics";
import { loggerPlugin } from "@/plugins/loggerPlugin";
import {
ALLOWED_ORIGINS,
Expand Down Expand Up @@ -151,8 +152,8 @@ async function spaPlugin(dir: string) {
if (path.startsWith("/docs") || path.startsWith("/__tsr")) {
return status(404);
}
// Skip API routes
if (path.startsWith("/api") || path.startsWith("/v1")) {
// Skip API routes and metrics (include trailing slash to prevent SPA fallback)
if (path.startsWith("/api") || path.startsWith("/v1") || path === "/metrics" || path === "/metrics/") {
return status(404);
}

Expand Down Expand Up @@ -205,6 +206,7 @@ const app = new Elysia()
)
.use(serverTiming())
.use(routes)
.use(metricsApi)
.use(await docsPlugin(DOCS_DIR))
.use(await spaPlugin(FRONTEND_DIR))
.listen({
Expand Down
39 changes: 39 additions & 0 deletions backend/src/plugins/apiKeyRateLimitPlugin.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,43 @@
import { consola } from "consola";
import { Elysia } from "elysia";
import { apiKeyPlugin } from "./apiKeyPlugin";
import { checkRpmLimit, checkTpmLimit } from "@/utils/apiKeyRateLimit";
import { redisClient } from "@/utils/redisClient";

// Re-export consumeTokens for use in API handlers
export { consumeTokens } from "@/utils/apiKeyRateLimit";

const logger = consola.withTag("apiKeyRateLimitPlugin");

// Redis key for tracking rate limit rejections (for Prometheus metrics)
const RATE_LIMIT_REJECTIONS_KEY = "nexusgate:metrics:rate_limit_rejections";

/**
* Track a rate limit rejection in Redis for Prometheus metrics
* @param apiKeyComment The API key comment for label
* @param limitType Type of limit exceeded ('rpm' or 'tpm')
*/
/**
 * Record one rate-limit rejection in Redis for later export via /metrics.
 *
 * Increments the hash field "<comment>:<limitType>" under
 * RATE_LIMIT_REJECTIONS_KEY. Redis failures are logged and swallowed so
 * metrics bookkeeping can never break request handling.
 *
 * @param apiKeyComment comment label of the rejected key ('unknown' when null)
 * @param limitType which limit was exceeded ('rpm' or 'tpm')
 */
async function trackRateLimitRejection(
  apiKeyComment: string | null,
  limitType: "rpm" | "tpm",
): Promise<void> {
  const comment = apiKeyComment ?? "unknown";
  try {
    await redisClient.hincrby(RATE_LIMIT_REJECTIONS_KEY, `${comment}:${limitType}`, 1);
  } catch (error) {
    // Best-effort counter: never let a Redis hiccup surface to the caller.
    logger.error("Failed to track rate limit rejection:", error);
  }
}

/**
* Get all rate limit rejections from Redis for Prometheus metrics
*/
/**
 * Read every recorded rate-limit rejection counter from Redis.
 *
 * Returns the full hash at RATE_LIMIT_REJECTIONS_KEY: fields are
 * "<comment>:<rpm|tpm>" strings, values are stringified counts.
 */
export async function getRateLimitRejections(): Promise<
  Record<string, string>
> {
  const rejections = await redisClient.hgetall(RATE_LIMIT_REJECTIONS_KEY);
  return rejections;
}

/**
* OpenAI-compatible rate limit error response
*/
Expand Down Expand Up @@ -39,6 +72,9 @@ export const apiKeyRateLimitPlugin = new Elysia({
);

if (!rpmResult.allowed) {
// Track rejection for Prometheus metrics
await trackRateLimitRejection(apiKeyRecord.comment, "rpm");

set.headers["X-RateLimit-Limit-RPM"] =
apiKeyRecord.rpmLimit.toString();
set.headers["X-RateLimit-Remaining-RPM"] = "0";
Expand All @@ -59,6 +95,9 @@ export const apiKeyRateLimitPlugin = new Elysia({
);

if (!tpmResult.allowed) {
// Track rejection for Prometheus metrics
await trackRateLimitRejection(apiKeyRecord.comment, "tpm");

set.headers["X-RateLimit-Limit-TPM"] =
apiKeyRecord.tpmLimit.toString();
set.headers["X-RateLimit-Remaining-TPM"] = "0";
Expand Down
Loading