diff --git a/.gitignore b/.gitignore index c4641bb..d82cdb7 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,4 @@ python_test_code/ !python_test_code/pyproject.toml !python_test_code/uv.lock !python_test_code/test_unified_suite.py +!python_test_code/test_metrics.py diff --git a/backend/src/api/metrics.ts b/backend/src/api/metrics.ts new file mode 100644 index 0000000..fb711a7 --- /dev/null +++ b/backend/src/api/metrics.ts @@ -0,0 +1,47 @@ +import { Elysia } from "elysia"; +import { generatePrometheusMetrics } from "@/services/prometheus"; + +/** + * Prometheus metrics endpoint + * Exposes operational metrics in Prometheus exposition format + * + * SECURITY NOTE: This endpoint is intentionally public (no authentication required). + * This is a deliberate design choice because: + * + * 1. Standard Practice: Prometheus metrics endpoints are typically unauthenticated + * to allow easy scraping by monitoring systems. + * + * 2. Operational Data Only: The metrics expose only aggregated operational data + * (request counts, latencies, token usage, error rates). No sensitive data + * like API keys, request/response content, or user data is exposed. + * + * 3. API Key Privacy: The `api_key_comment` label is used instead of the actual + * API key value, providing meaningful aggregation without exposing secrets. + * + * 4. Network Security: In production deployments, network-level security (firewall + * rules, VPC, ingress policies) should restrict access to the metrics endpoint + * to authorized monitoring systems only. 
+ * + * If stricter security is required, consider: + * - Using network policies to restrict access to Prometheus scrapers + * - Deploying a metrics proxy with authentication + * - Adding optional bearer token authentication via environment variable + */ +export const metricsApi = new Elysia().get( + "/metrics", + async () => { + const metrics = await generatePrometheusMetrics(); + return new Response(metrics, { + headers: { + "Content-Type": "text/plain; version=0.0.4; charset=utf-8", + }, + }); + }, + { + detail: { + description: + "Prometheus metrics endpoint. Returns operational metrics in Prometheus exposition format.", + tags: ["Metrics"], + }, + }, +); diff --git a/backend/src/db/index.ts b/backend/src/db/index.ts index e46142b..f0e8431 100644 --- a/backend/src/db/index.ts +++ b/backend/src/db/index.ts @@ -1347,3 +1347,181 @@ export async function updateCompletion( const [first] = r; return first ?? null; } + +// ============================================ +// Prometheus Metrics Operations +// ============================================ + +/** + * Get completion metrics grouped by model, status, api_format, and API key comment + * Returns all-time totals over non-deleted rows for Prometheus counters; only positive token values are summed + * Left-joins api_keys to get api_key_comment for meaningful aggregation; a missing key or NULL comment is reported as 'unknown' (NOTE(review): the query groups by the raw ak.comment, so a key whose comment is literally 'unknown' would yield a second row with the same label - confirm this cannot occur) + */ +export async function getCompletionMetricsByModelAndStatus() { + logger.debug("getCompletionMetricsByModelAndStatus"); + const result = await db.execute(sql` + SELECT + c.model, + c.status, + c.api_format, + COALESCE(ak.comment, 'unknown') AS api_key_comment, + COUNT(*) AS count, + COALESCE(SUM(CASE WHEN c.prompt_tokens > 0 THEN c.prompt_tokens ELSE 0 END), 0) AS prompt_tokens, + COALESCE(SUM(CASE WHEN c.completion_tokens > 0 THEN c.completion_tokens ELSE 0 END), 0) AS completion_tokens + FROM completions c + LEFT JOIN api_keys ak ON c.api_key_id = ak.id + WHERE c.deleted = false + GROUP BY c.model, c.status, c.api_format, ak.comment + `); + return result as unknown as { + model: string; + status: string; 
api_format: string | null; + api_key_comment: string; + count: string; + prompt_tokens: string; + completion_tokens: string; + }[]; +} + +/** + * Get embedding metrics grouped by model, status, and API key comment + * Returns all-time totals over non-deleted rows for Prometheus counters; only positive input_tokens values are summed + * Left-joins api_keys to get api_key_comment for meaningful aggregation; a missing key or NULL comment is reported as 'unknown' + */ +export async function getEmbeddingMetricsByModelAndStatus() { + logger.debug("getEmbeddingMetricsByModelAndStatus"); + const result = await db.execute(sql` + SELECT + e.model, + e.status, + COALESCE(ak.comment, 'unknown') AS api_key_comment, + COUNT(*) AS count, + COALESCE(SUM(CASE WHEN e.input_tokens > 0 THEN e.input_tokens ELSE 0 END), 0) AS input_tokens + FROM embeddings e + LEFT JOIN api_keys ak ON e.api_key_id = ak.id + WHERE e.deleted = false + GROUP BY e.model, e.status, ak.comment + `); + return result as unknown as { + model: string; + status: string; + api_key_comment: string; + count: string; + input_tokens: string; + }[]; +} + +// Histogram bucket upper bounds in milliseconds (for LLM latency); each bound is inclusive (value <= bound) +export const LATENCY_BUCKETS_MS = [100, 250, 500, 1000, 2500, 5000, 10000, 30000, 60000, 120000]; + +// Pre-computed bucket case SQL fragments (constant, computed once at module load); every bucket counts all rows at or below its bound, so the counts are cumulative, and the only values interpolated into the SQL text are the numbers from LATENCY_BUCKETS_MS +const DURATION_BUCKET_CASES = LATENCY_BUCKETS_MS.map( + (b) => `SUM(CASE WHEN duration <= ${b} THEN 1 ELSE 0 END) AS bucket_${b}`, +).join(",\n "); + +const TTFT_BUCKET_CASES = LATENCY_BUCKETS_MS.map( + (b) => `SUM(CASE WHEN ttft <= ${b} THEN 1 ELSE 0 END) AS bucket_${b}`, +).join(",\n "); + +/** + * Get completion duration histogram data grouped by model (non-deleted rows with duration > 0 only) + * Duration is stored in milliseconds in the database + * + * Note: We use SUM(duration) not AVG because Prometheus histogram format requires + * the total sum of all observations (_sum metric). Average can be computed by + * Prometheus as sum/count when needed. 
+ */ +export async function getCompletionDurationHistogram() { + logger.debug("getCompletionDurationHistogram"); + const result = await db.execute(sql.raw(` + SELECT + model, + ${DURATION_BUCKET_CASES}, + COUNT(*) AS total_count, + COALESCE(SUM(duration), 0) AS duration_sum + FROM completions + WHERE deleted = false AND duration > 0 + GROUP BY model + `)); + return result as unknown as Record[]; +} + +/** + * Get completion TTFT (Time To First Token) histogram data grouped by model (non-deleted rows with status 'completed' and ttft > 0 only) + * TTFT is stored in milliseconds in the database; each row carries the bucket_N counts, total_count and ttft_sum + */ +export async function getCompletionTTFTHistogram() { + logger.debug("getCompletionTTFTHistogram"); + const result = await db.execute(sql.raw(` + SELECT + model, + ${TTFT_BUCKET_CASES}, + COUNT(*) AS total_count, + COALESCE(SUM(ttft), 0) AS ttft_sum + FROM completions + WHERE deleted = false AND ttft > 0 AND status = 'completed' + GROUP BY model + `)); + return result as unknown as Record[]; +} + +/** + * Get embedding duration histogram data grouped by model (non-deleted rows with duration > 0 only) + * Duration is stored in milliseconds in the database; each row carries the bucket_N counts, total_count and duration_sum + */ +export async function getEmbeddingDurationHistogram() { + logger.debug("getEmbeddingDurationHistogram"); + const result = await db.execute(sql.raw(` + SELECT + model, + ${DURATION_BUCKET_CASES}, + COUNT(*) AS total_count, + COALESCE(SUM(duration), 0) AS duration_sum + FROM embeddings + WHERE deleted = false AND duration > 0 + GROUP BY model + `)); + return result as unknown as Record[]; +} + +/** + * Get API key rate limit configuration for Prometheus metrics + * Returns id, comment, rpmLimit and tpmLimit for every active (non-revoked) API key (NOTE(review): the metrics service uses comment as the only label on the per-key gauges and comment is not shown to be unique here - confirm, otherwise two keys can emit the same series) + */ +export async function getApiKeyRateLimitConfig() { + logger.debug("getApiKeyRateLimitConfig"); + return await db + .select({ + id: schema.ApiKeysTable.id, + comment: schema.ApiKeysTable.comment, + rpmLimit: schema.ApiKeysTable.rpmLimit, + tpmLimit: schema.ApiKeysTable.tpmLimit, + }) + .from(schema.ApiKeysTable) + .where(not(schema.ApiKeysTable.revoked)); +} + +/** + * Get counts of active 
entities for Prometheus gauges + * Uses a single query with subqueries for efficiency (one DB round-trip) + */ +export async function getActiveEntityCounts() { + logger.debug("getActiveEntityCounts"); + + const result = await db.execute(sql` + SELECT + (SELECT COUNT(*) FROM api_keys WHERE NOT revoked) AS api_keys, + (SELECT COUNT(*) FROM providers WHERE NOT deleted) AS providers, + (SELECT COUNT(*) FROM models WHERE NOT deleted AND model_type = 'chat') AS chat_models, + (SELECT COUNT(*) FROM models WHERE NOT deleted AND model_type = 'embedding') AS embedding_models + `); + + const row = (result as unknown as Record[])[0]; + return { + apiKeys: Number(row?.api_keys ?? 0), + providers: Number(row?.providers ?? 0), + chatModels: Number(row?.chat_models ?? 0), + embeddingModels: Number(row?.embedding_models ?? 0), + }; +} diff --git a/backend/src/index.ts b/backend/src/index.ts index 39b8cee..613f313 100644 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -16,6 +16,7 @@ async function exists(path: string): Promise { } import { join } from "node:path"; import { routes } from "@/api"; +import { metricsApi } from "@/api/metrics"; import { loggerPlugin } from "@/plugins/loggerPlugin"; import { ALLOWED_ORIGINS, @@ -151,8 +152,8 @@ async function spaPlugin(dir: string) { if (path.startsWith("/docs") || path.startsWith("/__tsr")) { return status(404); } - // Skip API routes - if (path.startsWith("/api") || path.startsWith("/v1")) { + // Skip API routes and metrics (include trailing slash to prevent SPA fallback) + if (path.startsWith("/api") || path.startsWith("/v1") || path === "/metrics" || path === "/metrics/") { return status(404); } @@ -205,6 +206,7 @@ const app = new Elysia() ) .use(serverTiming()) .use(routes) + .use(metricsApi) .use(await docsPlugin(DOCS_DIR)) .use(await spaPlugin(FRONTEND_DIR)) .listen({ diff --git a/backend/src/plugins/apiKeyRateLimitPlugin.ts b/backend/src/plugins/apiKeyRateLimitPlugin.ts index a00cf3d..978dfc0 100644 --- 
a/backend/src/plugins/apiKeyRateLimitPlugin.ts +++ b/backend/src/plugins/apiKeyRateLimitPlugin.ts @@ -1,10 +1,43 @@ +import { consola } from "consola"; import { Elysia } from "elysia"; import { apiKeyPlugin } from "./apiKeyPlugin"; import { checkRpmLimit, checkTpmLimit } from "@/utils/apiKeyRateLimit"; +import { redisClient } from "@/utils/redisClient"; // Re-export consumeTokens for use in API handlers export { consumeTokens } from "@/utils/apiKeyRateLimit"; +const logger = consola.withTag("apiKeyRateLimitPlugin"); + +// Redis key for tracking rate limit rejections (for Prometheus metrics) +const RATE_LIMIT_REJECTIONS_KEY = "nexusgate:metrics:rate_limit_rejections"; + +/** + * Track a rate limit rejection in Redis for Prometheus metrics + * @param apiKeyComment The API key comment for label + * @param limitType Type of limit exceeded ('rpm' or 'tpm') + */ +async function trackRateLimitRejection( + apiKeyComment: string | null, + limitType: "rpm" | "tpm", +): Promise { + try { + const field = `${apiKeyComment ?? 
"unknown"}:${limitType}`; + await redisClient.hincrby(RATE_LIMIT_REJECTIONS_KEY, field, 1); + } catch (error) { + logger.error("Failed to track rate limit rejection:", error); + } +} + +/** + * Get all rate limit rejections from Redis for Prometheus metrics + */ +export async function getRateLimitRejections(): Promise< + Record +> { + return await redisClient.hgetall(RATE_LIMIT_REJECTIONS_KEY); +} + /** * OpenAI-compatible rate limit error response */ @@ -39,6 +72,9 @@ export const apiKeyRateLimitPlugin = new Elysia({ ); if (!rpmResult.allowed) { + // Track rejection for Prometheus metrics + await trackRateLimitRejection(apiKeyRecord.comment, "rpm"); + set.headers["X-RateLimit-Limit-RPM"] = apiKeyRecord.rpmLimit.toString(); set.headers["X-RateLimit-Remaining-RPM"] = "0"; @@ -59,6 +95,9 @@ export const apiKeyRateLimitPlugin = new Elysia({ ); if (!tpmResult.allowed) { + // Track rejection for Prometheus metrics + await trackRateLimitRejection(apiKeyRecord.comment, "tpm"); + set.headers["X-RateLimit-Limit-TPM"] = apiKeyRecord.tpmLimit.toString(); set.headers["X-RateLimit-Remaining-TPM"] = "0"; diff --git a/backend/src/services/prometheus.ts b/backend/src/services/prometheus.ts new file mode 100644 index 0000000..58b3426 --- /dev/null +++ b/backend/src/services/prometheus.ts @@ -0,0 +1,498 @@ +import { consola } from "consola"; +import { + getCompletionMetricsByModelAndStatus, + getEmbeddingMetricsByModelAndStatus, + getCompletionDurationHistogram, + getCompletionTTFTHistogram, + getEmbeddingDurationHistogram, + getActiveEntityCounts, + getApiKeyRateLimitConfig, + LATENCY_BUCKETS_MS, +} from "@/db"; +import { COMMIT_SHA, METRICS_CACHE_TTL_SECONDS } from "@/utils/config"; +import { redisClient } from "@/utils/redisClient"; +import { getRateLimitStatus } from "@/utils/apiKeyRateLimit"; +import { getRateLimitRejections } from "@/plugins/apiKeyRateLimitPlugin"; + +const logger = consola.withTag("prometheus"); + +// Redis cache key for metrics +const METRICS_CACHE_KEY = 
"nexusgate:metrics:cache"; + +// Convert milliseconds to seconds for Prometheus (standard unit) +const LATENCY_BUCKETS_SEC = LATENCY_BUCKETS_MS.map((ms) => ms / 1000); + +/** + * Escape label values according to Prometheus format + * Backslash, double-quote, and newline must be escaped + */ +function escapeLabelValue(value: string): string { + return value + .replace(/\\/g, "\\\\") + .replace(/"/g, '\\"') + .replace(/\n/g, "\\n"); +} + +/** + * Format labels as Prometheus label string + */ +function formatLabels(labels: Record): string { + const parts: string[] = []; + for (const [key, value] of Object.entries(labels)) { + if (value !== null && value !== undefined && value !== "") { + parts.push(`${key}="${escapeLabelValue(String(value))}"`); + } + } + return parts.length > 0 ? `{${parts.join(",")}}` : ""; +} + +interface MetricValue { + labels: Record; + value: number; +} + +/** + * Format a counter metric in Prometheus exposition format + */ +function formatCounter(name: string, help: string, values: MetricValue[]): string { + const lines: string[] = [ + `# HELP ${name} ${help}`, + `# TYPE ${name} counter`, + ]; + for (const { labels, value } of values) { + lines.push(`${name}${formatLabels(labels)} ${value}`); + } + return lines.join("\n"); +} + +/** + * Format a gauge metric in Prometheus exposition format + */ +function formatGauge(name: string, help: string, values: MetricValue[]): string { + const lines: string[] = [ + `# HELP ${name} ${help}`, + `# TYPE ${name} gauge`, + ]; + for (const { labels, value } of values) { + lines.push(`${name}${formatLabels(labels)} ${value}`); + } + return lines.join("\n"); +} + +interface HistogramValue { + labels: Record; + buckets: Map; // le (in seconds) -> cumulative count + sum: number; + count: number; +} + +/** + * Format a histogram metric in Prometheus exposition format + */ +function formatHistogram(name: string, help: string, buckets: number[], values: HistogramValue[]): string { + const lines: string[] = [ + `# 
HELP ${name} ${help}`, + `# TYPE ${name} histogram`, + ]; + for (const { labels, buckets: bucketCounts, sum, count } of values) { + // One line per finite bucket bound; a bound missing from the map is emitted as 0 + for (const le of buckets) { + const bucketCount = bucketCounts.get(le) ?? 0; + lines.push(`${name}_bucket${formatLabels({ ...labels, le })} ${bucketCount}`); + } + // +Inf bucket (total count) + lines.push(`${name}_bucket${formatLabels({ ...labels, le: "+Inf" })} ${count}`); + // Sum and count + lines.push(`${name}_sum${formatLabels(labels)} ${sum}`); + lines.push(`${name}_count${formatLabels(labels)} ${count}`); + } + return lines.join("\n"); +} + +/** + * Return the Prometheus exposition text: the Redis-cached copy when one exists, otherwise freshly generated text that is then cached under METRICS_CACHE_KEY with METRICS_CACHE_TTL_SECONDS; any error is logged and the fallback metrics are returned instead + */ +export async function generatePrometheusMetrics(): Promise { + try { + // Try to get cached metrics first + const cachedMetrics = await redisClient.get(METRICS_CACHE_KEY); + if (cachedMetrics) { + logger.debug("Returning cached metrics"); + return cachedMetrics; + } + + // Generate fresh metrics + const metrics = await generateMetricsInternal(); + + // Cache the metrics + await redisClient.set(METRICS_CACHE_KEY, metrics, { EX: METRICS_CACHE_TTL_SECONDS }); + + return metrics; + } catch (error) { + logger.error("Error generating metrics:", error); + // Return minimal fallback metrics on error + return generateFallbackMetrics(); + } +} + +/** + * Generate fallback metrics when main generation fails: the nexusgate_info build gauge plus nexusgate_metrics_error set to 1 + */ +function generateFallbackMetrics(): string { + const sections: string[] = []; + + // Info metric always works + sections.push( + formatGauge("nexusgate_info", "NexusGate build information", [ + { labels: { version: COMMIT_SHA }, value: 1 }, + ]), + ); + + // Error indicator + sections.push( + formatGauge("nexusgate_metrics_error", "Indicates metrics generation failed", [ + { labels: {}, value: 1 }, + ]), + ); + + return sections.join("\n\n") + "\n"; +} + +/** + * Internal metrics generation (the actual work): loads every input in parallel, renders each metric section and joins the sections with blank lines + */ +async function generateMetricsInternal(): Promise { + // Fetch all metrics data in parallel + const [ + 
completionMetrics, + embeddingMetrics, + completionDurationHist, + completionTTFTHist, + embeddingDurationHist, + entityCounts, + apiKeyConfigs, + rateLimitRejections, + ] = await Promise.all([ + getCompletionMetricsByModelAndStatus(), + getEmbeddingMetricsByModelAndStatus(), + getCompletionDurationHistogram(), + getCompletionTTFTHistogram(), + getEmbeddingDurationHistogram(), + getActiveEntityCounts(), + getApiKeyRateLimitConfig(), + getRateLimitRejections(), + ]); + + const sections: string[] = []; + + // Info metric + sections.push( + formatGauge("nexusgate_info", "NexusGate build information", [ + { labels: { version: COMMIT_SHA }, value: 1 }, + ]), + ); + + // Completion counter metrics + const completionCounts: MetricValue[] = []; + const promptTokenCounts: Map = new Map(); + const completionTokenCounts: Map = new Map(); + + for (const row of completionMetrics) { + completionCounts.push({ + labels: { + model: row.model, + status: row.status, + api_format: row.api_format, + api_key_comment: row.api_key_comment, + }, + value: Number(row.count), + }); + + // Aggregate tokens by model + const currentPrompt = promptTokenCounts.get(row.model) ?? 0; + promptTokenCounts.set(row.model, currentPrompt + Number(row.prompt_tokens)); + + const currentCompletion = completionTokenCounts.get(row.model) ?? 
0; + completionTokenCounts.set(row.model, currentCompletion + Number(row.completion_tokens)); + } + + if (completionCounts.length > 0) { + sections.push( + formatCounter( + "nexusgate_completions_total", + "Total number of completion requests", + completionCounts, + ), + ); + } + + // Prompt token counter + const promptTokenValues: MetricValue[] = []; + for (const [model, tokens] of promptTokenCounts) { + promptTokenValues.push({ labels: { model }, value: tokens }); + } + if (promptTokenValues.length > 0) { + sections.push( + formatCounter( + "nexusgate_tokens_prompt_total", + "Total prompt tokens processed", + promptTokenValues, + ), + ); + } + + // Completion token counter + const completionTokenValues: MetricValue[] = []; + for (const [model, tokens] of completionTokenCounts) { + completionTokenValues.push({ labels: { model }, value: tokens }); + } + if (completionTokenValues.length > 0) { + sections.push( + formatCounter( + "nexusgate_tokens_completion_total", + "Total completion tokens generated", + completionTokenValues, + ), + ); + } + + // Embedding counter metrics + const embeddingCounts: MetricValue[] = []; + const embeddingTokenCounts: Map = new Map(); + + for (const row of embeddingMetrics) { + embeddingCounts.push({ + labels: { + model: row.model, + status: row.status, + api_key_comment: row.api_key_comment, + }, + value: Number(row.count), + }); + + const currentTokens = embeddingTokenCounts.get(row.model) ?? 
0; + embeddingTokenCounts.set(row.model, currentTokens + Number(row.input_tokens)); + } + + if (embeddingCounts.length > 0) { + sections.push( + formatCounter( + "nexusgate_embeddings_total", + "Total number of embedding requests", + embeddingCounts, + ), + ); + } + + // Embedding token counter + const embeddingTokenValues: MetricValue[] = []; + for (const [model, tokens] of embeddingTokenCounts) { + embeddingTokenValues.push({ labels: { model }, value: tokens }); + } + if (embeddingTokenValues.length > 0) { + sections.push( + formatCounter( + "nexusgate_tokens_embedding_total", + "Total embedding tokens processed", + embeddingTokenValues, + ), + ); + } + + // Completion duration histogram + const durationHistValues = parseHistogramData(completionDurationHist, "duration"); + if (durationHistValues.length > 0) { + sections.push( + formatHistogram( + "nexusgate_completion_duration_seconds", + "Completion request duration in seconds", + LATENCY_BUCKETS_SEC, + durationHistValues, + ), + ); + } + + // Completion TTFT histogram + const ttftHistValues = parseHistogramData(completionTTFTHist, "ttft"); + if (ttftHistValues.length > 0) { + sections.push( + formatHistogram( + "nexusgate_completion_ttft_seconds", + "Time to first token in seconds", + LATENCY_BUCKETS_SEC, + ttftHistValues, + ), + ); + } + + // Embedding duration histogram + const embeddingDurationHistValues = parseHistogramData(embeddingDurationHist, "duration"); + if (embeddingDurationHistValues.length > 0) { + sections.push( + formatHistogram( + "nexusgate_embedding_duration_seconds", + "Embedding request duration in seconds", + LATENCY_BUCKETS_SEC, + embeddingDurationHistValues, + ), + ); + } + + // Gauge metrics for active entities + sections.push( + formatGauge("nexusgate_active_api_keys", "Number of active (non-revoked) API keys", [ + { labels: {}, value: entityCounts.apiKeys }, + ]), + ); + + sections.push( + formatGauge("nexusgate_active_providers", "Number of active providers", [ + { labels: {}, value: 
entityCounts.providers }, + ]), + ); + + sections.push( + formatGauge("nexusgate_active_models", "Number of active models", [ + { labels: { type: "chat" }, value: entityCounts.chatModels }, + { labels: { type: "embedding" }, value: entityCounts.embeddingModels }, + ]), + ); + + // API Key Rate Limit Metrics + // Fetch current usage from Redis for each API key in parallel for better performance + const rpmUsageValues: MetricValue[] = []; + const rpmLimitValues: MetricValue[] = []; + const tpmUsageValues: MetricValue[] = []; + const tpmLimitValues: MetricValue[] = []; + + const rateLimitStatuses = await Promise.all( + apiKeyConfigs.map(async (apiKey) => + getRateLimitStatus(apiKey.id, { + rpmLimit: apiKey.rpmLimit, + tpmLimit: apiKey.tpmLimit, + }), + ), + ); + + for (let i = 0; i < apiKeyConfigs.length; i++) { + const apiKey = apiKeyConfigs[i]; + const status = rateLimitStatuses[i]; + if (!apiKey || !status) { + continue; + } + + const comment = apiKey.comment ?? "unknown"; + + rpmUsageValues.push({ + labels: { api_key_comment: comment }, + value: status.rpm.current, + }); + rpmLimitValues.push({ + labels: { api_key_comment: comment }, + value: status.rpm.limit, + }); + tpmUsageValues.push({ + labels: { api_key_comment: comment }, + value: status.tpm.current, + }); + tpmLimitValues.push({ + labels: { api_key_comment: comment }, + value: status.tpm.limit, + }); + } + + if (rpmUsageValues.length > 0) { + sections.push( + formatGauge( + "nexusgate_api_key_rpm_usage", + "Current RPM usage per API key", + rpmUsageValues, + ), + ); + sections.push( + formatGauge( + "nexusgate_api_key_rpm_limit", + "RPM limit per API key", + rpmLimitValues, + ), + ); + sections.push( + formatGauge( + "nexusgate_api_key_tpm_usage", + "Current TPM usage per API key", + tpmUsageValues, + ), + ); + sections.push( + formatGauge( + "nexusgate_api_key_tpm_limit", + "TPM limit per API key", + tpmLimitValues, + ), + ); + } + + // Rate Limit Rejection Counter + // Field format is 
"apiKeyComment:limitType" where apiKeyComment may contain colons + const rejectionValues: MetricValue[] = []; + for (const [field, count] of Object.entries(rateLimitRejections)) { + const parts = field.split(":"); + const limitType = parts.pop(); // Last part is always the limit type (rpm/tpm) + const apiKeyComment = parts.join(":"); // Rejoin in case comment contained colons + + if (apiKeyComment && limitType) { + rejectionValues.push({ + labels: { api_key_comment: apiKeyComment, limit_type: limitType }, + value: Number(count), + }); + } + } + + if (rejectionValues.length > 0) { + sections.push( + formatCounter( + "nexusgate_rate_limit_rejections_total", + "Total number of rate limit rejections (429 responses)", + rejectionValues, + ), + ); + } + + return sections.join("\n\n") + "\n"; +} + +/** + * Parse histogram data from database results + */ +function parseHistogramData( + data: Record[], + sumField: "duration" | "ttft", +): HistogramValue[] { + const values: HistogramValue[] = []; + + for (const row of data) { + const model = row.model; + const buckets = new Map(); + + // Parse bucket counts and convert to seconds + for (const ms of LATENCY_BUCKETS_MS) { + const bucketKey = `bucket_${ms}`; + const count = Number(row[bucketKey] ?? 0); + // Convert ms bucket boundary to seconds + buckets.set(ms / 1000, count); + } + + // Sum is in milliseconds in DB, convert to seconds + const sum = Number(row[`${sumField}_sum`] ?? 0) / 1000; + const count = Number(row.total_count ?? 
0); + values.push({ + labels: { model }, + buckets, + sum, + count, + }); + } + + return values; +} diff --git a/backend/src/utils/config.ts b/backend/src/utils/config.ts index 23741f3..6ff379b 100644 --- a/backend/src/utils/config.ts +++ b/backend/src/utils/config.ts @@ -143,3 +143,10 @@ export const FORCILY_ADD_API_KEYS = env( export const FRONTEND_DIR = env("frontend dir", z.coerce.string(), "dist"); export const DOCS_DIR = env("docs dir", z.coerce.string(), "docs"); + +// Prometheus metrics configuration: how long the rendered /metrics output stays cached, in seconds (validated as a positive integer; "30" appears to be the default - confirm against env()) +export const METRICS_CACHE_TTL_SECONDS = env( + "metrics cache ttl seconds", + z.coerce.number().int().positive(), + "30", +); diff --git a/backend/src/utils/redisClient.ts b/backend/src/utils/redisClient.ts index 2a8d3f8..98dd766 100644 --- a/backend/src/utils/redisClient.ts +++ b/backend/src/utils/redisClient.ts @@ -149,6 +149,40 @@ class RedisClient { } } + /** + * Increment a field in a hash by the given amount + * @param {string} key - Hash key + * @param {string} field - Field within the hash + * @param {number} increment - Amount to increment by + * @returns {Promise} New value after increment, or 0 if the Redis call fails (the error is logged, not rethrown) + */ + public async hincrby( + key: string, + field: string, + increment: number, + ): Promise { + try { + return await this.client.hincrby(key, field, increment); + } catch (error) { + logger.error(`Redis hincrby error: ${(error as Error).message}`); + return 0; + } + } + + /** + * Get all fields and values from a hash + * @param {string} key - Hash key + * @returns {Promise>} Hash fields and values, or an empty object if the Redis call fails (the error is logged, not rethrown) + */ + public async hgetall(key: string): Promise> { + try { + return await this.client.hgetall(key); + } catch (error) { + logger.error(`Redis hgetall error: ${(error as Error).message}`); + return {}; + } + } + /** * Close the Redis connection */ diff --git a/docker-compose.monitoring.yaml b/docker-compose.monitoring.yaml new file mode 100644 index 0000000..9d4926a --- /dev/null +++ b/docker-compose.monitoring.yaml @@ -0,0 +1,41 @@ +# Docker Compose override for 
Prometheus + Grafana monitoring stack +# Use with: docker compose -f docker-compose.yaml -f docker-compose.monitoring.yaml up -d + +services: + prometheus: + image: "prom/prometheus:latest" + container_name: nexusgate-prometheus + volumes: + - "./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro" + - "prometheus_data:/prometheus" + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.enable-lifecycle' + - '--storage.tsdb.retention.time=15d' + ports: + - "${PROMETHEUS_PORT:-9090}:9090" + restart: on-failure + depends_on: + - nexusgate + + grafana: + image: "grafana/grafana:latest" + container_name: nexusgate-grafana + environment: + - "GF_SECURITY_ADMIN_USER=${GRAFANA_USER:-admin}" + - "GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-admin}" + - "GF_USERS_ALLOW_SIGN_UP=false" + - "GF_SERVER_ROOT_URL=http://localhost:${GRAFANA_PORT:-3001}" + volumes: + - "grafana_data:/var/lib/grafana" + - "./grafana/provisioning:/etc/grafana/provisioning:ro" + ports: + - "${GRAFANA_PORT:-3001}:3000" + restart: on-failure + depends_on: + - prometheus + +volumes: + prometheus_data: + grafana_data: diff --git a/grafana/provisioning/dashboards/dashboards.yml b/grafana/provisioning/dashboards/dashboards.yml new file mode 100644 index 0000000..3a63b6a --- /dev/null +++ b/grafana/provisioning/dashboards/dashboards.yml @@ -0,0 +1,16 @@ +# Grafana dashboard provisioning +# Auto-loads dashboards from the dashboards directory + +apiVersion: 1 + +providers: + - name: 'NexusGate Dashboards' + orgId: 1 + folder: 'NexusGate' + folderUid: 'nexusgate' + type: file + disableDeletion: false + editable: true + updateIntervalSeconds: 30 + options: + path: /etc/grafana/provisioning/dashboards/json diff --git a/grafana/provisioning/dashboards/json/nexusgate-dashboard.json b/grafana/provisioning/dashboards/json/nexusgate-dashboard.json new file mode 100644 index 0000000..4a109fe --- /dev/null +++ 
b/grafana/provisioning/dashboards/json/nexusgate-dashboard.json @@ -0,0 +1,2178 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 100, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(increase(nexusgate_completions_total{status=\"completed\"}[$__range]))", + "legendFormat": "Completions", + "refId": "A" + } + ], + "title": "Total Completions", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 2, + "options": { + "colorMode": "value", + 
"graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(increase(nexusgate_embeddings_total{status=\"completed\"}[$__range]))", + "legendFormat": "Embeddings", + "refId": "A" + } + ], + "title": "Total Embeddings", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 90 + }, + { + "color": "green", + "value": 99 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "100 * sum(increase(nexusgate_completions_total{status=\"completed\"}[$__range])) / sum(increase(nexusgate_completions_total[$__range]))", + "legendFormat": "Success Rate", + "refId": "A" + } + ], + "title": "Completion Success Rate", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 
12, + "y": 1 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "nexusgate_active_api_keys", + "legendFormat": "API Keys", + "refId": "A" + } + ], + "title": "Active API Keys", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "nexusgate_active_providers", + "legendFormat": "Providers", + "refId": "A" + } + ], + "title": "Active Providers", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "orange", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + 
}, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(nexusgate_active_models)", + "legendFormat": "Models", + "refId": "A" + } + ], + "title": "Active Models", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 101, + "panels": [], + "title": "Request Rate & Throughput", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 6 + }, + "id": 10, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(nexusgate_completions_total[$__rate_interval])) by (model)", + "legendFormat": "{{model}}", + "refId": "A" + } + ], + "title": "Completion Request Rate (by Model)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "completed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "aborted" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cache_hit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 6 + }, + "id": 11, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(nexusgate_completions_total[$__rate_interval])) by 
(status)", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Completion Request Rate (by Status)", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 102, + "panels": [], + "title": "Latency", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 20, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.50, sum(rate(nexusgate_completion_duration_seconds_bucket[$__rate_interval])) by (le, model))", + "legendFormat": "p50 - {{model}}", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.95, sum(rate(nexusgate_completion_duration_seconds_bucket[$__rate_interval])) by (le, model))", + "legendFormat": "p95 - {{model}}", + "refId": 
"B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(nexusgate_completion_duration_seconds_bucket[$__rate_interval])) by (le, model))", + "legendFormat": "p99 - {{model}}", + "refId": "C" + } + ], + "title": "Completion Duration (p50/p95/p99)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 21, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.50, sum(rate(nexusgate_completion_ttft_seconds_bucket[$__rate_interval])) by (le, model))", + "legendFormat": "p50 - {{model}}", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.95, sum(rate(nexusgate_completion_ttft_seconds_bucket[$__rate_interval])) by (le, model))", + 
"legendFormat": "p95 - {{model}}", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(nexusgate_completion_ttft_seconds_bucket[$__rate_interval])) by (le, model))", + "legendFormat": "p99 - {{model}}", + "refId": "C" + } + ], + "title": "Time To First Token (TTFT) (p50/p95/p99)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 22, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"0.5\"}[$__range]))", + "legendFormat": "< 0.5s", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"1\"}[$__range])) - 
sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"0.5\"}[$__range]))", + "legendFormat": "0.5s - 1s", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"2.5\"}[$__range])) - sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"1\"}[$__range]))", + "legendFormat": "1s - 2.5s", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"5\"}[$__range])) - sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"2.5\"}[$__range]))", + "legendFormat": "2.5s - 5s", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"10\"}[$__range])) - sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"5\"}[$__range]))", + "legendFormat": "5s - 10s", + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"+Inf\"}[$__range])) - sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"10\"}[$__range]))", + "legendFormat": "> 10s", + "refId": "F" + } + ], + "title": "Latency Distribution", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 103, + "panels": [], + "title": "Token Usage", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + 
"viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 30, + "options": { + "legend": { + "calcs": ["sum"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(nexusgate_tokens_prompt_total[$__rate_interval])) by (model)", + "legendFormat": "Prompt - {{model}}", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(nexusgate_tokens_completion_total[$__rate_interval])) by (model)", + "legendFormat": "Completion - {{model}}", + "refId": "B" + } + ], + "title": "Token Rate (Prompt vs Completion)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 32 + }, + "id": 31, + "options": { + "displayLabels": ["percent"], + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": ["value"] + }, + "pieType": "donut", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + 
"sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(increase(nexusgate_tokens_prompt_total[$__range])) by (model)", + "legendFormat": "{{model}}", + "refId": "A" + } + ], + "title": "Prompt Tokens by Model", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 32 + }, + "id": 32, + "options": { + "displayLabels": ["percent"], + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": ["value"] + }, + "pieType": "donut", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(increase(nexusgate_tokens_completion_total[$__range])) by (model)", + "legendFormat": "{{model}}", + "refId": "A" + } + ], + "title": "Completion Tokens by Model", + "type": "piechart" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 104, + "panels": [], + "title": "Errors, Cache & Rate Limits", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + 
"viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 40, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(nexusgate_completions_total{status=\"failed\"}[$__rate_interval])) by (model) / sum(rate(nexusgate_completions_total[$__rate_interval])) by (model)", + "legendFormat": "{{model}}", + "refId": "A" + } + ], + "title": "Error Rate by Model", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 41, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(nexusgate_completions_total{status=\"cache_hit\"}[$__rate_interval])) / sum(rate(nexusgate_completions_total[$__rate_interval]))", + "legendFormat": "Cache Hit Rate", + "refId": "A" + } + ], + "title": "Cache Hit Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "aborted" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 49 + }, + "id": 42, + "options": { + "displayLabels": ["percent", "value"], + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": ["value"] + }, + "pieType": "donut", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(increase(nexusgate_completions_total{status=~\"failed|aborted\"}[$__range])) by (status)", + 
"legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Error Type Distribution", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*rpm.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*tpm.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 49 + }, + "id": 43, + "options": { + "legend": { + "calcs": ["sum"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(nexusgate_rate_limit_rejections_total[$__rate_interval])) by (api_key_comment, limit_type)", + "legendFormat": "{{api_key_comment}} ({{limit_type}})", + "refId": "A" + } + ], + "title": "Rate Limit Rejections", + "type": "timeseries" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 49 + }, + "id": 44, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "100 * nexusgate_api_key_rpm_usage / nexusgate_api_key_rpm_limit", + "legendFormat": "{{api_key_comment}} RPM", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "100 * nexusgate_api_key_tpm_usage / nexusgate_api_key_tpm_limit", + "legendFormat": "{{api_key_comment}} TPM", + "refId": "B" + } + ], + "title": "API Key Rate Limit Usage (%)", + "type": "gauge" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 57 + }, + "id": 105, + "panels": [], + "title": "API Format Distribution", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 58 + }, + "id": 50, + "options": { + "displayLabels": ["percent", "value"], + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": 
["value"] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(increase(nexusgate_completions_total[$__range])) by (api_format)", + "legendFormat": "{{api_format}}", + "refId": "A" + } + ], + "title": "Requests by API Format", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 58 + }, + "id": 51, + "options": { + "displayLabels": ["percent", "value"], + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": ["value"] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(increase(nexusgate_completions_total[$__range])) by (model)", + "legendFormat": "{{model}}", + "refId": "A" + } + ], + "title": "Requests by Model", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 58 + }, + "id": 52, + "options": { + "displayLabels": 
["percent", "value"], + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": ["value"] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(increase(nexusgate_completions_total[$__range])) by (status)", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests by Status", + "type": "piechart" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 66 + }, + "id": 106, + "panels": [], + "title": "Embeddings", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 67 + }, + "id": 60, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(nexusgate_embeddings_total[$__rate_interval])) by (model)", + "legendFormat": "{{model}}", + "refId": "A" + } + ], + "title": "Embedding Request Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 67 + }, + "id": 61, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.50, sum(rate(nexusgate_embedding_duration_seconds_bucket[$__rate_interval])) by (le, model))", + "legendFormat": "p50 - {{model}}", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.95, sum(rate(nexusgate_embedding_duration_seconds_bucket[$__rate_interval])) by (le, model))", + "legendFormat": "p95 - {{model}}", + "refId": "B" + } + ], + "title": "Embedding Latency (p50/p95)", + 
"type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["nexusgate", "llm", "prometheus"], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h" + ] + }, + "timezone": "browser", + "title": "NexusGate LLM Gateway", + "uid": "nexusgate-overview", + "version": 1, + "weekStart": "" +} diff --git a/grafana/provisioning/datasources/prometheus.yml b/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 0000000..68be1ed --- /dev/null +++ b/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,15 @@ +# Grafana datasource provisioning +# Auto-configures Prometheus as the default data source + +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false + jsonData: + timeInterval: "15s" + httpMethod: POST diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml new file mode 100644 index 0000000..eae7a3a --- /dev/null +++ b/prometheus/prometheus.yml @@ -0,0 +1,19 @@ +# Prometheus configuration for NexusGate +# This file is auto-generated by quick-start.sh + +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + # NexusGate metrics + - job_name: 'nexusgate' + static_configs: + - targets: ['nexusgate:3000'] + metrics_path: /metrics + scrape_interval: 15s + + # Prometheus self-monitoring + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] diff --git a/python_test_code/test_metrics.py 
# One sample line: metric name, optional {label set}, value, optional integer timestamp.
# The label group is greedy so that a '}' inside a quoted label value does not end it early.
_SAMPLE_LINE_RE = re.compile(
    r'^([a-zA-Z_:][a-zA-Z0-9_:]*)\s*(?:\{(.*)\})?\s+(\S+)(?:\s+-?\d+)?$'
)

# One name="value" pair; the value may contain backslash escapes, commas and braces.
_LABEL_PAIR_RE = re.compile(r'([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:[^"\\]|\\.)*)"')


def _unescape_label_value(raw: str) -> str:
    """Undo the exposition-format escapes (\\\\, \\" and \\n) inside a label value."""
    return re.sub(r'\\(.)', lambda m: '\n' if m.group(1) == 'n' else m.group(1), raw)


def parse_prometheus_metrics(text: str) -> dict[str, list[dict]]:
    """
    Parse Prometheus text exposition format into a structured dict.

    Args:
        text: Raw body returned by a /metrics endpoint.

    Returns:
        dict mapping metric names to a list of {'labels': dict, 'value': float}.
        A name announced by a ``# HELP`` line is present even when it has no
        samples (its list is then empty).

    Raises:
        ValueError: if a sample's value token is not parseable by ``float()``.

    Notes:
        - Label values are matched as quoted strings, so commas, braces and
          escaped quotes inside a value are preserved instead of splitting it.
        - An optional trailing integer timestamp on a sample line is ignored.
        - Lines that are neither comments nor well-formed samples are skipped.
    """
    metrics: dict[str, list[dict]] = {}

    for raw_line in text.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith('#'):
            # Only HELP lines register a family; TYPE and free comments carry no samples.
            if line.startswith('# HELP'):
                parts = line.split(None, 3)
                if len(parts) > 2:
                    metrics.setdefault(parts[2], [])
            continue

        match = _SAMPLE_LINE_RE.match(line)
        if match is None:
            continue

        name, labels_str, value = match.groups()
        labels = {
            key: _unescape_label_value(val)
            for key, val in _LABEL_PAIR_RE.findall(labels_str or '')
        }
        # float() accepts the format's special values: +Inf, -Inf and NaN.
        metrics.setdefault(name, []).append({'labels': labels, 'value': float(value)})

    return metrics
def test_metrics_prometheus_format():
    """Fetch /metrics and verify it looks like Prometheus exposition text.

    Asserts that HELP and TYPE comment markers are present, that the body
    parses, and that the first ``nexusgate_info`` sample carries a
    ``version`` label. Progress is printed for the console runner.
    """
    rule = "=" * 50
    print(rule)
    print("Testing Prometheus format validity")
    print(rule)

    body = httpx.get(METRICS_URL, timeout=10.0).text

    # Both comment markers must be present before anything is reported.
    required_markers = ('# HELP', '# TYPE')
    for marker in required_markers:
        assert marker in body, f"Missing {marker} comments"
    for marker in required_markers:
        print(f" Has {marker} comments: Yes")

    # The parsed view must expose the info metric.
    families = parse_prometheus_metrics(body)
    print(f" Parsed {len(families)} metric families")

    assert 'nexusgate_info' in families, "Missing nexusgate_info metric"
    info_samples = families['nexusgate_info']
    assert len(info_samples) > 0, "nexusgate_info has no values"

    first_labels = info_samples[0]['labels']
    assert 'version' in first_labels, "nexusgate_info missing version label"
    print(f" nexusgate_info version: {first_labels['version']}")

    print()
def test_metrics_histogram_format():
    """Test histogram metrics have correct bucket format (if present).

    For each known histogram that has at least one ``_bucket`` sample line,
    assert that the same histogram also exposes ``_sum`` and ``_count``
    samples and that one of *its own* bucket lines carries ``le="+Inf"``.
    Histograms without bucket samples are reported as skipped.
    """
    print("=" * 50)
    print("Testing histogram metric format")
    print("=" * 50)

    response = httpx.get(METRICS_URL, timeout=10.0)
    content = response.text

    histogram_names = [
        'nexusgate_completion_duration_seconds',
        'nexusgate_completion_ttft_seconds',
        'nexusgate_embedding_duration_seconds',
    ]

    # Only real sample lines count; HELP/TYPE comments merely mention the names.
    sample_lines = [
        stripped
        for stripped in (raw.strip() for raw in content.splitlines())
        if stripped and not stripped.startswith('#')
    ]

    def has_sample(series_name):
        # A sample line is the series name followed by a label set or whitespace.
        return any(
            ln.startswith((f'{series_name}{{', f'{series_name} ', f'{series_name}\t'))
            for ln in sample_lines
        )

    for hist_name in histogram_names:
        bucket_lines = [ln for ln in sample_lines if ln.startswith(f'{hist_name}_bucket{{')]
        if not bucket_lines:
            print(f" {hist_name}: No data (skipped)")
            continue

        print(f" {hist_name}:")
        assert has_sample(f'{hist_name}_sum'), f"Missing _sum for {hist_name}"
        assert has_sample(f'{hist_name}_count'), f"Missing _count for {hist_name}"
        # The +Inf bucket must be on this histogram's own lines, not anywhere in the body.
        assert any('le="+Inf"' in ln for ln in bucket_lines), \
            f"Missing +Inf bucket for {hist_name}"
        print(" Has _bucket: Yes")
        print(" Has _sum: Yes")
        print(" Has _count: Yes")
        print(" Has +Inf bucket: Yes")

    print()
if __name__ == "__main__":
    # Console runner: execute every test in order, tally the outcomes, and
    # exit with status 1 if any test failed or raised.
    banner = "=" * 60
    print("\n" + banner)
    print(" NexusGate - Prometheus Metrics API Tests")
    print(f" Target: {METRICS_URL}")
    print(banner + "\n")

    passed = 0
    failed = 0

    for case in (
        test_metrics_endpoint_returns_200,
        test_metrics_content_type,
        test_metrics_contains_expected_metrics,
        test_metrics_prometheus_format,
        test_metrics_gauge_values,
        test_metrics_histogram_format,
        test_show_sample_output,
    ):
        try:
            case()
        except AssertionError as failure:
            # A failed expectation inside the test.
            failed += 1
            print(f"FAILED: {case.__name__}")
            print(f" Error: {failure}")
        except Exception as crash:
            # Anything else (network error, parse error, ...) is reported with its type.
            failed += 1
            print(f"ERROR: {case.__name__}")
            print(f" {type(crash).__name__}: {crash}")
        else:
            passed += 1

    print(banner)
    print(f"Results: {passed} passed, {failed} failed")
    print(banner)

    if failed > 0:
        sys.exit(1)
    print("\nAll Prometheus metrics tests passed!")
MONITORING_COMPOSE_URL="https://raw.githubusercontent.com/EM-GeekLab/NexusGate/main/docker-compose.monitoring.yaml" + PROMETHEUS_URL="https://raw.githubusercontent.com/EM-GeekLab/NexusGate/main/prometheus/prometheus.yml" + GRAFANA_DATASOURCE_URL="https://raw.githubusercontent.com/EM-GeekLab/NexusGate/main/grafana/provisioning/datasources/prometheus.yml" + GRAFANA_DASHBOARD_PROVIDER_URL="https://raw.githubusercontent.com/EM-GeekLab/NexusGate/main/grafana/provisioning/dashboards/dashboards.yml" + GRAFANA_DASHBOARD_URL="https://raw.githubusercontent.com/EM-GeekLab/NexusGate/main/grafana/provisioning/dashboards/json/nexusgate-dashboard.json" echo -e "${GREEN}✅ 已选择 GitHub 官方源${NC}" break ;; 2) DOWNLOAD_SOURCE="china" COMPOSE_URL="https://cnb.cool/EM-GeekLab/NexusGate/-/git/raw/main/docker-compose.cn.yaml" + MONITORING_COMPOSE_URL="https://cnb.cool/EM-GeekLab/NexusGate/-/git/raw/main/docker-compose.monitoring.yaml" + PROMETHEUS_URL="https://cnb.cool/EM-GeekLab/NexusGate/-/git/raw/main/prometheus/prometheus.yml" + GRAFANA_DATASOURCE_URL="https://cnb.cool/EM-GeekLab/NexusGate/-/git/raw/main/grafana/provisioning/datasources/prometheus.yml" + GRAFANA_DASHBOARD_PROVIDER_URL="https://cnb.cool/EM-GeekLab/NexusGate/-/git/raw/main/grafana/provisioning/dashboards/dashboards.yml" + GRAFANA_DASHBOARD_URL="https://cnb.cool/EM-GeekLab/NexusGate/-/git/raw/main/grafana/provisioning/dashboards/json/nexusgate-dashboard.json" echo -e "${GREEN}✅ 已选择国内镜像源${NC}" break ;; @@ -46,6 +67,43 @@ select_download_source() { echo "" } +# 询问是否安装监控组件 +ask_monitoring() { + echo -e "${BLUE}📊 监控组件配置${NC}" + echo "====================================" + echo "NexusGate 支持 Prometheus + Grafana 监控栈,可以可视化以下指标:" + echo " - 请求数量和速率" + echo " - 延迟分布 (P50/P95/P99)" + echo " - Token 使用量" + echo " - 错误率和成功率" + echo " - 模型和 API 格式分布" + echo "" + echo -e "${YELLOW}是否安装 Prometheus + Grafana 监控组件?${NC}" + echo "1) 是 - 安装完整监控栈 (额外占用约 500MB 内存)" + echo "2) 否 - 仅安装核心服务 (推荐资源有限的环境)" + echo 
# Download one file unless it already exists.
#   $1  source URL
#   $2  destination path (parent directories are created as needed)
#   $3  optional display label for messages (defaults to the destination path)
# Returns 0 when the file exists or was downloaded, 1 when the download failed.
_download_if_missing() {
    local url="$1"
    local dest="$2"
    local label="${3:-$2}"

    if [ -f "$dest" ]; then
        echo -e "${YELLOW}⚠️ ${label} 已存在,跳过下载${NC}"
        return 0
    fi

    mkdir -p "$(dirname "$dest")"

    # Fetch into a temp file and rename on success, so an interrupted transfer
    # never leaves a partial file that a later run would treat as already present.
    local tmp="${dest}.tmp.$$"
    if curl -fsSL "$url" -o "$tmp" && mv -f "$tmp" "$dest"; then
        echo -e "${GREEN}✅ ${label} 下载完成${NC}"
        return 0
    fi

    rm -f "$tmp"
    echo -e "${RED}❌ ${label} 下载失败: ${url}${NC}" >&2
    return 1
}

# Download the configuration files needed for deployment.
# Reads DOWNLOAD_SOURCE, COMPOSE_URL and ENABLE_MONITORING, plus the monitoring
# URL variables when monitoring is enabled. Existing files are left untouched.
# Exits with status 1 if any download fails.
download_configs() {
    echo -e "${BLUE}📥 下载配置文件...${NC}"

    local compose_file="docker-compose.yaml"
    if [ "$DOWNLOAD_SOURCE" = "china" ]; then
        compose_file="docker-compose.cn.yaml"
    fi

    # Main compose file
    _download_if_missing "$COMPOSE_URL" "$compose_file" || exit 1

    # Everything below belongs to the optional monitoring stack.
    if [ "$ENABLE_MONITORING" != "true" ]; then
        return 0
    fi

    echo -e "${BLUE}📥 下载监控组件配置文件...${NC}"

    # Monitoring compose overlay
    _download_if_missing "$MONITORING_COMPOSE_URL" "docker-compose.monitoring.yaml" || exit 1

    # Prometheus scrape configuration
    _download_if_missing "$PROMETHEUS_URL" "prometheus/prometheus.yml" || exit 1

    # Grafana provisioning: datasource, dashboard provider, and the dashboard itself
    _download_if_missing "$GRAFANA_DATASOURCE_URL" \
        "grafana/provisioning/datasources/prometheus.yml" || exit 1
    _download_if_missing "$GRAFANA_DASHBOARD_PROVIDER_URL" \
        "grafana/provisioning/dashboards/dashboards.yml" || exit 1
    _download_if_missing "$GRAFANA_DASHBOARD_URL" \
        "grafana/provisioning/dashboards/json/nexusgate-dashboard.json" \
        "NexusGate Grafana Dashboard" || exit 1
}
请输入有效的端口号 (1024-65535)${NC}" + fi + done + + echo "" + + # Grafana 端口 + echo -e "${YELLOW}请设置 Grafana 端口 (默认 3001):${NC}" + while true; do + read -p "Grafana 端口: " grafana_port_input + + if [ -z "$grafana_port_input" ]; then + GRAFANA_PORT="3001" + echo -e "${GREEN}✅ 使用默认端口 3001${NC}" + break + elif [[ "$grafana_port_input" =~ ^[0-9]+$ ]] && [ "$grafana_port_input" -ge 1024 ] && [ "$grafana_port_input" -le 65535 ]; then + GRAFANA_PORT="$grafana_port_input" + echo -e "${GREEN}✅ 已设置 Grafana 端口为 $grafana_port_input${NC}" + break + else + echo -e "${RED}❌ 请输入有效的端口号 (1024-65535)${NC}" + fi + done + + echo "" + + # Grafana 密码 + echo -e "${YELLOW}请设置 Grafana 管理员密码 (至少8位,直接回车将使用默认密码 'admin'):${NC}" + while true; do + read -s -p "Grafana 密码: " grafana_pass_input + echo "" + + if [ -z "$grafana_pass_input" ]; then + GRAFANA_PASSWORD="admin" + echo -e "${YELLOW}⚠️ 使用默认密码 'admin',建议登录后修改${NC}" + break + elif [ ${#grafana_pass_input} -lt 8 ]; then + echo -e "${RED}❌ 密码长度至少8位,请重新输入${NC}" + continue + else + GRAFANA_PASSWORD="$grafana_pass_input" + echo -e "${GREEN}✅ 已设置自定义 Grafana 密码${NC}" + break + fi + done + + echo "" + fi + # 配置确认 echo -e "${BLUE}📋 配置摘要${NC}" echo "==================================" echo -e "数据库密码: ${GREEN}[已设置]${NC}" - echo -e "管理员密钥: ${GREEN}[已设置]${NC}" + echo -e "管理员密钥: ${GREEN}[已设置]${NC}" echo -e "Web 端口: ${GREEN}${WEB_PORT}${NC}" + if [ "$ENABLE_MONITORING" = "true" ]; then + echo -e "监控组件: ${CYAN}已启用${NC}" + echo -e " - Prometheus 端口: ${GREEN}${PROMETHEUS_PORT}${NC}" + echo -e " - Grafana 端口: ${GREEN}${GRAFANA_PORT}${NC}" + echo -e " - Grafana 密码: ${GREEN}[已设置]${NC}" + else + echo -e "监控组件: ${YELLOW}未启用${NC}" + fi echo "==================================" echo "" echo -e "${YELLOW}确认以上配置并继续部署?(y/N)${NC}" read -p "请输入选择: " confirm - + if [[ "$confirm" =~ ^[Yy]$ ]]; then echo -e "${GREEN}✅ 配置确认,开始创建配置文件${NC}" else echo -e "${RED}❌ 已取消部署${NC}" exit 0 fi - + echo "" } @@ -192,10 +377,10 @@ get_user_passwords() { create_env_file() { if [ ! 
# Start the NexusGate services with Docker Compose.
# Reads DOWNLOAD_SOURCE (selects the compose file) and ENABLE_MONITORING
# (adds the monitoring overlay). Exits with status 1 if compose fails.
start_services() {
    echo -e "${BLUE}🚀 启动 NexusGate 服务...${NC}"

    local compose_file="docker-compose.yaml"
    if [ "$DOWNLOAD_SOURCE" = "china" ]; then
        compose_file="docker-compose.cn.yaml"
    fi

    # Prefer the `docker compose` plugin; fall back to the standalone binary.
    # An array keeps the two-word command intact without unquoted expansion.
    local -a compose_cmd=(docker compose)
    if ! docker compose version &> /dev/null; then
        compose_cmd=(docker-compose)
    fi

    local -a compose_args=(-f "$compose_file")
    local monitoring_file="docker-compose.monitoring.yaml"

    if [ "$ENABLE_MONITORING" = "true" ] && [ -f "$monitoring_file" ]; then
        echo -e "${CYAN}📊 启动核心服务和监控组件...${NC}"
        compose_args+=(-f "$monitoring_file")
    else
        # ENABLE_MONITORING may have been re-read from an existing .env after the
        # download step skipped the monitoring files; start core services only.
        if [ "$ENABLE_MONITORING" = "true" ]; then
            echo -e "${YELLOW}⚠️ 未找到 ${monitoring_file},仅启动核心服务${NC}"
        fi
        echo -e "${GREEN}🚀 启动核心服务...${NC}"
    fi

    # Do not report success when compose itself failed.
    if ! "${compose_cmd[@]}" "${compose_args[@]}" up -d; then
        echo -e "${RED}❌ 服务启动失败,请检查上方 Docker Compose 输出${NC}" >&2
        exit 1
    fi

    echo -e "${GREEN}✅ 服务启动完成!${NC}"
}
开始配置您的第一个模型和应用,其中 BaseURL 需要设置为 http://localhost:${WEB_PORT:-8080}/v1/" echo "后续您也可以通过该服务器的 IP 地址或域名访问 NexusGate,BaseURL 需要设置为 http://<服务器IP或域名>:${WEB_PORT:-8080}/v1/" + + if [ "$ENABLE_MONITORING" = "true" ]; then + echo "" + echo -e "${CYAN}📊 监控使用说明:${NC}" + echo "1. 访问 Grafana 地址并使用上述凭证登录" + echo "2. 在 Dashboards 中找到 'NexusGate LLM Gateway' 仪表板" + echo "3. 查看请求量、延迟、Token 使用量等指标" + echo "" + echo -e "${YELLOW}💡 提示: NexusGate 的 /metrics 端点可被任何 Prometheus 实例抓取${NC}" + fi } # 主函数 main() { select_download_source + ask_monitoring check_docker download_configs create_env_file