diff --git a/.gitignore b/.gitignore
index c4641bb..d82cdb7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,3 +29,4 @@ python_test_code/
 !python_test_code/pyproject.toml
 !python_test_code/uv.lock
 !python_test_code/test_unified_suite.py
+!python_test_code/test_metrics.py
diff --git a/backend/src/api/metrics.ts b/backend/src/api/metrics.ts
new file mode 100644
index 0000000..fb711a7
--- /dev/null
+++ b/backend/src/api/metrics.ts
@@ -0,0 +1,47 @@
+import { Elysia } from "elysia";
+import { generatePrometheusMetrics } from "@/services/prometheus";
+
+/**
+ * Prometheus metrics endpoint (GET /metrics).
+ * Responds with the text returned by generatePrometheusMetrics(), served as Prometheus text exposition format 0.0.4.
+ *
+ * SECURITY NOTE: This endpoint is intentionally public (no authentication required).
+ * This is a deliberate design choice because:
+ *
+ * 1. Standard Practice: Prometheus metrics endpoints are typically unauthenticated
+ *    to allow easy scraping by monitoring systems.
+ *
+ * 2. Operational Data Only: The metrics expose only aggregated operational data
+ *    (request counts, latencies, token usage, error rates). No API key values or
+ *    request/response content are exposed; model names and API key comments are.
+ *
+ * 3. API Key Privacy: Series are labelled with the key's `api_key_comment`, never the
+ *    API key value. Comments are emitted verbatim, so keep them free of secrets.
+ *
+ * 4. Network Security: In production deployments, network-level security (firewall
+ *    rules, VPC, ingress policies) should restrict access to the metrics endpoint
+ *    to authorized monitoring systems only.
+ *
+ * If stricter security is required, consider:
+ * - Using network policies to restrict access to Prometheus scrapers
+ * - Deploying a metrics proxy with authentication
+ * - Adding optional bearer token authentication via environment variable
+ */
+export const metricsApi = new Elysia().get(
+  "/metrics",
+  async () => {
+    const metrics = await generatePrometheusMetrics();
+    return new Response(metrics, {
+      headers: {
+        "Content-Type": "text/plain; version=0.0.4; charset=utf-8",
+      },
+    });
+  },
+  {
+    detail: {
+      description:
+        "Prometheus metrics endpoint. Returns operational metrics in Prometheus exposition format.",
+      tags: ["Metrics"],
+    },
+  },
+);
diff --git a/backend/src/db/index.ts b/backend/src/db/index.ts
index e46142b..f0e8431 100644
--- a/backend/src/db/index.ts
+++ b/backend/src/db/index.ts
@@ -1347,3 +1347,181 @@ export async function updateCompletion(
   const [first] = r;
   return first ?? null;
 }
+
+// ============================================
+// Prometheus Metrics Operations
+// ============================================
+
+/**
+ * Get completion metrics grouped by model, status, api_format, and API key comment
+ * Returns all-time totals over non-deleted rows for Prometheus counters (count and token columns are typed as strings)
+ * Groups by the COALESCEd comment so a missing key/NULL comment and a literal 'unknown' comment share one row per label set
+ */
+export async function getCompletionMetricsByModelAndStatus() {
+  logger.debug("getCompletionMetricsByModelAndStatus");
+  const result = await db.execute(sql`
+    SELECT
+      c.model,
+      c.status,
+      c.api_format,
+      COALESCE(ak.comment, 'unknown') AS api_key_comment,
+      COUNT(*) AS count,
+      COALESCE(SUM(CASE WHEN c.prompt_tokens > 0 THEN c.prompt_tokens ELSE 0 END), 0) AS prompt_tokens,
+      COALESCE(SUM(CASE WHEN c.completion_tokens > 0 THEN c.completion_tokens ELSE 0 END), 0) AS completion_tokens
+    FROM completions c
+    LEFT JOIN api_keys ak ON c.api_key_id = ak.id
+    WHERE c.deleted = false
+    GROUP BY c.model, c.status, c.api_format, COALESCE(ak.comment, 'unknown')
+  `);
+  return result as unknown as {
+    model: string;
+    status: string;
+    api_format: string | null;
+    api_key_comment: string;
+    count: string;
+    prompt_tokens: string;
+    completion_tokens: string;
+  }[];
+}
+
+/**
+ * Get embedding metrics grouped by model, status, and API key comment
+ * Returns all-time totals over non-deleted rows for Prometheus counters (count and token columns are typed as strings)
+ * Groups by the COALESCEd comment so a missing key/NULL comment and a literal 'unknown' comment share one row per label set
+ */
+export async function getEmbeddingMetricsByModelAndStatus() {
+  logger.debug("getEmbeddingMetricsByModelAndStatus");
+  const result = await db.execute(sql`
+    SELECT
+      e.model,
+      e.status,
+      COALESCE(ak.comment, 'unknown') AS api_key_comment,
+      COUNT(*) AS count,
+      COALESCE(SUM(CASE WHEN e.input_tokens > 0 THEN e.input_tokens ELSE 0 END), 0) AS input_tokens
+    FROM embeddings e
+    LEFT JOIN api_keys ak ON e.api_key_id = ak.id
+    WHERE e.deleted = false
+    GROUP BY e.model, e.status, COALESCE(ak.comment, 'unknown')
+  `);
+  return result as unknown as {
+    model: string;
+    status: string;
+    api_key_comment: string;
+    count: string;
+    input_tokens: string;
+  }[];
+}
+
+// Histogram bucket upper bounds in milliseconds (100 ms up to 120 s), shared by the latency histogram queries below
+export const LATENCY_BUCKETS_MS = [100, 250, 500, 1000, 2500, 5000, 10000, 30000, 60000, 120000];
+
+// SQL select-list fragments: one "SUM(CASE WHEN <column> <= b ...) AS bucket_<b>" per bound, so every bucket is cumulative. Built once at module load from constants only.
+const DURATION_BUCKET_CASES = LATENCY_BUCKETS_MS.map(
+  (b) => `SUM(CASE WHEN duration <= ${b} THEN 1 ELSE 0 END) AS bucket_${b}`,
+).join(",\n      ");
+
+const TTFT_BUCKET_CASES = LATENCY_BUCKETS_MS.map(
+  (b) => `SUM(CASE WHEN ttft <= ${b} THEN 1 ELSE 0 END) AS bucket_${b}`,
+).join(",\n      ");
+
+/**
+ * Get completion duration histogram data grouped by model.
+ * Only non-deleted rows with duration > 0 are counted. Each result row has model, one cumulative
+ * bucket_<ms> column per LATENCY_BUCKETS_MS entry, total_count, and duration_sum (milliseconds, as stored).
+ * Note: We use SUM(duration) not AVG because Prometheus histogram format requires
+ * the total sum of all observations (_sum metric). Average can be computed by
+ * Prometheus as sum/count when needed.
+ */
+export async function getCompletionDurationHistogram() {
+  logger.debug("getCompletionDurationHistogram");
+  const result = await db.execute(sql.raw(`
+    SELECT
+      model,
+      ${DURATION_BUCKET_CASES},
+      COUNT(*) AS total_count,
+      COALESCE(SUM(duration), 0) AS duration_sum
+    FROM completions
+    WHERE deleted = false AND duration > 0
+    GROUP BY model
+  `));
+  return result as unknown as Record<string, string>[];
+}
+
+/**
+ * Get completion TTFT (Time To First Token) histogram data grouped by model.
+ * Counts only non-deleted rows with status 'completed' and ttft > 0; TTFT is stored in milliseconds in the database.
+ */
+export async function getCompletionTTFTHistogram() {
+  logger.debug("getCompletionTTFTHistogram");
+  const result = await db.execute(sql.raw(`
+    SELECT
+      model,
+      ${TTFT_BUCKET_CASES},
+      COUNT(*) AS total_count,
+      COALESCE(SUM(ttft), 0) AS ttft_sum
+    FROM completions
+    WHERE deleted = false AND ttft > 0 AND status = 'completed'
+    GROUP BY model
+  `));
+  return result as unknown as Record<string, string>[];
+}
+
+/**
+ * Get embedding duration histogram data grouped by model.
+ * Counts only non-deleted rows with duration > 0; duration is stored in milliseconds in the database.
+ */
+export async function getEmbeddingDurationHistogram() {
+  logger.debug("getEmbeddingDurationHistogram");
+  const result = await db.execute(sql.raw(`
+    SELECT
+      model,
+      ${DURATION_BUCKET_CASES},
+      COUNT(*) AS total_count,
+      COALESCE(SUM(duration), 0) AS duration_sum
+    FROM embeddings
+    WHERE deleted = false AND duration > 0
+    GROUP BY model
+  `));
+  return result as unknown as Record<string, string>[];
+}
+
+/**
+ * Get API key rate limit configuration for Prometheus metrics.
+ * Returns id, comment, rpmLimit and tpmLimit for every API key that is not revoked.
+ */
+export async function getApiKeyRateLimitConfig() {
+  logger.debug("getApiKeyRateLimitConfig");
+  return await db
+    .select({
+      id: schema.ApiKeysTable.id,
+      comment: schema.ApiKeysTable.comment,
+      rpmLimit: schema.ApiKeysTable.rpmLimit,
+      tpmLimit: schema.ApiKeysTable.tpmLimit,
+    })
+    .from(schema.ApiKeysTable)
+    .where(not(schema.ApiKeysTable.revoked));
+}
+
+/**
+ * Count non-revoked API keys, non-deleted providers, and non-deleted chat/embedding models for Prometheus gauges.
+ * Uses a single query with subqueries (one DB round-trip); a missing row or column is reported as 0.
+ */
+export async function getActiveEntityCounts() {
+  logger.debug("getActiveEntityCounts");
+
+  const result = await db.execute(sql`
+    SELECT
+      (SELECT COUNT(*) FROM api_keys WHERE NOT revoked) AS api_keys,
+      (SELECT COUNT(*) FROM providers WHERE NOT deleted) AS providers,
+      (SELECT COUNT(*) FROM models WHERE NOT deleted AND model_type = 'chat') AS chat_models,
+      (SELECT COUNT(*) FROM models WHERE NOT deleted AND model_type = 'embedding') AS embedding_models
+  `);
+
+  const row = (result as unknown as Record<string, string>[])[0];
+  return {
+    apiKeys: Number(row?.api_keys ?? 0),
+    providers: Number(row?.providers ?? 0),
+    chatModels: Number(row?.chat_models ?? 0),
+    embeddingModels: Number(row?.embedding_models ?? 0),
+  };
+}
diff --git a/backend/src/index.ts b/backend/src/index.ts
index 39b8cee..613f313 100644
--- a/backend/src/index.ts
+++ b/backend/src/index.ts
@@ -16,6 +16,7 @@ async function exists(path: string): Promise<boolean> {
 }
 import { join } from "node:path";
 import { routes } from "@/api";
+import { metricsApi } from "@/api/metrics";
 import { loggerPlugin } from "@/plugins/loggerPlugin";
 import {
   ALLOWED_ORIGINS,
@@ -151,8 +152,8 @@ async function spaPlugin(dir: string) {
       if (path.startsWith("/docs") || path.startsWith("/__tsr")) {
         return status(404);
       }
-      // Skip API routes
-      if (path.startsWith("/api") || path.startsWith("/v1")) {
+      // Skip API routes and metrics (include trailing slash to prevent SPA fallback)
+      if (path.startsWith("/api") || path.startsWith("/v1") || path === "/metrics" || path === "/metrics/") {
         return status(404);
       }
 
@@ -205,6 +206,7 @@ const app = new Elysia()
   )
   .use(serverTiming())
   .use(routes)
+  .use(metricsApi)
   .use(await docsPlugin(DOCS_DIR))
   .use(await spaPlugin(FRONTEND_DIR))
   .listen({
diff --git a/backend/src/plugins/apiKeyRateLimitPlugin.ts b/backend/src/plugins/apiKeyRateLimitPlugin.ts
index a00cf3d..978dfc0 100644
--- a/backend/src/plugins/apiKeyRateLimitPlugin.ts
+++ b/backend/src/plugins/apiKeyRateLimitPlugin.ts
@@ -1,10 +1,43 @@
+import { consola } from "consola";
 import { Elysia } from "elysia";
 import { apiKeyPlugin } from "./apiKeyPlugin";
 import { checkRpmLimit, checkTpmLimit } from "@/utils/apiKeyRateLimit";
+import { redisClient } from "@/utils/redisClient";
 
 // Re-export consumeTokens for use in API handlers
 export { consumeTokens } from "@/utils/apiKeyRateLimit";
 
+const logger = consola.withTag("apiKeyRateLimitPlugin");
+
+// Redis hash holding rate limit rejection counters for Prometheus metrics; fields are "<api key comment>:<rpm|tpm>"
+const RATE_LIMIT_REJECTIONS_KEY = "nexusgate:metrics:rate_limit_rejections";
+
+/**
+ * Increment the rejection counter for one API key comment and limit type; failures are logged, never thrown.
+ * @param apiKeyComment The API key comment used in the hash field name ("unknown" when null)
+ * @param limitType Type of limit exceeded ('rpm' or 'tpm'), appended to the field as "<comment>:<limitType>"
+ */
+async function trackRateLimitRejection(
+  apiKeyComment: string | null,
+  limitType: "rpm" | "tpm",
+): Promise<void> {
+  try {
+    const field = `${apiKeyComment ?? "unknown"}:${limitType}`;
+    await redisClient.hincrby(RATE_LIMIT_REJECTIONS_KEY, field, 1);
+  } catch (error) {
+    logger.error("Failed to track rate limit rejection:", error);
+  }
+}
+
+/**
+ * Read every field of the rejection hash: keys are "<api key comment>:<limit type>", values are counts as strings.
+ */
+export async function getRateLimitRejections(): Promise<
+  Record<string, string>
+> {
+  return await redisClient.hgetall(RATE_LIMIT_REJECTIONS_KEY);
+}
+
 /**
  * OpenAI-compatible rate limit error response
  */
@@ -39,6 +72,9 @@ export const apiKeyRateLimitPlugin = new Elysia({
         );
 
         if (!rpmResult.allowed) {
+          // Track rejection for Prometheus metrics
+          await trackRateLimitRejection(apiKeyRecord.comment, "rpm");
+
           set.headers["X-RateLimit-Limit-RPM"] =
             apiKeyRecord.rpmLimit.toString();
           set.headers["X-RateLimit-Remaining-RPM"] = "0";
@@ -59,6 +95,9 @@ export const apiKeyRateLimitPlugin = new Elysia({
         );
 
         if (!tpmResult.allowed) {
+          // Track rejection for Prometheus metrics
+          await trackRateLimitRejection(apiKeyRecord.comment, "tpm");
+
           set.headers["X-RateLimit-Limit-TPM"] =
             apiKeyRecord.tpmLimit.toString();
           set.headers["X-RateLimit-Remaining-TPM"] = "0";
diff --git a/backend/src/services/prometheus.ts b/backend/src/services/prometheus.ts
new file mode 100644
index 0000000..58b3426
--- /dev/null
+++ b/backend/src/services/prometheus.ts
@@ -0,0 +1,498 @@
+import { consola } from "consola";
+import {
+  getCompletionMetricsByModelAndStatus,
+  getEmbeddingMetricsByModelAndStatus,
+  getCompletionDurationHistogram,
+  getCompletionTTFTHistogram,
+  getEmbeddingDurationHistogram,
+  getActiveEntityCounts,
+  getApiKeyRateLimitConfig,
+  LATENCY_BUCKETS_MS,
+} from "@/db";
+import { COMMIT_SHA, METRICS_CACHE_TTL_SECONDS } from "@/utils/config";
+import { redisClient } from "@/utils/redisClient";
+import { getRateLimitStatus } from "@/utils/apiKeyRateLimit";
+import { getRateLimitRejections } from "@/plugins/apiKeyRateLimitPlugin";
+
+const logger = consola.withTag("prometheus");
+
+// Redis key under which the rendered metrics text is cached
+const METRICS_CACHE_KEY = "nexusgate:metrics:cache";
+
+// Bucket upper bounds in seconds (LATENCY_BUCKETS_MS / 1000); seconds are the standard Prometheus unit
+const LATENCY_BUCKETS_SEC = LATENCY_BUCKETS_MS.map((ms) => ms / 1000);
+
+/**
+ * Escape a label value for the Prometheus text exposition format.
+ * Backslash, double-quote and line feed are each replaced by their backslash-escaped form.
+ */
+function escapeLabelValue(value: string): string {
+  return value.replace(/[\\"\n]/g, (ch) => {
+    if (ch === "\n") return "\\n";
+    return `\\${ch}`;
+  });
+}
+
+/**
+ * Render labels as a Prometheus label block (`{k="v",...}`); null, undefined and "" values are omitted.
+ */
+function formatLabels(labels: Record<string, string | number | null | undefined>): string {
+  const rendered = Object.entries(labels)
+    .filter(([, raw]) => raw !== null && raw !== undefined && raw !== "")
+    .map(([labelName, raw]) => `${labelName}="${escapeLabelValue(String(raw))}"`);
+  if (rendered.length === 0) {
+    return "";
+  }
+  return `{${rendered.join(",")}}`;
+}
+
+interface MetricValue { // one sample line: a label set plus its numeric value
+  labels: Record<string, string | number | null | undefined>; // null/undefined/"" entries are left out by formatLabels
+  value: number;
+}
+
+/**
+ * Render a counter family: HELP and TYPE header lines followed by one sample line per value.
+ */
+function formatCounter(name: string, help: string, values: MetricValue[]): string {
+  const header = [
+    `# HELP ${name} ${help}`,
+    `# TYPE ${name} counter`,
+  ];
+  const samples = values.map(
+    (sample) => `${name}${formatLabels(sample.labels)} ${sample.value}`,
+  );
+  return [...header, ...samples].join("\n");
+}
+
+/**
+ * Render a gauge family: HELP and TYPE header lines followed by one sample line per value.
+ */
+function formatGauge(name: string, help: string, values: MetricValue[]): string {
+  const header = [
+    `# HELP ${name} ${help}`,
+    `# TYPE ${name} gauge`,
+  ];
+  const samples = values.map(
+    (sample) => `${name}${formatLabels(sample.labels)} ${sample.value}`,
+  );
+  return [...header, ...samples].join("\n");
+}
+
+interface HistogramValue { // one histogram series (one label set)
+  labels: Record<string, string | number | null | undefined>;
+  buckets: Map<number, number>; // le (in seconds) -> cumulative count
+  sum: number; // total of all observations, emitted as <name>_sum
+  count: number; // number of observations, emitted as <name>_count and as the +Inf bucket
+}
+
+/**
+ * Render a histogram family: HELP/TYPE header, then per series the finite buckets, the +Inf bucket, _sum and _count.
+ */
+function formatHistogram(name: string, help: string, buckets: number[], values: HistogramValue[]): string {
+  const header = [
+    `# HELP ${name} ${help}`,
+    `# TYPE ${name} histogram`,
+  ];
+  const body = values.flatMap((series) => {
+    const seriesLabels = formatLabels(series.labels);
+    // One line per configured upper bound; a bound missing from the map is reported as 0.
+    const finite = buckets.map((le) => {
+      const observed = series.buckets.get(le) ?? 0;
+      return `${name}_bucket${formatLabels({ ...series.labels, le })} ${observed}`;
+    });
+    const inf = `${name}_bucket${formatLabels({ ...series.labels, le: "+Inf" })} ${series.count}`;
+    const sumLine = `${name}_sum${seriesLabels} ${series.sum}`;
+    const countLine = `${name}_count${seriesLabels} ${series.count}`;
+    return [...finite, inf, sumLine, countLine];
+  });
+  return [...header, ...body].join("\n");
+}
+
+/**
+ * Return the metrics payload: the cached copy when present, otherwise a freshly generated one; on any error, the fallback payload.
+ */
+export async function generatePrometheusMetrics(): Promise<string> {
+  try {
+    // Serve the cached payload when one exists (a missing or empty entry falls through)
+    const cachedMetrics = await redisClient.get(METRICS_CACHE_KEY);
+    if (cachedMetrics) {
+      logger.debug("Returning cached metrics");
+      return cachedMetrics;
+    }
+
+    // Cache miss: build the payload from scratch
+    const metrics = await generateMetricsInternal();
+
+    // Store it with METRICS_CACHE_TTL_SECONDS passed as the EX option
+    await redisClient.set(METRICS_CACHE_KEY, metrics, { EX: METRICS_CACHE_TTL_SECONDS });
+
+    return metrics;
+  } catch (error) {
+    logger.error("Error generating metrics:", error);
+    // Any throw above (cache read, generation, cache write) lands here; the fallback itself is not cached
+    return generateFallbackMetrics();
+  }
+}
+
+/**
+ * Build the minimal payload served when full metrics generation throws.
+ * It contains only the build-info gauge and an error-flag gauge set to 1.
+ */
+function generateFallbackMetrics(): string {
+  // Build info needs nothing beyond COMMIT_SHA.
+  const buildInfo = formatGauge(
+    "nexusgate_info",
+    "NexusGate build information",
+    [{ labels: { version: COMMIT_SHA }, value: 1 }],
+  );
+
+  // Flag marking this payload as the degraded one.
+  const errorFlag = formatGauge(
+    "nexusgate_metrics_error",
+    "Indicates metrics generation failed",
+    [{ labels: {}, value: 1 }],
+  );
+
+  // Sections are separated by a blank line and the payload ends with a newline.
+  return `${[buildInfo, errorFlag].join("\n\n")}\n`;
+}
+
+/**
+ * Build the full metrics payload: load all inputs in parallel, then render each metric family as one section (blank-line separated).
+ */
+async function generateMetricsInternal(): Promise<string> {
+  // Fetch all metrics data in parallel
+  const [
+    completionMetrics,
+    embeddingMetrics,
+    completionDurationHist,
+    completionTTFTHist,
+    embeddingDurationHist,
+    entityCounts,
+    apiKeyConfigs,
+    rateLimitRejections,
+  ] = await Promise.all([
+    getCompletionMetricsByModelAndStatus(),
+    getEmbeddingMetricsByModelAndStatus(),
+    getCompletionDurationHistogram(),
+    getCompletionTTFTHistogram(),
+    getEmbeddingDurationHistogram(),
+    getActiveEntityCounts(),
+    getApiKeyRateLimitConfig(),
+    getRateLimitRejections(),
+  ]);
+
+  const sections: string[] = [];
+
+  // Info metric
+  sections.push(
+    formatGauge("nexusgate_info", "NexusGate build information", [
+      { labels: { version: COMMIT_SHA }, value: 1 },
+    ]),
+  );
+
+  // Completion counter metrics
+  const completionCounts: MetricValue[] = [];
+  const promptTokenCounts: Map<string, number> = new Map();
+  const completionTokenCounts: Map<string, number> = new Map();
+
+  for (const row of completionMetrics) {
+    completionCounts.push({
+      labels: {
+        model: row.model,
+        status: row.status,
+        api_format: row.api_format,
+        api_key_comment: row.api_key_comment,
+      },
+      value: Number(row.count),
+    });
+
+    // Aggregate tokens by model
+    const currentPrompt = promptTokenCounts.get(row.model) ?? 0;
+    promptTokenCounts.set(row.model, currentPrompt + Number(row.prompt_tokens));
+
+    const currentCompletion = completionTokenCounts.get(row.model) ?? 0;
+    completionTokenCounts.set(row.model, currentCompletion + Number(row.completion_tokens));
+  }
+
+  if (completionCounts.length > 0) {
+    sections.push(
+      formatCounter(
+        "nexusgate_completions_total",
+        "Total number of completion requests",
+        completionCounts,
+      ),
+    );
+  }
+
+  // Prompt token counter
+  const promptTokenValues: MetricValue[] = [];
+  for (const [model, tokens] of promptTokenCounts) {
+    promptTokenValues.push({ labels: { model }, value: tokens });
+  }
+  if (promptTokenValues.length > 0) {
+    sections.push(
+      formatCounter(
+        "nexusgate_tokens_prompt_total",
+        "Total prompt tokens processed",
+        promptTokenValues,
+      ),
+    );
+  }
+
+  // Completion token counter
+  const completionTokenValues: MetricValue[] = [];
+  for (const [model, tokens] of completionTokenCounts) {
+    completionTokenValues.push({ labels: { model }, value: tokens });
+  }
+  if (completionTokenValues.length > 0) {
+    sections.push(
+      formatCounter(
+        "nexusgate_tokens_completion_total",
+        "Total completion tokens generated",
+        completionTokenValues,
+      ),
+    );
+  }
+
+  // Embedding counter metrics
+  const embeddingCounts: MetricValue[] = [];
+  const embeddingTokenCounts: Map<string, number> = new Map();
+
+  for (const row of embeddingMetrics) {
+    embeddingCounts.push({
+      labels: {
+        model: row.model,
+        status: row.status,
+        api_key_comment: row.api_key_comment,
+      },
+      value: Number(row.count),
+    });
+
+    const currentTokens = embeddingTokenCounts.get(row.model) ?? 0;
+    embeddingTokenCounts.set(row.model, currentTokens + Number(row.input_tokens));
+  }
+
+  if (embeddingCounts.length > 0) {
+    sections.push(
+      formatCounter(
+        "nexusgate_embeddings_total",
+        "Total number of embedding requests",
+        embeddingCounts,
+      ),
+    );
+  }
+
+  // Embedding token counter
+  const embeddingTokenValues: MetricValue[] = [];
+  for (const [model, tokens] of embeddingTokenCounts) {
+    embeddingTokenValues.push({ labels: { model }, value: tokens });
+  }
+  if (embeddingTokenValues.length > 0) {
+    sections.push(
+      formatCounter(
+        "nexusgate_tokens_embedding_total",
+        "Total embedding tokens processed",
+        embeddingTokenValues,
+      ),
+    );
+  }
+
+  // Completion duration histogram
+  const durationHistValues = parseHistogramData(completionDurationHist, "duration");
+  if (durationHistValues.length > 0) {
+    sections.push(
+      formatHistogram(
+        "nexusgate_completion_duration_seconds",
+        "Completion request duration in seconds",
+        LATENCY_BUCKETS_SEC,
+        durationHistValues,
+      ),
+    );
+  }
+
+  // Completion TTFT histogram
+  const ttftHistValues = parseHistogramData(completionTTFTHist, "ttft");
+  if (ttftHistValues.length > 0) {
+    sections.push(
+      formatHistogram(
+        "nexusgate_completion_ttft_seconds",
+        "Time to first token in seconds",
+        LATENCY_BUCKETS_SEC,
+        ttftHistValues,
+      ),
+    );
+  }
+
+  // Embedding duration histogram
+  const embeddingDurationHistValues = parseHistogramData(embeddingDurationHist, "duration");
+  if (embeddingDurationHistValues.length > 0) {
+    sections.push(
+      formatHistogram(
+        "nexusgate_embedding_duration_seconds",
+        "Embedding request duration in seconds",
+        LATENCY_BUCKETS_SEC,
+        embeddingDurationHistValues,
+      ),
+    );
+  }
+
+  // Gauge metrics for active entities
+  sections.push(
+    formatGauge("nexusgate_active_api_keys", "Number of active (non-revoked) API keys", [
+      { labels: {}, value: entityCounts.apiKeys },
+    ]),
+  );
+
+  sections.push(
+    formatGauge("nexusgate_active_providers", "Number of active providers", [
+      { labels: {}, value: entityCounts.providers },
+    ]),
+  );
+
+  sections.push(
+    formatGauge("nexusgate_active_models", "Number of active models", [
+      { labels: { type: "chat" }, value: entityCounts.chatModels },
+      { labels: { type: "embedding" }, value: entityCounts.embeddingModels },
+    ]),
+  );
+
+  // API Key Rate Limit Metrics: one series per key; api_key_id keeps label sets unique when comments repeat or are null
+  // Fetch current usage from Redis for each API key in parallel for better performance
+  const rpmUsageValues: MetricValue[] = [];
+  const rpmLimitValues: MetricValue[] = [];
+  const tpmUsageValues: MetricValue[] = [];
+  const tpmLimitValues: MetricValue[] = [];
+
+  const rateLimitStatuses = await Promise.all(
+    apiKeyConfigs.map(async (apiKey) =>
+      getRateLimitStatus(apiKey.id, {
+        rpmLimit: apiKey.rpmLimit,
+        tpmLimit: apiKey.tpmLimit,
+      }),
+    ),
+  );
+
+  for (let i = 0; i < apiKeyConfigs.length; i++) {
+    const apiKey = apiKeyConfigs[i];
+    const status = rateLimitStatuses[i];
+    if (!apiKey || !status) {
+      continue;
+    }
+
+    const comment = apiKey.comment ?? "unknown";
+
+    rpmUsageValues.push({
+      labels: { api_key_comment: comment, api_key_id: apiKey.id },
+      value: status.rpm.current,
+    });
+    rpmLimitValues.push({
+      labels: { api_key_comment: comment, api_key_id: apiKey.id },
+      value: status.rpm.limit,
+    });
+    tpmUsageValues.push({
+      labels: { api_key_comment: comment, api_key_id: apiKey.id },
+      value: status.tpm.current,
+    });
+    tpmLimitValues.push({
+      labels: { api_key_comment: comment, api_key_id: apiKey.id },
+      value: status.tpm.limit,
+    });
+  }
+
+  if (rpmUsageValues.length > 0) {
+    sections.push(
+      formatGauge(
+        "nexusgate_api_key_rpm_usage",
+        "Current RPM usage per API key",
+        rpmUsageValues,
+      ),
+    );
+    sections.push(
+      formatGauge(
+        "nexusgate_api_key_rpm_limit",
+        "RPM limit per API key",
+        rpmLimitValues,
+      ),
+    );
+    sections.push(
+      formatGauge(
+        "nexusgate_api_key_tpm_usage",
+        "Current TPM usage per API key",
+        tpmUsageValues,
+      ),
+    );
+    sections.push(
+      formatGauge(
+        "nexusgate_api_key_tpm_limit",
+        "TPM limit per API key",
+        tpmLimitValues,
+      ),
+    );
+  }
+
+  // Rate Limit Rejection Counter
+  // Field format is "apiKeyComment:limitType" where apiKeyComment may contain colons
+  const rejectionValues: MetricValue[] = [];
+  for (const [field, count] of Object.entries(rateLimitRejections)) {
+    const parts = field.split(":");
+    const limitType = parts.pop(); // Last part is always the limit type (rpm/tpm)
+    const apiKeyComment = parts.join(":"); // Rejoin in case comment contained colons
+
+    if (apiKeyComment && limitType) {
+      rejectionValues.push({
+        labels: { api_key_comment: apiKeyComment, limit_type: limitType },
+        value: Number(count),
+      });
+    }
+  }
+
+  if (rejectionValues.length > 0) {
+    sections.push(
+      formatCounter(
+        "nexusgate_rate_limit_rejections_total",
+        "Total number of rate limit rejections (429 responses)",
+        rejectionValues,
+      ),
+    );
+  }
+
+  return sections.join("\n\n") + "\n";
+}
+
+/**
+ * Convert histogram rows from the database into HistogramValue entries.
+ *
+ * Each row is read for `model`, one `bucket_<ms>` column per entry of
+ * LATENCY_BUCKETS_MS, `total_count`, and `<sumField>_sum`; missing columns count as 0.
+ * Bucket bounds and the sum are divided by 1000 to go from milliseconds to seconds.
+ *
+ * @param data - Raw rows whose numeric columns are typed as strings
+ * @param sumField - Prefix of the sum column to read ("duration" or "ttft")
+ * @returns One HistogramValue per input row, labelled with the row's model
+ */
+function parseHistogramData(
+  data: Record<string, string>[],
+  sumField: "duration" | "ttft",
+): HistogramValue[] {
+  const sumColumn = `${sumField}_sum`;
+
+  return data.map((row) => {
+    // Key each bucket by its upper bound in seconds.
+    const buckets = new Map<number, number>(
+      LATENCY_BUCKETS_MS.map((ms): [number, number] => [
+        ms / 1000,
+        Number(row[`bucket_${ms}`] ?? 0),
+      ]),
+    );
+
+    // The sum column is in milliseconds in the DB; expose it in seconds.
+    return {
+      labels: { model: row.model },
+      buckets,
+      sum: Number(row[sumColumn] ?? 0) / 1000,
+      count: Number(row.total_count ?? 0),
+    };
+  });
+}
diff --git a/backend/src/utils/config.ts b/backend/src/utils/config.ts
index 23741f3..6ff379b 100644
--- a/backend/src/utils/config.ts
+++ b/backend/src/utils/config.ts
@@ -143,3 +143,10 @@ export const FORCILY_ADD_API_KEYS = env(
 
 export const FRONTEND_DIR = env("frontend dir", z.coerce.string(), "dist");
 export const DOCS_DIR = env("docs dir", z.coerce.string(), "docs");
+
+// Prometheus metrics configuration: lifetime in seconds of the cached metrics payload (positive integer, default "30")
+export const METRICS_CACHE_TTL_SECONDS = env(
+  "metrics cache ttl seconds",
+  z.coerce.number().int().positive(),
+  "30",
+);
diff --git a/backend/src/utils/redisClient.ts b/backend/src/utils/redisClient.ts
index 2a8d3f8..98dd766 100644
--- a/backend/src/utils/redisClient.ts
+++ b/backend/src/utils/redisClient.ts
@@ -149,6 +149,40 @@ class RedisClient {
     }
   }
 
+  /**
+   * Increment a field in a hash by the given amount; errors are logged and not rethrown
+   * @param {string} key - Hash key
+   * @param {string} field - Field within the hash
+   * @param {number} increment - Amount to increment by
+   * @returns {Promise<number>} New value after increment, or 0 if the underlying call throws
+   */
+  public async hincrby(
+    key: string,
+    field: string,
+    increment: number,
+  ): Promise<number> {
+    try {
+      return await this.client.hincrby(key, field, increment);
+    } catch (error) {
+      logger.error(`Redis hincrby error: ${(error as Error).message}`);
+      return 0;
+    }
+  }
+
+  /**
+   * Get all fields and values from a hash; errors are logged and not rethrown
+   * @param {string} key - Hash key
+   * @returns {Promise<Record<string, string>>} Hash fields and values, or {} if the underlying call throws
+   */
+  public async hgetall(key: string): Promise<Record<string, string>> {
+    try {
+      return await this.client.hgetall(key);
+    } catch (error) {
+      logger.error(`Redis hgetall error: ${(error as Error).message}`);
+      return {};
+    }
+  }
+
   /**
    * Close the Redis connection
    */
diff --git a/docker-compose.monitoring.yaml b/docker-compose.monitoring.yaml
new file mode 100644
index 0000000..9d4926a
--- /dev/null
+++ b/docker-compose.monitoring.yaml
@@ -0,0 +1,41 @@
+# Docker Compose override for Prometheus + Grafana monitoring stack
+# Use with: docker compose -f docker-compose.yaml -f docker-compose.monitoring.yaml up -d
+
+services:
+  prometheus:
+    image: "prom/prometheus:latest"
+    container_name: nexusgate-prometheus
+    volumes:
+      - "./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro"
+      - "prometheus_data:/prometheus"
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+      - '--storage.tsdb.path=/prometheus'
+      - '--web.enable-lifecycle'
+      - '--storage.tsdb.retention.time=15d'
+    ports:
+      - "${PROMETHEUS_PORT:-9090}:9090"
+    restart: on-failure
+    depends_on:
+      - nexusgate
+
+  grafana:
+    image: "grafana/grafana:latest"
+    container_name: nexusgate-grafana
+    environment:
+      - "GF_SECURITY_ADMIN_USER=${GRAFANA_USER:-admin}"
+      - "GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-admin}"
+      - "GF_USERS_ALLOW_SIGN_UP=false"
+      - "GF_SERVER_ROOT_URL=http://localhost:${GRAFANA_PORT:-3001}"
+    volumes:
+      - "grafana_data:/var/lib/grafana"
+      - "./grafana/provisioning:/etc/grafana/provisioning:ro"
+    ports:
+      - "${GRAFANA_PORT:-3001}:3000"
+    restart: on-failure
+    depends_on:
+      - prometheus
+
+volumes:
+  prometheus_data:
+  grafana_data:
diff --git a/grafana/provisioning/dashboards/dashboards.yml b/grafana/provisioning/dashboards/dashboards.yml
new file mode 100644
index 0000000..3a63b6a
--- /dev/null
+++ b/grafana/provisioning/dashboards/dashboards.yml
@@ -0,0 +1,16 @@
+# Grafana dashboard provisioning
+# Auto-loads dashboards from the dashboards directory
+
+apiVersion: 1
+
+providers:
+  - name: 'NexusGate Dashboards'
+    orgId: 1
+    folder: 'NexusGate'
+    folderUid: 'nexusgate'
+    type: file
+    disableDeletion: false
+    editable: true
+    updateIntervalSeconds: 30
+    options:
+      path: /etc/grafana/provisioning/dashboards/json
diff --git a/grafana/provisioning/dashboards/json/nexusgate-dashboard.json b/grafana/provisioning/dashboards/json/nexusgate-dashboard.json
new file mode 100644
index 0000000..4a109fe
--- /dev/null
+++ b/grafana/provisioning/dashboards/json/nexusgate-dashboard.json
@@ -0,0 +1,2178 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "grafana",
+          "uid": "-- Grafana --"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 0
+      },
+      "id": 100,
+      "panels": [],
+      "title": "Overview",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 4,
+        "x": 0,
+        "y": 1
+      },
+      "id": 1,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "none",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(increase(nexusgate_completions_total{status=\"completed\"}[$__range]))",
+          "legendFormat": "Completions",
+          "refId": "A"
+        }
+      ],
+      "title": "Total Completions",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 4,
+        "x": 4,
+        "y": 1
+      },
+      "id": 2,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "none",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(increase(nexusgate_embeddings_total{status=\"completed\"}[$__range]))",
+          "legendFormat": "Embeddings",
+          "refId": "A"
+        }
+      ],
+      "title": "Total Embeddings",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "decimals": 1,
+          "mappings": [],
+          "max": 100,
+          "min": 0,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "red",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 90
+              },
+              {
+                "color": "green",
+                "value": 99
+              }
+            ]
+          },
+          "unit": "percent"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 4,
+        "x": 8,
+        "y": 1
+      },
+      "id": 3,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "100 * sum(increase(nexusgate_completions_total{status=\"completed\"}[$__range])) / sum(increase(nexusgate_completions_total[$__range]))",
+          "legendFormat": "Success Rate",
+          "refId": "A"
+        }
+      ],
+      "title": "Completion Success Rate",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "blue",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 4,
+        "x": 12,
+        "y": 1
+      },
+      "id": 4,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "none",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "nexusgate_active_api_keys",
+          "legendFormat": "API Keys",
+          "refId": "A"
+        }
+      ],
+      "title": "Active API Keys",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "purple",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 4,
+        "x": 16,
+        "y": 1
+      },
+      "id": 5,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "none",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "nexusgate_active_providers",
+          "legendFormat": "Providers",
+          "refId": "A"
+        }
+      ],
+      "title": "Active Providers",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "orange",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 4,
+        "x": 20,
+        "y": 1
+      },
+      "id": 6,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "none",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(nexusgate_active_models)",
+          "legendFormat": "Models",
+          "refId": "A"
+        }
+      ],
+      "title": "Active Models",
+      "type": "stat"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 5
+      },
+      "id": 101,
+      "panels": [],
+      "title": "Request Rate & Throughput",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "reqps"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 6
+      },
+      "id": 10,
+      "options": {
+        "legend": {
+          "calcs": ["mean", "max"],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(rate(nexusgate_completions_total[$__rate_interval])) by (model)",
+          "legendFormat": "{{model}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Completion Request Rate (by Model)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "reqps"
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "failed"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "red",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "completed"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "green",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "aborted"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "orange",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "cache_hit"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "blue",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 6
+      },
+      "id": 11,
+      "options": {
+        "legend": {
+          "calcs": ["mean", "max"],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(rate(nexusgate_completions_total[$__rate_interval])) by (status)",
+          "legendFormat": "{{status}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Completion Request Rate (by Status)",
+      "type": "timeseries"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 14
+      },
+      "id": 102,
+      "panels": [],
+      "title": "Latency",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 15
+      },
+      "id": 20,
+      "options": {
+        "legend": {
+          "calcs": ["mean", "max"],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "histogram_quantile(0.50, sum(rate(nexusgate_completion_duration_seconds_bucket[$__rate_interval])) by (le, model))",
+          "legendFormat": "p50 - {{model}}",
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "histogram_quantile(0.95, sum(rate(nexusgate_completion_duration_seconds_bucket[$__rate_interval])) by (le, model))",
+          "legendFormat": "p95 - {{model}}",
+          "refId": "B"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "histogram_quantile(0.99, sum(rate(nexusgate_completion_duration_seconds_bucket[$__rate_interval])) by (le, model))",
+          "legendFormat": "p99 - {{model}}",
+          "refId": "C"
+        }
+      ],
+      "title": "Completion Duration (p50/p95/p99)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 15
+      },
+      "id": 21,
+      "options": {
+        "legend": {
+          "calcs": ["mean", "max"],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "histogram_quantile(0.50, sum(rate(nexusgate_completion_ttft_seconds_bucket[$__rate_interval])) by (le, model))",
+          "legendFormat": "p50 - {{model}}",
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "histogram_quantile(0.95, sum(rate(nexusgate_completion_ttft_seconds_bucket[$__rate_interval])) by (le, model))",
+          "legendFormat": "p95 - {{model}}",
+          "refId": "B"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "histogram_quantile(0.99, sum(rate(nexusgate_completion_ttft_seconds_bucket[$__rate_interval])) by (le, model))",
+          "legendFormat": "p99 - {{model}}",
+          "refId": "C"
+        }
+      ],
+      "title": "Time To First Token (TTFT) (p50/p95/p99)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "bars",
+            "fillOpacity": 80,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 23
+      },
+      "id": 22,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"0.5\"}[$__range]))",
+          "legendFormat": "< 0.5s",
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"1\"}[$__range])) - sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"0.5\"}[$__range]))",
+          "legendFormat": "0.5s - 1s",
+          "refId": "B"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"2.5\"}[$__range])) - sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"1\"}[$__range]))",
+          "legendFormat": "1s - 2.5s",
+          "refId": "C"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"5\"}[$__range])) - sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"2.5\"}[$__range]))",
+          "legendFormat": "2.5s - 5s",
+          "refId": "D"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"10\"}[$__range])) - sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"5\"}[$__range]))",
+          "legendFormat": "5s - 10s",
+          "refId": "E"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"+Inf\"}[$__range])) - sum(increase(nexusgate_completion_duration_seconds_bucket{le=\"10\"}[$__range]))",
+          "legendFormat": "> 10s",
+          "refId": "F"
+        }
+      ],
+      "title": "Latency Distribution",
+      "type": "timeseries"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 31
+      },
+      "id": 103,
+      "panels": [],
+      "title": "Token Usage",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "normal"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 32
+      },
+      "id": 30,
+      "options": {
+        "legend": {
+          "calcs": ["sum"],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(rate(nexusgate_tokens_prompt_total[$__rate_interval])) by (model)",
+          "legendFormat": "Prompt - {{model}}",
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(rate(nexusgate_tokens_completion_total[$__rate_interval])) by (model)",
+          "legendFormat": "Completion - {{model}}",
+          "refId": "B"
+        }
+      ],
+      "title": "Token Rate (Prompt vs Completion)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 6,
+        "x": 12,
+        "y": 32
+      },
+      "id": 31,
+      "options": {
+        "displayLabels": ["percent"],
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "showLegend": true,
+          "values": ["value"]
+        },
+        "pieType": "donut",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(increase(nexusgate_tokens_prompt_total[$__range])) by (model)",
+          "legendFormat": "{{model}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Prompt Tokens by Model",
+      "type": "piechart"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 6,
+        "x": 18,
+        "y": 32
+      },
+      "id": 32,
+      "options": {
+        "displayLabels": ["percent"],
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "showLegend": true,
+          "values": ["value"]
+        },
+        "pieType": "donut",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(increase(nexusgate_tokens_completion_total[$__range])) by (model)",
+          "legendFormat": "{{model}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Completion Tokens by Model",
+      "type": "piechart"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 40
+      },
+      "id": 104,
+      "panels": [],
+      "title": "Errors, Cache & Rate Limits",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 30,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "percentunit"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 41
+      },
+      "id": 40,
+      "options": {
+        "legend": {
+          "calcs": ["mean", "max"],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(rate(nexusgate_completions_total{status=\"failed\"}[$__rate_interval])) by (model) / sum(rate(nexusgate_completions_total[$__rate_interval])) by (model)",
+          "legendFormat": "{{model}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Error Rate by Model",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 30,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "percentunit"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 41
+      },
+      "id": 41,
+      "options": {
+        "legend": {
+          "calcs": ["mean", "max"],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(rate(nexusgate_completions_total{status=\"cache_hit\"}[$__rate_interval])) / sum(rate(nexusgate_completions_total[$__rate_interval]))",
+          "legendFormat": "Cache Hit Rate",
+          "refId": "A"
+        }
+      ],
+      "title": "Cache Hit Rate",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "failed"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "red",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "aborted"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "orange",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 0,
+        "y": 49
+      },
+      "id": 42,
+      "options": {
+        "displayLabels": ["percent", "value"],
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "showLegend": true,
+          "values": ["value"]
+        },
+        "pieType": "donut",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(increase(nexusgate_completions_total{status=~\"failed|aborted\"}[$__range])) by (status)",
+          "legendFormat": "{{status}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Error Type Distribution",
+      "type": "piechart"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 30,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byRegexp",
+              "options": ".*rpm.*"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "orange",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byRegexp",
+              "options": ".*tpm.*"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "red",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 8,
+        "y": 49
+      },
+      "id": 43,
+      "options": {
+        "legend": {
+          "calcs": ["sum"],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(rate(nexusgate_rate_limit_rejections_total[$__rate_interval])) by (api_key_comment, limit_type)",
+          "legendFormat": "{{api_key_comment}} ({{limit_type}})",
+          "refId": "A"
+        }
+      ],
+      "title": "Rate Limit Rejections",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "max": 100,
+          "min": 0,
+          "thresholds": {
+            "mode": "percentage",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 70
+              },
+              {
+                "color": "red",
+                "value": 90
+              }
+            ]
+          },
+          "unit": "percent"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 16,
+        "y": 49
+      },
+      "id": 44,
+      "options": {
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "showThresholdLabels": false,
+        "showThresholdMarkers": true,
+        "text": {}
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "100 * nexusgate_api_key_rpm_usage / nexusgate_api_key_rpm_limit",
+          "legendFormat": "{{api_key_comment}} RPM",
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "100 * nexusgate_api_key_tpm_usage / nexusgate_api_key_tpm_limit",
+          "legendFormat": "{{api_key_comment}} TPM",
+          "refId": "B"
+        }
+      ],
+      "title": "API Key Rate Limit Usage (%)",
+      "type": "gauge"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 57
+      },
+      "id": 105,
+      "panels": [],
+      "title": "API Format Distribution",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 0,
+        "y": 58
+      },
+      "id": 50,
+      "options": {
+        "displayLabels": ["percent", "value"],
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "showLegend": true,
+          "values": ["value"]
+        },
+        "pieType": "pie",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(increase(nexusgate_completions_total[$__range])) by (api_format)",
+          "legendFormat": "{{api_format}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Requests by API Format",
+      "type": "piechart"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 8,
+        "y": 58
+      },
+      "id": 51,
+      "options": {
+        "displayLabels": ["percent", "value"],
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "showLegend": true,
+          "values": ["value"]
+        },
+        "pieType": "pie",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(increase(nexusgate_completions_total[$__range])) by (model)",
+          "legendFormat": "{{model}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Requests by Model",
+      "type": "piechart"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 16,
+        "y": 58
+      },
+      "id": 52,
+      "options": {
+        "displayLabels": ["percent", "value"],
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "showLegend": true,
+          "values": ["value"]
+        },
+        "pieType": "pie",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(increase(nexusgate_completions_total[$__range])) by (status)",
+          "legendFormat": "{{status}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Requests by Status",
+      "type": "piechart"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 66
+      },
+      "id": 106,
+      "panels": [],
+      "title": "Embeddings",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "reqps"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 67
+      },
+      "id": 60,
+      "options": {
+        "legend": {
+          "calcs": ["mean", "max"],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "sum(rate(nexusgate_embeddings_total[$__rate_interval])) by (model)",
+          "legendFormat": "{{model}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Embedding Request Rate",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 67
+      },
+      "id": 61,
+      "options": {
+        "legend": {
+          "calcs": ["mean", "max"],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "histogram_quantile(0.50, sum(rate(nexusgate_embedding_duration_seconds_bucket[$__rate_interval])) by (le, model))",
+          "legendFormat": "p50 - {{model}}",
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "histogram_quantile(0.95, sum(rate(nexusgate_embedding_duration_seconds_bucket[$__rate_interval])) by (le, model))",
+          "legendFormat": "p95 - {{model}}",
+          "refId": "B"
+        }
+      ],
+      "title": "Embedding Latency (p50/p95)",
+      "type": "timeseries"
+    }
+  ],
+  "refresh": "30s",
+  "schemaVersion": 38,
+  "tags": ["nexusgate", "llm", "prometheus"],
+  "templating": {
+    "list": [
+      {
+        "current": {
+          "selected": false,
+          "text": "Prometheus",
+          "value": "prometheus"
+        },
+        "hide": 0,
+        "includeAll": false,
+        "label": "Data Source",
+        "multi": false,
+        "name": "datasource",
+        "options": [],
+        "query": "prometheus",
+        "queryValue": "",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "type": "datasource"
+      }
+    ]
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {
+    "refresh_intervals": [
+      "5s",
+      "10s",
+      "30s",
+      "1m",
+      "5m",
+      "15m",
+      "30m",
+      "1h"
+    ]
+  },
+  "timezone": "browser",
+  "title": "NexusGate LLM Gateway",
+  "uid": "nexusgate-overview",
+  "version": 1,
+  "weekStart": ""
+}
diff --git a/grafana/provisioning/datasources/prometheus.yml b/grafana/provisioning/datasources/prometheus.yml
new file mode 100644
index 0000000..68be1ed
--- /dev/null
+++ b/grafana/provisioning/datasources/prometheus.yml
@@ -0,0 +1,15 @@
+# Grafana datasource provisioning
+# Auto-configures Prometheus as the default data source
+
+apiVersion: 1
+
+datasources:
+  - name: Prometheus
+    type: prometheus
+    access: proxy
+    url: http://prometheus:9090
+    isDefault: true
+    editable: false
+    jsonData:
+      timeInterval: "15s"
+      httpMethod: POST
diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml
new file mode 100644
index 0000000..eae7a3a
--- /dev/null
+++ b/prometheus/prometheus.yml
@@ -0,0 +1,19 @@
+# Prometheus configuration for NexusGate
+# This file is auto-generated by quick-start.sh
+
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+
+scrape_configs:
+  # NexusGate metrics
+  - job_name: 'nexusgate'
+    static_configs:
+      - targets: ['nexusgate:3000']
+    metrics_path: /metrics
+    scrape_interval: 15s
+
+  # Prometheus self-monitoring
+  - job_name: 'prometheus'
+    static_configs:
+      - targets: ['localhost:9090']
diff --git a/python_test_code/test_metrics.py b/python_test_code/test_metrics.py
new file mode 100644
index 0000000..024fee2
--- /dev/null
+++ b/python_test_code/test_metrics.py
@@ -0,0 +1,301 @@
+#!/usr/bin/env python3
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+#     "httpx>=0.25.0",
+# ]
+# ///
+"""
+NexusGate Prometheus Metrics API Test
+
+Tests the /metrics endpoint returns valid Prometheus format metrics.
+
+Usage:
+    uv run test_metrics.py
+
+Environment variables:
+    NEXUSGATE_BASE_URL: NexusGate service address (default: http://localhost:3000)
+"""
+
+import os
+import re
+import sys
+import httpx
+
+# Configuration
+BASE_URL = os.getenv("NEXUSGATE_BASE_URL", "http://localhost:3000")
+METRICS_URL = f"{BASE_URL}/metrics"
+
+
+def parse_prometheus_metrics(text: str) -> dict[str, list[dict]]:
+    """
+    Parse Prometheus text exposition format into a structured dict.
+
+    Label values are parsed as quoted strings: ',', '=' and '}' inside a value
+    are kept, and backslash-escaped backslash, quote and newline are decoded.
+    A trailing timestamp field is ignored; non-sample lines are skipped.
+
+    Args:
+        text: Raw body returned by the /metrics endpoint.
+
+    Returns:
+        dict mapping metric names to list of {labels: dict, value: float}.
+        A name announced by "# HELP" is present even when it has no samples.
+
+    Raises:
+        ValueError: if a sample value cannot be converted with float().
+    """
+    # Sample line: name, optional {labels} built from quoted values, value, optional timestamp
+    sample_re = re.compile(
+        r'^([a-zA-Z_:][a-zA-Z0-9_:]*)(?:\{((?:[^"}]|"(?:[^"\\]|\\.)*")*)\})?'
+        r'\s+(\S+)(?:\s+\S+)?$'
+    )
+    label_re = re.compile(r'([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:[^"\\]|\\.)*)"')
+    unescape = {'\\\\': '\\', '\\"': '"', '\\n': '\n'}
+    metrics: dict[str, list[dict]] = {}
+
+    for line in text.strip().split('\n'):
+        line = line.strip()
+        if not line:
+            continue
+        # "# HELP <name> ..." registers the family; TYPE and other comments are skipped
+        if line.startswith('# HELP'):
+            parts = line.split()
+            if len(parts) > 2:
+                metrics.setdefault(parts[2], [])
+            continue
+        if line.startswith('#'):
+            continue
+
+        match = sample_re.match(line)
+        if not match:
+            continue
+        name, labels_str, value = match.groups()
+        labels = {
+            key: re.sub(r'\\.', lambda m: unescape.get(m.group(0), m.group(0)), raw)
+            for key, raw in label_re.findall(labels_str or '')
+        }
+        metrics.setdefault(name, []).append({'labels': labels, 'value': float(value)})
+
+    return metrics
+
+
+def test_metrics_endpoint_returns_200():
+    """The /metrics endpoint must answer a plain GET with HTTP 200."""
+    banner = "=" * 50
+    print(banner)
+    print("Testing /metrics endpoint returns 200")
+    print(banner)
+    status = httpx.get(METRICS_URL, timeout=10.0).status_code
+    assert status == 200, f"Expected 200, got {status}"
+    print(f"Status: {status} OK")
+    print()
+
+
+def test_metrics_content_type():
+    """The /metrics response must be served with a text/plain Content-Type."""
+    banner = "=" * 50
+    print(banner)
+    print("Testing /metrics Content-Type header")
+    print(banner)
+    headers = httpx.get(METRICS_URL, timeout=10.0).headers
+    content_type = headers.get('content-type', '')
+    assert 'text/plain' in content_type, f"Expected text/plain, got {content_type}"
+    print(f"Content-Type: {content_type}")
+    print()
+
+
+def test_metrics_contains_expected_metrics():
+    """Required NexusGate metric names must appear in the /metrics body.
+
+    Optional metric names are only reported, never asserted, because they
+    may be missing when there is no data.
+    """
+    banner = "=" * 50
+    print(banner)
+    print("Testing /metrics contains expected metrics")
+    print(banner)
+
+    body = httpx.get(METRICS_URL, timeout=10.0).text
+
+    # Names that should always be present
+    required = (
+        'nexusgate_info',
+        'nexusgate_active_api_keys',
+        'nexusgate_active_providers',
+        'nexusgate_active_models',
+    )
+    # Names that may be absent if no data has been recorded
+    optional = (
+        'nexusgate_completions_total',
+        'nexusgate_embeddings_total',
+        'nexusgate_tokens_prompt_total',
+        'nexusgate_tokens_completion_total',
+        'nexusgate_tokens_embedding_total',
+        'nexusgate_completion_duration_seconds',
+        'nexusgate_completion_ttft_seconds',
+        'nexusgate_embedding_duration_seconds',
+    )
+
+    for name in required:
+        assert name in body, f"Missing expected metric: {name}"
+        print(f"  Found: {name}")
+
+    # Substring presence only; absence is reported without failing
+    for name in optional:
+        label = "Found" if name in body else "Not found (no data)"
+        print(f"  {label}: {name}")
+
+    print()
+
+
+def test_metrics_prometheus_format():
+    """The /metrics body must look like Prometheus exposition text.
+
+    Requires "# HELP" and "# TYPE" comments, and a nexusgate_info sample
+    whose first entry carries a version label.
+    """
+    banner = "=" * 50
+    print(banner)
+    print("Testing Prometheus format validity")
+    print(banner)
+
+    body = httpx.get(METRICS_URL, timeout=10.0).text
+    assert '# HELP' in body, "Missing # HELP comments"
+    assert '# TYPE' in body, "Missing # TYPE comments"
+    print("  Has # HELP comments: Yes")
+    print("  Has # TYPE comments: Yes")
+
+    families = parse_prometheus_metrics(body)
+    print(f"  Parsed {len(families)} metric families")
+
+    assert 'nexusgate_info' in families, "Missing nexusgate_info metric"
+    info_samples = families['nexusgate_info']
+    assert len(info_samples) > 0, "nexusgate_info has no values"
+    info_labels = info_samples[0]['labels']
+    assert 'version' in info_labels, "nexusgate_info missing version label"
+    print(f"  nexusgate_info version: {info_labels['version']}")
+    print()
+
+
+def test_metrics_gauge_values():
+    """Test that the scalar gauges have a sample and every gauge value is >= 0"""
+    print("=" * 50)
+    print("Testing gauge metric values")
+    print("=" * 50)
+
+    response = httpx.get(METRICS_URL, timeout=10.0)
+    metrics = parse_prometheus_metrics(response.text)
+
+    # A "# HELP" line alone registers an empty family, so require a sample before indexing [0]
+    assert metrics.get('nexusgate_active_api_keys'), "nexusgate_active_api_keys has no samples"
+    api_keys_value = metrics['nexusgate_active_api_keys'][0]['value']
+    assert api_keys_value >= 0, f"Invalid api_keys value: {api_keys_value}"
+    print(f"  nexusgate_active_api_keys: {int(api_keys_value)}")
+
+    # Same for active_providers: fail with a message instead of an IndexError
+    assert metrics.get('nexusgate_active_providers'), "nexusgate_active_providers has no samples"
+    providers_value = metrics['nexusgate_active_providers'][0]['value']
+    assert providers_value >= 0, f"Invalid providers value: {providers_value}"
+    print(f"  nexusgate_active_providers: {int(providers_value)}")
+
+    # Each active_models sample is reported with its "type" label (or "unknown")
+    assert 'nexusgate_active_models' in metrics, "Missing nexusgate_active_models metric"
+    for entry in metrics['nexusgate_active_models']:
+        model_type = entry['labels'].get('type', 'unknown')
+        value = entry['value']
+        assert value >= 0, f"Invalid models value: {value}"
+        print(f"  nexusgate_active_models{{type=\"{model_type}\"}}: {int(value)}")
+
+    print()
+
+
+def test_metrics_histogram_format():
+    """Test each histogram that is present exposes _sum, _count and its own le="+Inf" bucket"""
+    print("=" * 50)
+    print("Testing histogram metric format")
+    print("=" * 50)
+
+    response = httpx.get(METRICS_URL, timeout=10.0)
+    content = response.text
+
+    histogram_names = [
+        'nexusgate_completion_duration_seconds',
+        'nexusgate_completion_ttft_seconds',
+        'nexusgate_embedding_duration_seconds',
+    ]
+
+    for hist_name in histogram_names:
+        if f'{hist_name}_bucket' in content:
+            print(f"  {hist_name}:")
+            # _bucket presence is already established by the branch condition
+            assert f'{hist_name}_sum' in content, f"Missing _sum for {hist_name}"
+            assert f'{hist_name}_count' in content, f"Missing _count for {hist_name}"
+            # The +Inf bucket must be on this histogram's own series, not just anywhere in the body
+            inf_bucket = '^' + re.escape(hist_name + '_bucket{') + r'[^\n]*le="\+Inf"'
+            assert re.search(inf_bucket, content, re.MULTILINE), \
+                f"Missing +Inf bucket for {hist_name}"
+            print("    Has _bucket: Yes")
+            print("    Has _sum: Yes")
+            print("    Has _count: Yes")
+            print("    Has +Inf bucket: Yes")
+        else:
+            print(f"  {hist_name}: No data (skipped)")
+
+    print()
+
+
+def test_show_sample_output():
+    """Print the first 50 lines of the /metrics body (no assertions)."""
+    banner = "=" * 50
+    print(banner)
+    print("Sample metrics output (first 50 lines)")
+    print(banner)
+    lines = httpx.get(METRICS_URL, timeout=10.0).text.strip().split('\n')
+    shown, hidden = lines[:50], len(lines) - 50
+    for line in shown:
+        print(f"  {line}")
+    if hidden > 0:
+        print(f"  ... ({hidden} more lines)")
+    print()
+
+
+if __name__ == "__main__":
+    # Script entry point: run every check in order, print a summary, exit 1 on any failure
+    rule = "=" * 60
+    print("\n" + rule)
+    print("   NexusGate - Prometheus Metrics API Tests")
+    print(f"   Target: {METRICS_URL}")
+    print(rule + "\n")
+    tests = (
+        test_metrics_endpoint_returns_200,
+        test_metrics_content_type,
+        test_metrics_contains_expected_metrics,
+        test_metrics_prometheus_format,
+        test_metrics_gauge_values,
+        test_metrics_histogram_format,
+        test_show_sample_output,
+    )
+    passed = failed = 0
+
+    for check in tests:
+        try:
+            check()
+        except AssertionError as exc:
+            failed += 1
+            print(f"FAILED: {check.__name__}")
+            print(f"  Error: {exc}")
+        except Exception as exc:
+            failed += 1
+            print(f"ERROR: {check.__name__}")
+            print(f"  {type(exc).__name__}: {exc}")
+        else:
+            passed += 1
+
+    print(rule)
+    print(f"Results: {passed} passed, {failed} failed")
+    print(rule)
+
+    if failed > 0:
+        sys.exit(1)
+    print("\nAll Prometheus metrics tests passed!")
diff --git a/scripts/quick-start.sh b/scripts/quick-start.sh
index b0c267f..35070a3 100755
--- a/scripts/quick-start.sh
+++ b/scripts/quick-start.sh
@@ -10,11 +10,22 @@ RED='\033[0;31m'
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 BLUE='\033[0;34m'
+CYAN='\033[0;36m'
 NC='\033[0m' # No Color
 
 echo -e "${BLUE}🚀 NexusGate 一键部署脚本${NC}"
 echo "===================================="
 
+# Global settings: download source and file URLs (set in select_download_source) and the monitoring opt-in flag
+DOWNLOAD_SOURCE=""
+COMPOSE_URL=""
+MONITORING_COMPOSE_URL=""
+PROMETHEUS_URL=""
+GRAFANA_DATASOURCE_URL=""
+GRAFANA_DASHBOARD_PROVIDER_URL=""
+GRAFANA_DASHBOARD_URL=""
+ENABLE_MONITORING="false"
+
 # 选择下载源
 select_download_source() {
     echo -e "${BLUE}🌐 请选择下载源${NC}"
@@ -22,19 +33,29 @@ select_download_source() {
     echo "1) GitHub 官方源 (推荐海外用户)"
     echo "2) 国内镜像源 (推荐国内用户，更快更稳定)"
     echo "===================================="
-    
+
     while true; do
         read -p "请选择 (1/2): " choice
         case $choice in
             1)
                 DOWNLOAD_SOURCE="github"
                 COMPOSE_URL="https://raw.githubusercontent.com/EM-GeekLab/NexusGate/main/docker-compose.yaml"
+                MONITORING_COMPOSE_URL="https://raw.githubusercontent.com/EM-GeekLab/NexusGate/main/docker-compose.monitoring.yaml"
+                PROMETHEUS_URL="https://raw.githubusercontent.com/EM-GeekLab/NexusGate/main/prometheus/prometheus.yml"
+                GRAFANA_DATASOURCE_URL="https://raw.githubusercontent.com/EM-GeekLab/NexusGate/main/grafana/provisioning/datasources/prometheus.yml"
+                GRAFANA_DASHBOARD_PROVIDER_URL="https://raw.githubusercontent.com/EM-GeekLab/NexusGate/main/grafana/provisioning/dashboards/dashboards.yml"
+                GRAFANA_DASHBOARD_URL="https://raw.githubusercontent.com/EM-GeekLab/NexusGate/main/grafana/provisioning/dashboards/json/nexusgate-dashboard.json"
                 echo -e "${GREEN}✅ 已选择 GitHub 官方源${NC}"
                 break
                 ;;
             2)
                 DOWNLOAD_SOURCE="china"
                 COMPOSE_URL="https://cnb.cool/EM-GeekLab/NexusGate/-/git/raw/main/docker-compose.cn.yaml"
+                MONITORING_COMPOSE_URL="https://cnb.cool/EM-GeekLab/NexusGate/-/git/raw/main/docker-compose.monitoring.yaml"
+                PROMETHEUS_URL="https://cnb.cool/EM-GeekLab/NexusGate/-/git/raw/main/prometheus/prometheus.yml"
+                GRAFANA_DATASOURCE_URL="https://cnb.cool/EM-GeekLab/NexusGate/-/git/raw/main/grafana/provisioning/datasources/prometheus.yml"
+                GRAFANA_DASHBOARD_PROVIDER_URL="https://cnb.cool/EM-GeekLab/NexusGate/-/git/raw/main/grafana/provisioning/dashboards/dashboards.yml"
+                GRAFANA_DASHBOARD_URL="https://cnb.cool/EM-GeekLab/NexusGate/-/git/raw/main/grafana/provisioning/dashboards/json/nexusgate-dashboard.json"
                 echo -e "${GREEN}✅ 已选择国内镜像源${NC}"
                 break
                 ;;
@@ -46,6 +67,43 @@ select_download_source() {
     echo ""
 }
 
+# Ask whether to install the optional Prometheus + Grafana monitoring stack; stores the answer in ENABLE_MONITORING
+ask_monitoring() {
+    echo -e "${BLUE}📊 监控组件配置${NC}"
+    echo "===================================="
+    echo "NexusGate 支持 Prometheus + Grafana 监控栈，可以可视化以下指标："
+    echo "  - 请求数量和速率"
+    echo "  - 延迟分布 (P50/P95/P99)"
+    echo "  - Token 使用量"
+    echo "  - 错误率和成功率"
+    echo "  - 模型和 API 格式分布"
+    echo ""
+    echo -e "${YELLOW}是否安装 Prometheus + Grafana 监控组件？${NC}"
+    echo "1) 是 - 安装完整监控栈 (额外占用约 500MB 内存)"
+    echo "2) 否 - 仅安装核心服务 (推荐资源有限的环境)"
+    echo "===================================="
+    # Re-prompt until the answer is valid; pressing Enter alone selects option 2 (core services only)
+    while true; do
+        read -p "请选择 (1/2) [默认: 2]: " monitor_choice
+        case $monitor_choice in
+            1)
+                ENABLE_MONITORING="true"
+                echo -e "${GREEN}✅ 将安装 Prometheus + Grafana 监控组件${NC}"
+                break
+                ;;
+            2|"")
+                ENABLE_MONITORING="false"
+                echo -e "${GREEN}✅ 仅安装核心服务${NC}"
+                break
+                ;;
+            *)
+                echo -e "${RED}❌ 请输入有效选项 (1 或 2)${NC}"
+                ;;
+        esac
+    done
+    echo ""
+}
+
 # 检查 Docker 是否安装和权限
 check_docker() {
     if ! command -v docker &> /dev/null; then
@@ -53,13 +111,13 @@ check_docker() {
         echo -e "${YELLOW}请参考 README.md 中的 Docker 安装指南${NC}"
         exit 1
     fi
-    
+
     if ! command -v docker-compose &> /dev/null && ! docker compose version &> /dev/null; then
         echo -e "${RED}❌ Docker Compose 未安装，请先安装 Docker Compose！${NC}"
         echo -e "${YELLOW}请参考 README.md 中的 Docker 安装指南${NC}"
         exit 1
     fi
-    
+
     # 检查 Docker 权限
     echo -e "${BLUE}🔍 检查 Docker 权限...${NC}"
     if ! docker ps &> /dev/null; then
@@ -70,7 +128,7 @@ check_docker() {
         echo ""
         exit 1
     fi
-    
+
     echo -e "${GREEN}✅ Docker 环境和权限检查通过${NC}"
 }
 
@@ -82,18 +140,69 @@ generate_password() {
 # 下载配置文件
 download_configs() {
     echo -e "${BLUE}📥 下载配置文件...${NC}"
-    
+
     local compose_file="docker-compose.yaml"
     if [ "$DOWNLOAD_SOURCE" = "china" ]; then
         compose_file="docker-compose.cn.yaml"
     fi
-    
+
+    # Download the main compose file (skipped when it already exists locally)
     if [ ! -f "$compose_file" ]; then
         curl -fsSL "$COMPOSE_URL" -o "$compose_file"
         echo -e "${GREEN}✅ $compose_file 下载完成${NC}"
     else
         echo -e "${YELLOW}⚠️  $compose_file 已存在，跳过下载${NC}"
     fi
+
+    # Download the monitoring stack files, only when monitoring was enabled
+    if [ "$ENABLE_MONITORING" = "true" ]; then
+        echo -e "${BLUE}📥 下载监控组件配置文件...${NC}"
+        # NOTE(review): each success message below is printed right after curl without checking its exit status - confirm the script runs under `set -e`
+        # Download docker-compose.monitoring.yaml
+        if [ ! -f "docker-compose.monitoring.yaml" ]; then
+            curl -fsSL "$MONITORING_COMPOSE_URL" -o "docker-compose.monitoring.yaml"
+            echo -e "${GREEN}✅ docker-compose.monitoring.yaml 下载完成${NC}"
+        else
+            echo -e "${YELLOW}⚠️  docker-compose.monitoring.yaml 已存在，跳过下载${NC}"
+        fi
+
+        # Create the prometheus directory and download its configuration
+        mkdir -p prometheus
+        if [ ! -f "prometheus/prometheus.yml" ]; then
+            curl -fsSL "$PROMETHEUS_URL" -o "prometheus/prometheus.yml"
+            echo -e "${GREEN}✅ prometheus/prometheus.yml 下载完成${NC}"
+        else
+            echo -e "${YELLOW}⚠️  prometheus/prometheus.yml 已存在，跳过下载${NC}"
+        fi
+
+        # Create the Grafana provisioning directory layout
+        mkdir -p grafana/provisioning/datasources
+        mkdir -p grafana/provisioning/dashboards/json
+
+        # Download the Grafana datasource configuration
+        if [ ! -f "grafana/provisioning/datasources/prometheus.yml" ]; then
+            curl -fsSL "$GRAFANA_DATASOURCE_URL" -o "grafana/provisioning/datasources/prometheus.yml"
+            echo -e "${GREEN}✅ grafana/provisioning/datasources/prometheus.yml 下载完成${NC}"
+        else
+            echo -e "${YELLOW}⚠️  grafana/provisioning/datasources/prometheus.yml 已存在，跳过下载${NC}"
+        fi
+
+        # Download the Grafana dashboard provider configuration
+        if [ ! -f "grafana/provisioning/dashboards/dashboards.yml" ]; then
+            curl -fsSL "$GRAFANA_DASHBOARD_PROVIDER_URL" -o "grafana/provisioning/dashboards/dashboards.yml"
+            echo -e "${GREEN}✅ grafana/provisioning/dashboards/dashboards.yml 下载完成${NC}"
+        else
+            echo -e "${YELLOW}⚠️  grafana/provisioning/dashboards/dashboards.yml 已存在，跳过下载${NC}"
+        fi
+
+        # Download the NexusGate dashboard
+        if [ ! -f "grafana/provisioning/dashboards/json/nexusgate-dashboard.json" ]; then
+            curl -fsSL "$GRAFANA_DASHBOARD_URL" -o "grafana/provisioning/dashboards/json/nexusgate-dashboard.json"
+            echo -e "${GREEN}✅ NexusGate Grafana Dashboard 下载完成${NC}"
+        else
+            echo -e "${YELLOW}⚠️  NexusGate Grafana Dashboard 已存在，跳过下载${NC}"
+        fi
+    fi
 }
 
 # 获取用户输入的密码
@@ -102,13 +211,13 @@ get_user_passwords() {
     echo ""
     echo -e "${YELLOW}💡 提示：为了安全起见，密码输入时不会显示字符${NC}"
     echo ""
-    
+
     # 数据库密码输入
     echo -e "${YELLOW}请设置数据库密码 (至少8位，直接回车将自动生成随机密码):${NC}"
     while true; do
         read -s -p "数据库密码: " db_input
         echo ""
-        
+
         if [ -z "$db_input" ]; then
             DB_PASSWORD=$(generate_password)
             echo -e "${GREEN}✅ 已自动生成随机数据库密码（16位强密码）${NC}"
@@ -122,15 +231,15 @@ get_user_passwords() {
             break
         fi
     done
-    
+
     echo ""
-    
+
     # 管理员密钥输入
     echo -e "${YELLOW}请设置管理员密钥 (至少8位，直接回车将自动生成随机密钥):${NC}"
     while true; do
         read -s -p "管理员密钥: " admin_input
         echo ""
-        
+
         if [ -z "$admin_input" ]; then
             ADMIN_SECRET=$(generate_password)
             echo -e "${GREEN}✅ 已自动生成随机管理员密钥（16位强密钥）${NC}"
@@ -144,14 +253,14 @@ get_user_passwords() {
             break
         fi
     done
-    
+
     echo ""
-    
+
     # Web 端口输入
     echo -e "${YELLOW}请设置 Web 服务端口 (1024-65535，默认 8080):${NC}"
     while true; do
         read -p "Web 端口: " port_input
-        
+
         if [ -z "$port_input" ]; then
             WEB_PORT="8080"
             echo -e "${GREEN}✅ 使用默认端口 8080${NC}"
@@ -164,27 +273,103 @@ get_user_passwords() {
             echo -e "${RED}❌ 请输入有效的端口号 (1024-65535)${NC}"
         fi
     done
-    
+
     echo ""
-    
+
+    # If monitoring is enabled, collect the Prometheus port, the Grafana port and the Grafana admin password
+    if [ "$ENABLE_MONITORING" = "true" ]; then
+        echo -e "${CYAN}📊 监控组件配置${NC}"
+        echo ""
+
+        # Prometheus 端口
+        echo -e "${YELLOW}请设置 Prometheus 端口 (默认 9090):${NC}"
+        while true; do
+            read -p "Prometheus 端口: " prom_port_input
+
+            if [ -z "$prom_port_input" ]; then
+                PROMETHEUS_PORT="9090"
+                echo -e "${GREEN}✅ 使用默认端口 9090${NC}"
+                break
+            elif [[ "$prom_port_input" =~ ^[0-9]+$ ]] && [ "$prom_port_input" -ge 1024 ] && [ "$prom_port_input" -le 65535 ]; then
+                PROMETHEUS_PORT="$prom_port_input"
+                echo -e "${GREEN}✅ 已设置 Prometheus 端口为 $prom_port_input${NC}"
+                break
+            else
+                echo -e "${RED}❌ 请输入有效的端口号 (1024-65535)${NC}"
+            fi
+        done
+
+        echo ""
+
+        # Grafana 端口
+        echo -e "${YELLOW}请设置 Grafana 端口 (默认 3001):${NC}"
+        while true; do
+            read -p "Grafana 端口: " grafana_port_input
+
+            if [ -z "$grafana_port_input" ]; then
+                GRAFANA_PORT="3001"
+                echo -e "${GREEN}✅ 使用默认端口 3001${NC}"
+                break
+            elif [[ "$grafana_port_input" =~ ^[0-9]+$ ]] && [ "$grafana_port_input" -ge 1024 ] && [ "$grafana_port_input" -le 65535 ]; then
+                GRAFANA_PORT="$grafana_port_input"
+                echo -e "${GREEN}✅ 已设置 Grafana 端口为 $grafana_port_input${NC}"
+                break
+            else
+                echo -e "${RED}❌ 请输入有效的端口号 (1024-65535)${NC}"
+            fi
+        done
+
+        echo ""
+
+        # Grafana 密码
+        echo -e "${YELLOW}请设置 Grafana 管理员密码 (至少8位，直接回车将使用默认密码 'admin'):${NC}"
+        while true; do
+            read -s -p "Grafana 密码: " grafana_pass_input
+            echo ""
+
+            if [ -z "$grafana_pass_input" ]; then
+                GRAFANA_PASSWORD="admin"
+                echo -e "${YELLOW}⚠️  使用默认密码 'admin'，建议登录后修改${NC}"
+                break
+            elif [ ${#grafana_pass_input} -lt 8 ]; then
+                echo -e "${RED}❌ 密码长度至少8位，请重新输入${NC}"
+                continue
+            else
+                GRAFANA_PASSWORD="$grafana_pass_input"
+                echo -e "${GREEN}✅ 已设置自定义 Grafana 密码${NC}"
+                break
+            fi
+        done
+
+        echo ""
+    fi
+
     # 配置确认
     echo -e "${BLUE}📋 配置摘要${NC}"
     echo "=================================="
     echo -e "数据库密码: ${GREEN}[已设置]${NC}"
-    echo -e "管理员密钥: ${GREEN}[已设置]${NC}"  
+    echo -e "管理员密钥: ${GREEN}[已设置]${NC}"
     echo -e "Web 端口: ${GREEN}${WEB_PORT}${NC}"
+    if [ "$ENABLE_MONITORING" = "true" ]; then
+        echo -e "监控组件: ${CYAN}已启用${NC}"
+        echo -e "  - Prometheus 端口: ${GREEN}${PROMETHEUS_PORT}${NC}"
+        echo -e "  - Grafana 端口: ${GREEN}${GRAFANA_PORT}${NC}"
+        echo -e "  - Grafana 密码: ${GREEN}[已设置]${NC}"
+    else
+        echo -e "监控组件: ${YELLOW}未启用${NC}"
+    fi
     echo "=================================="
     echo ""
     echo -e "${YELLOW}确认以上配置并继续部署？(y/N)${NC}"
     read -p "请输入选择: " confirm
-    
+
     if [[ "$confirm" =~ ^[Yy]$ ]]; then
         echo -e "${GREEN}✅ 配置确认，开始创建配置文件${NC}"
     else
         echo -e "${RED}❌ 已取消部署${NC}"
         exit 0
     fi
-    
+
     echo ""
 }
 
@@ -192,10 +377,10 @@ get_user_passwords() {
 create_env_file() {
     if [ ! -f ".env" ]; then
         echo -e "${BLUE}📝 创建环境变量配置文件...${NC}"
-        
+
         # 获取用户输入
         get_user_passwords
-        
+
         cat > .env << EOF
 # NexusGate 环境配置文件
 # 生成时间: $(date)
@@ -217,7 +402,36 @@ ADMIN_SUPER_SECRET=${ADMIN_SECRET}
 # Web 服务端口（默认 8080）
 WEB_PORT=${WEB_PORT}
 EOF
-        
+
+        # 如果启用了监控，添加监控相关配置
+        if [ "$ENABLE_MONITORING" = "true" ]; then
+            cat >> .env << EOF
+
+# ======================
+# 监控组件配置
+# ======================
+# 是否启用监控组件
+ENABLE_MONITORING=true
+
+# Prometheus 端口
+PROMETHEUS_PORT=${PROMETHEUS_PORT}
+
+# Grafana 配置
+GRAFANA_PORT=${GRAFANA_PORT}
+GRAFANA_USER=admin
+GRAFANA_PASSWORD=${GRAFANA_PASSWORD}
+EOF
+        else
+            cat >> .env << EOF
+
+# ======================
+# 监控组件配置
+# ======================
+# 是否启用监控组件
+ENABLE_MONITORING=false
+EOF
+        fi
+
         echo -e "${GREEN}✅ .env 文件创建完成${NC}"
         echo ""
         echo -e "${YELLOW}⚠️  重要：请保存好以下配置信息${NC}"
@@ -225,32 +439,51 @@ EOF
         echo -e "数据库密码: ${GREEN}${DB_PASSWORD}${NC}"
         echo -e "管理员密钥: ${GREEN}${ADMIN_SECRET}${NC}"
         echo -e "访问地址: ${GREEN}http://localhost:${WEB_PORT}${NC}"
+        if [ "$ENABLE_MONITORING" = "true" ]; then
+            echo -e "Prometheus: ${CYAN}http://localhost:${PROMETHEUS_PORT}${NC}"
+            echo -e "Grafana: ${CYAN}http://localhost:${GRAFANA_PORT}${NC}"
+            echo -e "Grafana 用户名: ${CYAN}admin${NC}"
+            echo -e "Grafana 密码: ${CYAN}${GRAFANA_PASSWORD}${NC}"
+        fi
         echo "=================================="
         echo ""
         echo -e "${BLUE}📝 完整配置已保存到 .env 文件中${NC}"
-        
+
     else
         echo -e "${YELLOW}⚠️  .env 文件已存在，跳过创建${NC}"
         echo -e "${BLUE}💡 如需重新生成，请删除 .env 文件后重新运行脚本${NC}"
+
+        # Reuse the monitoring choice stored in the existing .env (this replaces the answer given at the prompt).
+        # A pipeline returns the status of its last command (tr), so the default is applied as a separate step.
+        ENABLE_MONITORING=$(grep "ENABLE_MONITORING=" .env 2>/dev/null | cut -d '=' -f2 | tr -d ' ' || true)
+        ENABLE_MONITORING="${ENABLE_MONITORING:-false}"
     fi
 }
 
 # 启动服务
 start_services() {
     echo -e "${BLUE}🚀 启动 NexusGate 服务...${NC}"
-    
+
     local compose_file="docker-compose.yaml"
     if [ "$DOWNLOAD_SOURCE" = "china" ]; then
         compose_file="docker-compose.cn.yaml"
     fi
-    
+
     # 检查是否使用新版 docker compose 命令
-    if docker compose version &> /dev/null; then
-        docker compose -f "$compose_file" up -d
+    local compose_cmd="docker compose"
+    if ! docker compose version &> /dev/null; then
+        compose_cmd="docker-compose"
+    fi
+    # $compose_cmd is expanded unquoted below so that "docker compose" splits into command + subcommand
+    # Start the services; the monitoring compose file is layered on top when monitoring is enabled
+    if [ "$ENABLE_MONITORING" = "true" ]; then
+        echo -e "${CYAN}📊 启动核心服务和监控组件...${NC}"
+        $compose_cmd -f "$compose_file" -f "docker-compose.monitoring.yaml" up -d
     else
-        docker-compose -f "$compose_file" up -d
+        echo -e "${GREEN}🚀 启动核心服务...${NC}"
+        $compose_cmd -f "$compose_file" up -d
     fi
-    
+
     echo -e "${GREEN}✅ 服务启动完成！${NC}"
 }
 
@@ -260,30 +493,55 @@ show_access_info() {
     echo "===================================="
     echo -e "${GREEN}🎉 NexusGate 部署完成！${NC}"
     echo "===================================="
-    
+
     # 从 .env 文件读取配置
     if [ -f ".env" ]; then
         WEB_PORT=$(grep "WEB_PORT=" .env | cut -d '=' -f2 | tr -d ' ')
         ADMIN_SECRET=$(grep "ADMIN_SUPER_SECRET=" .env | cut -d '=' -f2 | tr -d ' ')
-        
-        echo -e "🌐 访问地址: ${GREEN}http://localhost:${WEB_PORT:-8080}${NC}"
+        ENABLE_MONITORING=$(grep "ENABLE_MONITORING=" .env | cut -d '=' -f2 | tr -d ' ')
+
+        echo -e "🌐 NexusGate 访问地址: ${GREEN}http://localhost:${WEB_PORT:-8080}${NC}"
         echo -e "🔑 管理员密钥: ${GREEN}${ADMIN_SECRET}${NC}"
+
+        if [ "$ENABLE_MONITORING" = "true" ]; then
+            PROMETHEUS_PORT=$(grep "PROMETHEUS_PORT=" .env | cut -d '=' -f2 | tr -d ' ')
+            GRAFANA_PORT=$(grep "GRAFANA_PORT=" .env | cut -d '=' -f2 | tr -d ' ')
+            # Keep everything after the first '=' and keep spaces: a user-chosen password may contain both
+            GRAFANA_PASSWORD=$(grep "GRAFANA_PASSWORD=" .env | cut -d '=' -f2-)
+            echo ""
+            echo -e "${CYAN}📊 监控组件访问信息:${NC}"
+            echo -e "  Prometheus: ${CYAN}http://localhost:${PROMETHEUS_PORT:-9090}${NC}"
+            echo -e "  Grafana: ${CYAN}http://localhost:${GRAFANA_PORT:-3001}${NC}"
+            echo -e "  Grafana 用户名: ${CYAN}admin${NC}"
+            echo -e "  Grafana 密码: ${CYAN}${GRAFANA_PASSWORD:-admin}${NC}"
+        fi
     else
         echo -e "🌐 访问地址: ${GREEN}http://localhost:8080${NC}"
         echo -e "🔑 管理员密钥: ${YELLOW}请查看 .env 文件${NC}"
     fi
-    
+
     echo ""
     echo -e "${BLUE}📖 使用说明:${NC}"
     echo "1. 在浏览器中打开上述地址"
     echo "2. 使用管理员密钥登录系统"
     echo "3. 开始配置您的第一个模型和应用，其中 BaseURL 需要设置为 http://localhost:${WEB_PORT:-8080}/v1/"
     echo "后续您也可以通过该服务器的 IP 地址或域名访问 NexusGate，BaseURL 需要设置为 http://<服务器IP或域名>:${WEB_PORT:-8080}/v1/"
+
+    if [ "$ENABLE_MONITORING" = "true" ]; then
+        echo ""
+        echo -e "${CYAN}📊 监控使用说明:${NC}"
+        echo "1. 访问 Grafana 地址并使用上述凭证登录"
+        echo "2. 在 Dashboards 中找到 'NexusGate LLM Gateway' 仪表板"
+        echo "3. 查看请求量、延迟、Token 使用量等指标"
+        echo ""
+        echo -e "${YELLOW}💡 提示: NexusGate 的 /metrics 端点可被任何 Prometheus 实例抓取${NC}"
+    fi
 }
 
 # 主函数
 main() {
     select_download_source
+    ask_monitoring
     check_docker
     download_configs
     create_env_file