From 0855538cc2c6abd75caad59b56e405a49f5a6910 Mon Sep 17 00:00:00 2001 From: Justas Balcas Date: Mon, 4 May 2026 17:45:38 -0500 Subject: [PATCH] Otel metrics and documentation --- README.md | 32 ++++++++++++++++++++++++++++++-- app/config.py | 6 ++++++ app/main.py | 36 ++++++++++++++++++++++-------------- 3 files changed, 58 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index f5a1de24..ce0854a2 100644 --- a/README.md +++ b/README.md @@ -51,8 +51,36 @@ If using docker (see next section), your dockerfile could extend this reference - `API_URL_ROOT`: the base url when constructing links returned by the api (eg.: https://iri.myfacility.com) - `API_PREFIX`: the path prefix where the api is hosted. Defaults to `/`. (eg.: `/api`) - `API_URL`: the path to the api itself. Defaults to `api/v1`. -- `OPENTELEMETRY_ENABLED`: Enables OpenTelemetry. If enabled, the application will use OpenTelemetry SDKs and emit traces, metrics, and logs. Default to false -- `OTLP_ENDPOINT`: OpenTelemetry Protocol collector endpoint to export telemetry data. If empty or not set, telemetry data is logged locally to log file. Default: "" +### OpenTelemetry + +The API supports OpenTelemetry for distributed tracing and metrics. Traces and metrics can be independently enabled or disabled. + +| Variable | Default | Description | +|---|---|---| +| `OPENTELEMETRY_ENABLED` | `false` | Master switch. Must be `true` for any telemetry to be emitted. | +| `OTEL_TRACES_ENABLED` | `true` | Enable trace export. Only takes effect when `OPENTELEMETRY_ENABLED=true`. | +| `OTEL_METRICS_ENABLED` | `true` | Enable metric export. Only takes effect when `OPENTELEMETRY_ENABLED=true`. | +| `OTLP_ENDPOINT` | `""` | gRPC endpoint for the OTLP collector (e.g. `http://otel-collector:4317`). When empty, telemetry is printed to the console. | +| `OPENTELEMETRY_DEBUG` | `false` | Sets trace sample rate to 100% (overrides `OTEL_SAMPLE_RATE`). | +| `OTEL_SAMPLE_RATE` | `0.2` | Trace sampling rate (0.0 to 1.0). 
Ignored when `OPENTELEMETRY_DEBUG=true`. | +| `OTEL_METRIC_EXPORT_INTERVAL` | `60000` | Metric export interval in milliseconds. | + +When metrics are enabled, the FastAPI instrumentor automatically emits standard HTTP server metrics: `http.server.active_requests`, `http.server.duration`, and `http.server.response.size`. + +Examples: +```bash +# Traces and metrics to an OTLP collector +OPENTELEMETRY_ENABLED=true OTLP_ENDPOINT=http://otel-collector:4317 + +# Traces only, no metrics +OPENTELEMETRY_ENABLED=true OTEL_METRICS_ENABLED=false + +# Metrics only, no traces +OPENTELEMETRY_ENABLED=true OTEL_TRACES_ENABLED=false + +# Debug mode: 100% sampling, console output +OPENTELEMETRY_ENABLED=true OPENTELEMETRY_DEBUG=true +``` Links to data, created by this api, will concatenate these values producing links, eg: `https://iri.myfacility.com/my_api_prefix/my_api_url/projects/123` diff --git a/app/config.py b/app/config.py index 35c17c93..958936d8 100644 --- a/app/config.py +++ b/app/config.py @@ -44,6 +44,9 @@ OPENTELEMETRY_DEBUG = os.environ.get("OPENTELEMETRY_DEBUG", "false").lower() == "true" OTLP_ENDPOINT = os.environ.get("OTLP_ENDPOINT", "") OTEL_SAMPLE_RATE = float(os.environ.get("OTEL_SAMPLE_RATE", "0.2")) +OTEL_TRACES_ENABLED = os.environ.get("OTEL_TRACES_ENABLED", "true").lower() == "true" +OTEL_METRICS_ENABLED = os.environ.get("OTEL_METRICS_ENABLED", "true").lower() == "true" +OTEL_METRIC_EXPORT_INTERVAL = int(os.environ.get("OTEL_METRIC_EXPORT_INTERVAL", "60000")) # Print all startup config for debugging logger.info("IRI Facility API starting with config:") @@ -58,4 +61,7 @@ logger.info(f"OPENTELEMETRY_DEBUG={OPENTELEMETRY_DEBUG}") logger.info(f"OTLP_ENDPOINT={OTLP_ENDPOINT}") logger.info(f"OTEL_SAMPLE_RATE={OTEL_SAMPLE_RATE}") +logger.info(f"OTEL_TRACES_ENABLED={OTEL_TRACES_ENABLED}") +logger.info(f"OTEL_METRICS_ENABLED={OTEL_METRICS_ENABLED}") +logger.info(f"OTEL_METRIC_EXPORT_INTERVAL={OTEL_METRIC_EXPORT_INTERVAL}") logger.info("="*40) diff --git a/app/main.py 
b/app/main.py index 98d55eaf..1f6faccc 100644 --- a/app/main.py +++ b/app/main.py @@ -3,17 +3,21 @@ import logging from fastapi import FastAPI, Request -from opentelemetry import trace from starlette.middleware.base import BaseHTTPMiddleware +from opentelemetry import trace, metrics from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import ConsoleSpanExporter, BatchSpanProcessor, SimpleSpanProcessor from opentelemetry.sdk.trace.sampling import TraceIdRatioBased, ParentBased +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, PeriodicExportingMetricReader from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor from . import config from .apilogger import configure_logging +from .request_context import set_api_url_base, _api_url_base from app.routers.error_handlers import install_error_handlers from app.routers.facility import facility @@ -26,23 +30,27 @@ configure_logging(config.LOG_LEVEL) # ------------------------------------------------------------------ -# OpenTelemetry Tracing Configuration +# OpenTelemetry Configuration # ------------------------------------------------------------------ if config.OPENTELEMETRY_ENABLED: resource = Resource.create({"service.name": "iri-facility-api", "service.version": config.API_VERSION, "service.endpoint": config.API_URL_ROOT}) - samplerate = "1.0" if config.OPENTELEMETRY_DEBUG else config.OTEL_SAMPLE_RATE - provider = TracerProvider(resource=resource, sampler=ParentBased(TraceIdRatioBased(samplerate))) - trace.set_tracer_provider(provider) - - if config.OTLP_ENDPOINT: - exporter = OTLPSpanExporter(endpoint=config.OTLP_ENDPOINT, insecure=True) - span_processor = BatchSpanProcessor(exporter) - 
else: - exporter = ConsoleSpanExporter() - span_processor = SimpleSpanProcessor(exporter) - provider.add_span_processor(span_processor) - tracer = trace.get_tracer(__name__) + if config.OTEL_TRACES_ENABLED: + samplerate = 1.0 if config.OPENTELEMETRY_DEBUG else config.OTEL_SAMPLE_RATE  # must be a float: TraceIdRatioBased range-checks the rate numerically + tracer_provider = TracerProvider(resource=resource, sampler=ParentBased(TraceIdRatioBased(samplerate))) + if config.OTLP_ENDPOINT: + span_processor = BatchSpanProcessor(OTLPSpanExporter(endpoint=config.OTLP_ENDPOINT, insecure=True)) + else: + span_processor = SimpleSpanProcessor(ConsoleSpanExporter()) + tracer_provider.add_span_processor(span_processor) + trace.set_tracer_provider(tracer_provider) + + if config.OTEL_METRICS_ENABLED: + if config.OTLP_ENDPOINT: + metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter(endpoint=config.OTLP_ENDPOINT, insecure=True), export_interval_millis=config.OTEL_METRIC_EXPORT_INTERVAL) + else: + metric_reader = PeriodicExportingMetricReader(ConsoleMetricExporter(), export_interval_millis=config.OTEL_METRIC_EXPORT_INTERVAL) + metrics.set_meter_provider(MeterProvider(resource=resource, metric_readers=[metric_reader])) # ------------------------------------------------------------------ APP = FastAPI(servers=[{"url": config.API_URL_ROOT}], **config.API_CONFIG)