diff --git a/src/common/core/logging.py b/src/common/core/logging.py index 121dede..40a3928 100644 --- a/src/common/core/logging.py +++ b/src/common/core/logging.py @@ -15,6 +15,7 @@ from typing_extensions import TypedDict from common.core.constants import LOGGING_DEFAULT_ROOT_LOG_LEVEL +from common.core.sentry import sentry_processor logger = logging.getLogger(__name__) @@ -29,27 +30,6 @@ class JsonRecord(TypedDict, extra_items=Any, total=False): # type: ignore[call- exc_info: str -class JsonFormatter(logging.Formatter): - """Custom formatter for json logs.""" - - def get_json_record(self, record: logging.LogRecord) -> JsonRecord: - formatted_message = record.getMessage() - json_record: JsonRecord = { - "levelname": record.levelname, - "message": formatted_message, - "timestamp": self.formatTime(record, self.datefmt), - "logger_name": record.name, - "pid": record.process, - "thread_name": record.threadName, - } - if record.exc_info: - json_record["exc_info"] = self.formatException(record.exc_info) - return json_record - - def format(self, record: logging.LogRecord) -> str: - return json.dumps(self.get_json_record(record)) - - def setup_logging( log_level: str = "INFO", log_format: str = "generic", @@ -120,7 +100,10 @@ def map_event_to_json_record( method_name: str, event_dict: EventDict, ) -> EventDict: - """Map structlog fields to match :class:`JsonFormatter` output schema.""" + """Map structlog fields to match :class:`JsonRecord` output schema.""" + # Remove foreign record args injected by pass_foreign_args so they + # don't leak into the rendered JSON output. + event_dict.pop("positional_args", None) record: JsonRecord = { "message": event_dict.pop("event", ""), "levelname": event_dict.pop("level", "").upper(), @@ -136,91 +119,40 @@ def map_event_to_json_record( return event_dict -class _SentryFriendlyProcessorFormatter(structlog.stdlib.ProcessorFormatter): - """Preserves ``record.msg`` and ``record.args`` across formatting. - - Sentry's ``LoggingIntegration`` reads these fields *after* handlers run; - structlog's ``ProcessorFormatter`` replaces them with rendered output, breaking event - grouping. We snapshot before and restore after so Sentry sees the originals - and avoids failed event deduplication. - """ - - def __init__( - self, - log_format: str = "generic", - extra_foreign_processors: list[Processor] | None = None, - **kwargs: Any, - ) -> None: - if log_format == "json": - renderer_processors: list[Processor] = [ - map_event_to_json_record, - structlog.processors.JSONRenderer(), - ] - else: - colors = sys.stdout.isatty() and structlog.dev._has_colors - renderer_processors = [ - structlog.dev.ConsoleRenderer(colors=colors), - ] - - foreign_pre_chain: list[Processor] = [ - structlog.contextvars.merge_contextvars, - structlog.stdlib.add_logger_name, - structlog.stdlib.add_log_level, - structlog.processors.TimeStamper(fmt="iso"), - structlog.stdlib.ExtraAdder(), - self.drop_internal_keys, - *(extra_foreign_processors or []), - ] - - super().__init__( - processors=[ - structlog.stdlib.ProcessorFormatter.remove_processors_meta, - *renderer_processors, - ], - foreign_pre_chain=foreign_pre_chain, - **kwargs, - ) - - def format(self, record: logging.LogRecord) -> str: - # Snapshot the original fields before ProcessorFormatter - # replaces them with rendered output. - original_msg = record.msg - original_args = record.args - - # Stash original args on the record so foreign_pre_chain - # processors (e.g. the Gunicorn access log extractor) can - # access them — ProcessorFormatter clears record.args to () - # before running the chain. - record._original_args = original_args - - formatted = super().format(record) - - # Restore so Sentry (and any other post-handler hook) sees - # the original message template and substitution args. - record.msg = original_msg - record.args = original_args - del record._original_args # type: ignore[attr-defined] - - return formatted - - @staticmethod - def drop_internal_keys( - _: WrappedLogger, __: str, event_dict: EventDict - ) -> EventDict: - """Remove internal attributes that leak via ``ExtraAdder``.""" - event_dict.pop("_original_args", None) - return event_dict - - def setup_structlog( log_format: str, extra_foreign_processors: list[Processor] | None = None, ) -> None: """Configure structlog to route through stdlib logging.""" - from common.core.sentry import sentry_processor - formatter = _SentryFriendlyProcessorFormatter( - log_format=log_format, extra_foreign_processors=extra_foreign_processors + if log_format == "json": + renderer_processors: list[Processor] = [ + map_event_to_json_record, + structlog.processors.JSONRenderer(), + ] + else: + colors = sys.stdout.isatty() and structlog.dev._has_colors + renderer_processors = [ + structlog.dev.ConsoleRenderer(colors=colors), + ] + + foreign_pre_chain: list[Processor] = [ + structlog.contextvars.merge_contextvars, + structlog.stdlib.add_logger_name, + structlog.stdlib.add_log_level, + structlog.processors.TimeStamper(fmt="iso"), + structlog.processors.format_exc_info, + structlog.stdlib.ExtraAdder(), + *(extra_foreign_processors or []), + ] + + formatter = structlog.stdlib.ProcessorFormatter( + processors=[ + structlog.stdlib.ProcessorFormatter.remove_processors_meta, + *renderer_processors, + ], + foreign_pre_chain=foreign_pre_chain, + pass_foreign_args=True, ) # Replace the formatter on existing root handlers with ProcessorFormatter. diff --git a/src/common/gunicorn/processors.py b/src/common/gunicorn/processors.py index 28029a1..78190ed 100644 --- a/src/common/gunicorn/processors.py +++ b/src/common/gunicorn/processors.py @@ -30,10 +30,10 @@ def processor( record = event_dict.get("_record") if record is None or record.name != "gunicorn.access": return event_dict - # ProcessorFormatter clears record.args before running - # foreign_pre_chain; the originals are stashed on the record - # by _SentryFriendlyProcessorFormatter.format(). - args = getattr(record, "_original_args", record.args) + # Gunicorn passes request data as a dict in record.args. + # By the time foreign_pre_chain runs, ProcessorFormatter has + # cleared args on its copy; positional_args has the originals. + args = event_dict.get("positional_args", record.args) if not isinstance(args, dict): return event_dict diff --git a/tests/unit/common/core/test_logging.py b/tests/unit/common/core/test_logging.py index e50b388..c49da9f 100644 --- a/tests/unit/common/core/test_logging.py +++ b/tests/unit/common/core/test_logging.py @@ -11,7 +11,7 @@ from sentry_sdk.envelope import Envelope from sentry_sdk.transport import Transport -from common.core.logging import JsonFormatter, setup_logging +from common.core.logging import setup_logging if TYPE_CHECKING: from sentry_sdk._types import Event @@ -49,61 +49,6 @@ def sentry_transport_mock() -> Generator[MockSentryTransport, None, None]: sentry_sdk.init(transport=None) -@pytest.mark.freeze_time("2023-12-08T06:05:47+00:00") -def test_json_formatter__format_log__outputs_expected( - caplog: pytest.LogCaptureFixture, - request: pytest.FixtureRequest, -) -> None: - # Given - json_formatter = JsonFormatter() - - caplog.handler.setFormatter(json_formatter) - logger = logging.getLogger("test_json_formatter__outputs_expected") - logger.setLevel(logging.INFO) - - expected_pid = os.getpid() - expected_module_path = os.path.abspath(request.path) - - def _log_traceback() -> None: - try: - raise Exception() - except Exception as exc: - logger.error("this is an error", exc_info=exc) - - expected_lineno = _log_traceback.__code__.co_firstlineno + 2 - expected_tb_string = ( - "Traceback (most recent call last):\n" - f' File "{expected_module_path}",' - f" line {expected_lineno}, in _log_traceback\n" - " raise Exception()\nException" - ) - - # When - logger.info("hello %s, %d", "arg1", 22.22) - _log_traceback() - - # Then - assert [json.loads(message) for message in caplog.text.split("\n") if message] == [ - { - "levelname": "INFO", - "message": "hello arg1, 22", - "timestamp": "2023-12-08 06:05:47,000", - "logger_name": "test_json_formatter__outputs_expected", - "pid": expected_pid, - "thread_name": "MainThread", - }, - { - "levelname": "ERROR", - "message": "this is an error", - "timestamp": "2023-12-08 06:05:47,000", - "logger_name": "test_json_formatter__outputs_expected", - "pid": expected_pid, - "thread_name": "MainThread", - "exc_info": expected_tb_string, - }, - ] - - def test_setup_logging__generic_format__configures_stdlib( capsys: pytest.CaptureFixture[str], test_app_loggers: list[str], @@ -352,26 +297,7 @@ def test_setup_logging__stdlib_error__sentry_captures_clean_message( test_app_loggers: list[str], sentry_transport_mock: "MockSentryTransport", ) -> None: - """Verify Sentry receives the original message, not the rendered output. - - Sentry's ``LoggingIntegration`` monkey-patches ``Logger.callHandlers`` - and reads ``record.msg`` / ``record.args`` in a ``finally`` block - *after* all handlers (and formatters) have run:: - - try: - old_callhandlers(self, record) # formatters run here - finally: - integration._handle_record(record) # reads record.msg - - structlog's ``ProcessorFormatter.format()`` replaces ``record.msg`` - with rendered output (JSON/console) and clears ``record.args`` to - ``()``. Without ``_SentryFriendlyProcessorFormatter``, Sentry would - see a JSON blob as the message — breaking event grouping because every - rendered string (containing timestamps, PIDs, …) is unique. - - ``_SentryFriendlyProcessorFormatter`` snapshots and restores the - originals so Sentry sees the clean message template and args. - """ + """Verify Sentry receives the original message, not the rendered output.""" # Given setup_logging( log_level="DEBUG", log_format="json", application_loggers=test_app_loggers @@ -388,34 +314,184 @@ def test_setup_logging__stdlib_error__sentry_captures_clean_message( # And — Sentry received the original message template, not the JSON blob. assert len(sentry_transport_mock.events) == 1 - logentry = sentry_transport_mock.events[0]["logentry"] - assert logentry["message"] == "order %s failed for %s" - assert logentry["params"] == ["ORD-123", "alice"] - assert logentry["formatted"] == "order ORD-123 failed for alice" + assert ( + sentry_transport_mock.events[0].items() + >= { + "level": "error", + "logger": "test.sentry_compat", + "logentry": { + "message": "order %s failed for %s", + "formatted": "order ORD-123 failed for alice", + "params": ["ORD-123", "alice"], + }, + }.items() + ) -def test_setup_logging__structlog_error__sentry_captures_with_context( +def test_setup_logging__structlog_exception__sentry_captures_with_context( capsys: pytest.CaptureFixture[str], test_app_loggers: list[str], sentry_transport_mock: "MockSentryTransport", ) -> None: - """Verify structlog errors reach Sentry with context from sentry_processor.""" + """Verify structlog exceptions reach Sentry with context and traceback.""" # Given setup_logging( - log_level="DEBUG", log_format="json", application_loggers=test_app_loggers + log_level="DEBUG", + log_format="json", + application_loggers=test_app_loggers, ) structured_logger = structlog.get_logger("test.sentry_structlog") + expected_module_path = os.path.abspath(__file__) + + def _raise_and_log() -> None: + try: + raise RuntimeError("payment gateway timeout") + except RuntimeError: + structured_logger.exception("payment failed", order_id="ORD-456") + + expected_lineno = _raise_and_log.__code__.co_firstlineno + 2 + expected_tb_string = ( + "Traceback (most recent call last):\n" + f' File "{expected_module_path}",' + f" line {expected_lineno}, in _raise_and_log\n" + ' raise RuntimeError("payment gateway timeout")\n' + "RuntimeError: payment gateway timeout" + ) # When - structured_logger.error("payment failed", order_id="ORD-456") + _raise_and_log() - # Then — Sentry captured the event + # Then — Sentry captured the event with exception and structlog context assert len(sentry_transport_mock.events) == 1 event = sentry_transport_mock.events[0] + assert ( + event.items() + >= { + "level": "error", + "logger": "test.sentry_structlog", + "platform": "python", + }.items() + ) + + # And — exception traceback is correct + exc = event["exception"]["values"][0] + assert ( + exc.items() + >= { + "mechanism": {"type": "logging", "handled": True}, + "module": None, + "type": "RuntimeError", + "value": "payment gateway timeout", + }.items() + ) + frame = exc["stacktrace"]["frames"][-1] + assert ( + frame.items() + >= { + "abs_path": expected_module_path, + "function": "_raise_and_log", + "module": "tests.unit.common.core.test_logging", + "lineno": expected_lineno, + "context_line": ' raise RuntimeError("payment gateway timeout")', + "in_app": True, + }.items() + ) # And — sentry_processor set the structlog context - assert "structlog" in event["contexts"] - assert event["contexts"]["structlog"]["order_id"] == "ORD-456" + assert event["contexts"]["structlog"] == { + "order_id": "ORD-456", + "logger": "test.sentry_structlog", + "exception": expected_tb_string, + } + + +@pytest.mark.freeze_time("2023-12-08T06:05:47+00:00") +def test_setup_logging__stdlib_exception__sentry_captures_exc_info( + capsys: pytest.CaptureFixture[str], + test_app_loggers: list[str], + sentry_transport_mock: "MockSentryTransport", +) -> None: + """Verify Sentry receives full exception info from stdlib loggers.""" + # Given + setup_logging( + log_level="DEBUG", + log_format="json", + application_loggers=test_app_loggers, + ) + test_logger = logging.getLogger("test.sentry_exc") + expected_module_path = os.path.abspath(__file__) + + def _raise_and_log() -> None: + try: + raise ValueError("something broke") + except ValueError: + test_logger.error("request failed", exc_info=True) + + expected_lineno = _raise_and_log.__code__.co_firstlineno + 2 + expected_tb_string = ( + "Traceback (most recent call last):\n" + f' File "{expected_module_path}",' + f" line {expected_lineno}, in _raise_and_log\n" + ' raise ValueError("something broke")\n' + "ValueError: something broke" + ) + + # When + _raise_and_log() + + # Then — Sentry captured the exception with full traceback + assert len(sentry_transport_mock.events) == 1 + event = sentry_transport_mock.events[0] + assert ( + event.items() + >= { + "level": "error", + "logger": "test.sentry_exc", + "logentry": { + "message": "request failed", + "formatted": "request failed", + "params": [], + }, + "platform": "python", + }.items() + ) + + # And — exception traceback is correct + exc = event["exception"]["values"][0] + assert ( + exc.items() + >= { + "mechanism": {"type": "logging", "handled": True}, + "module": None, + "type": "ValueError", + "value": "something broke", + }.items() + ) + frame = exc["stacktrace"]["frames"][-1] + assert ( + frame.items() + >= { + "abs_path": expected_module_path, + "function": "_raise_and_log", + "module": "tests.unit.common.core.test_logging", + "lineno": expected_lineno, + "context_line": ' raise ValueError("something broke")', + "in_app": True, + }.items() + ) + + # And — the JSON output has a formatted traceback string, not a list + output = capsys.readouterr().out.strip() + parsed = json.loads(output) + assert parsed == { + "timestamp": "2023-12-08T06:05:47Z", + "message": "request failed", + "levelname": "ERROR", + "logger_name": "test.sentry_exc", + "pid": os.getpid(), + "thread_name": "MainThread", + "exc_info": expected_tb_string, + } def test_setup_logging__default_args__root_at_warning() -> None: