From 65312d5fbcccb15f4e61d31c3373a6fb395e9bbd Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Sat, 21 Mar 2026 16:57:18 +0100
Subject: [PATCH 01/41] Add NDJSON collector to the sampling profiler

---
 Lib/profiling/sampling/__init__.py         |  11 +-
 Lib/profiling/sampling/binary_reader.py    |   3 +
 Lib/profiling/sampling/cli.py              |  26 ++-
 Lib/profiling/sampling/ndjson_collector.py | 216 +++++++++++++++++++++
 4 files changed, 251 insertions(+), 5 deletions(-)
 create mode 100644 Lib/profiling/sampling/ndjson_collector.py

diff --git a/Lib/profiling/sampling/__init__.py b/Lib/profiling/sampling/__init__.py
index 6a0bb5e5c2f387..21d3a773a2ba63 100644
--- a/Lib/profiling/sampling/__init__.py
+++ b/Lib/profiling/sampling/__init__.py
@@ -9,6 +9,15 @@
 from .stack_collector import CollapsedStackCollector
 from .heatmap_collector import HeatmapCollector
 from .gecko_collector import GeckoCollector
+from .ndjson_collector import NdjsonCollector
 from .string_table import StringTable
 
-__all__ = ("Collector", "PstatsCollector", "CollapsedStackCollector", "HeatmapCollector", "GeckoCollector", "StringTable")
+__all__ = (
+    "Collector",
+    "PstatsCollector",
+    "CollapsedStackCollector",
+    "HeatmapCollector",
+    "GeckoCollector",
+    "NdjsonCollector",
+    "StringTable",
+)
diff --git a/Lib/profiling/sampling/binary_reader.py b/Lib/profiling/sampling/binary_reader.py
index a11be3652597a6..d5bfc0d6130f1a 100644
--- a/Lib/profiling/sampling/binary_reader.py
+++ b/Lib/profiling/sampling/binary_reader.py
@@ -4,6 +4,7 @@
 
 from .gecko_collector import GeckoCollector
 from .stack_collector import FlamegraphCollector, CollapsedStackCollector
+from .ndjson_collector import NdjsonCollector
 from .pstats_collector import PstatsCollector
 
 
@@ -117,6 +118,8 @@ def convert_binary_to_format(input_file, output_file, output_format,
             collector = PstatsCollector(interval)
         elif output_format == 'gecko':
             collector = GeckoCollector(interval)
+        elif output_format == 'ndjson':
+            collector = NdjsonCollector(interval)
         else:
             raise ValueError(f"Unknown output format: {output_format}")
 
diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py
index 9900415ae8a927..655d5b51cea901 100644
--- a/Lib/profiling/sampling/cli.py
+++ b/Lib/profiling/sampling/cli.py
@@ -20,6 +20,7 @@
 from .stack_collector import CollapsedStackCollector, FlamegraphCollector, DiffFlamegraphCollector
 from .heatmap_collector import HeatmapCollector
 from .gecko_collector import GeckoCollector
+from .ndjson_collector import NdjsonCollector
 from .binary_collector import BinaryCollector
 from .binary_reader import BinaryReader
 from .constants import (
@@ -101,6 +102,7 @@ def __call__(self, parser, namespace, values, option_string=None):
     "diff_flamegraph": "html",
     "gecko": "json",
     "heatmap": "html",
+    "ndjson": "ndjson",
     "binary": "bin",
 }
 
@@ -111,6 +113,7 @@ def __call__(self, parser, namespace, values, option_string=None):
     "diff_flamegraph": DiffFlamegraphCollector,
     "gecko": GeckoCollector,
     "heatmap": HeatmapCollector,
+    "ndjson": NdjsonCollector,
     "binary": BinaryCollector,
 }
 
@@ -488,6 +491,13 @@ def _add_format_options(parser, include_compression=True, include_binary=True):
         action=DiffFlamegraphAction,
         help="Generate differential flamegraph comparing current profile to `BASELINE` binary file",
     )
+    format_group.add_argument(
+        "--ndjson",
+        action="store_const",
+        const="ndjson",
+        dest="format",
+        help="Generate NDJSON snapshot output for external consumers",
+    )
     if include_binary:
         format_group.add_argument(
             "--binary",
@@ -611,15 +621,18 @@ def _sort_to_mode(sort_choice):
     return sort_map.get(sort_choice, SORT_MODE_NSAMPLES)
 
 def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=False,
-                      output_file=None, compression='auto', diff_baseline=None):
+                      mode=None, output_file=None, compression='auto',
+                      diff_baseline=None):
     """Create the appropriate collector based on format type.
 
     Args:
-        format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap', 'binary', 'diff_flamegraph')
+        format_type: The output format ('pstats', 'collapsed', 'flamegraph',
+                    'gecko', 'heatmap', 'ndjson', 'binary', 'diff_flamegraph')
         sample_interval_usec: Sampling interval in microseconds
         skip_idle: Whether to skip idle samples
         opcodes: Whether to collect opcode information (only used by gecko format
                  for creating interval markers in Firefox Profiler)
+        mode: Profiling mode for collectors that expose it in metadata
         output_file: Output file path (required for binary format)
         compression: Compression type for binary format ('auto', 'zstd', 'none')
         diff_baseline: Path to baseline binary file for differential flamegraph
@@ -655,6 +668,11 @@ def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=Fals
         skip_idle = False
         return collector_class(sample_interval_usec, skip_idle=skip_idle, opcodes=opcodes)
 
+    if format_type == "ndjson":
+        return collector_class(
+            sample_interval_usec, skip_idle=skip_idle, mode=mode
+        )
+
     return collector_class(sample_interval_usec, skip_idle=skip_idle)
 
 
@@ -1142,7 +1160,7 @@ def _handle_attach(args):
 
     # Create the appropriate collector
     collector = _create_collector(
-        args.format, args.sample_interval_usec, skip_idle, args.opcodes,
+        args.format, args.sample_interval_usec, skip_idle, args.opcodes, mode,
         output_file=output_file,
         compression=getattr(args, 'compression', 'auto'),
         diff_baseline=args.diff_baseline
@@ -1249,7 +1267,7 @@ def _handle_run(args):
 
     # Create the appropriate collector
     collector = _create_collector(
-        args.format, args.sample_interval_usec, skip_idle, args.opcodes,
+        args.format, args.sample_interval_usec, skip_idle, args.opcodes, mode,
         output_file=output_file,
         compression=getattr(args, 'compression', 'auto'),
         diff_baseline=args.diff_baseline
diff --git a/Lib/profiling/sampling/ndjson_collector.py b/Lib/profiling/sampling/ndjson_collector.py
new file mode 100644
index 00000000000000..123ec1c5ea9a1c
--- /dev/null
+++ b/Lib/profiling/sampling/ndjson_collector.py
@@ -0,0 +1,216 @@
+"""NDJSON collector."""
+
+import json
+import uuid
+from itertools import batched
+
+from .constants import (
+    PROFILING_MODE_ALL,
+    PROFILING_MODE_CPU,
+    PROFILING_MODE_EXCEPTION,
+    PROFILING_MODE_GIL,
+    PROFILING_MODE_WALL,
+)
+from .stack_collector import StackTraceCollector
+
+
+_CHUNK_SIZE = 1000
+
+_MODE_NAMES = {
+    PROFILING_MODE_WALL: "wall",
+    PROFILING_MODE_CPU: "cpu",
+    PROFILING_MODE_GIL: "gil",
+    PROFILING_MODE_ALL: "all",
+    PROFILING_MODE_EXCEPTION: "exception",
+}
+
+
+class NdjsonCollector(StackTraceCollector):
+    """Collector that exports finalized profiling data as NDJSON."""
+
+    def __init__(self, sample_interval_usec, *, skip_idle=False, mode=None):
+        super().__init__(sample_interval_usec, skip_idle=skip_idle)
+        self.run_id = uuid.uuid4().hex
+
+        self._string_to_id = {}
+        self._strings = []
+
+        self._frame_to_id = {}
+        self._frames = []
+
+        self._frame_self = {}
+        self._frame_cumulative = {}
+        self._samples_total = 0
+
+        self._mode = mode
+
+    def process_frames(self, frames, _thread_id, weight=1):
+        if not frames:
+            return
+
+        self._samples_total += weight
+
+        frame_ids = [
+            self._get_or_create_frame_id(filename, location, funcname)
+            for filename, location, funcname, _opcode in frames
+        ]
+        leaf_frame_id = frame_ids[0]
+
+        self._frame_self[leaf_frame_id] = (
+            self._frame_self.get(leaf_frame_id, 0) + weight
+        )
+
+        for frame_id in set(frame_ids):
+            self._frame_cumulative[frame_id] = (
+                self._frame_cumulative.get(frame_id, 0) + weight
+            )
+
+    def export(self, filename):
+        with open(filename, "w", encoding="utf-8") as output:
+            self._write_message(output, self._build_meta_record())
+            self._write_chunked_defs(output, "str_def", self._strings)
+            self._write_chunked_defs(output, "frame_def", self._frames)
+            self._write_chunked_agg(output, self._iter_agg_entries())
+            self._write_message(
+                output,
+                {
+                    "type": "end",
+                    "v": 1,
+                    "run_id": self.run_id,
+                    "samples_total": self._samples_total,
+                },
+            )
+
+        print(f"NDJSON profile written to {filename}")
+
+    def _build_meta_record(self):
+        record = {
+            "type": "meta",
+            "v": 1,
+            "run_id": self.run_id,
+            "sample_interval_usec": self.sample_interval_usec,
+        }
+
+        if self._mode is not None:
+            record["mode"] = _MODE_NAMES.get(self._mode, str(self._mode))
+
+        return record
+
+    def _get_or_create_frame_id(self, filename, location, funcname):
+        synthetic = location is None
+        location_fields = self._normalize_export_location(location)
+        func_str_id = self._intern_string(funcname)
+        path_str_id = self._intern_string(filename)
+
+        frame_key = (
+            path_str_id,
+            func_str_id,
+            location_fields["line"],
+            location_fields.get("end_line"),
+            location_fields.get("col"),
+            location_fields.get("end_col"),
+            synthetic,
+        )
+
+        if (frame_id := self._frame_to_id.get(frame_key)) is not None:
+            return frame_id
+
+        frame_id = len(self._frames) + 1
+        frame_record = {
+            "frame_id": frame_id,
+            "path_str_id": path_str_id,
+            "func_str_id": func_str_id,
+            **location_fields,
+        }
+        if synthetic:
+            frame_record["synthetic"] = True
+
+        self._frame_to_id[frame_key] = frame_id
+        self._frames.append(frame_record)
+        return frame_id
+
+    def _intern_string(self, value):
+        value = str(value)
+
+        if (string_id := self._string_to_id.get(value)) is not None:
+            return string_id
+
+        string_id = len(self._strings) + 1
+        self._string_to_id[value] = string_id
+        self._strings.append({"str_id": string_id, "value": value})
+        return string_id
+
+    @staticmethod
+    def _normalize_export_location(location):
+        if location is None:
+            return {"line": 0}
+
+        if isinstance(location, int):
+            return {"line": max(location, 0)}
+
+        if not isinstance(location, tuple):
+            lineno = getattr(location, "lineno", 0)
+            location = (
+                lineno,
+                getattr(location, "end_lineno", lineno),
+                getattr(location, "col_offset", -1),
+                getattr(location, "end_col_offset", -1),
+            )
+
+        lineno, end_lineno, col_offset, end_col_offset = location
+        if not isinstance(lineno, int) or lineno <= 0:
+            return {"line": 0}
+
+        normalized = {"line": lineno}
+        if isinstance(end_lineno, int) and end_lineno > 0:
+            normalized["end_line"] = end_lineno
+        if isinstance(col_offset, int) and col_offset >= 0:
+            normalized["col"] = col_offset
+        if isinstance(end_col_offset, int) and end_col_offset >= 0:
+            normalized["end_col"] = end_col_offset
+        return normalized
+
+    def _iter_agg_entries(self):
+        entries = []
+        for frame_record in self._frames:
+            frame_id = frame_record["frame_id"]
+            entries.append(
+                {
+                    "frame_id": frame_id,
+                    "self": self._frame_self.get(frame_id, 0),
+                    "cumulative": self._frame_cumulative.get(frame_id, 0),
+                }
+            )
+        return entries
+
+    def _write_chunked_defs(self, output, record_type, entries):
+        for chunk in batched(entries, _CHUNK_SIZE):
+            self._write_message(
+                output,
+                {
+                    "type": record_type,
+                    "v": 1,
+                    "run_id": self.run_id,
+                    "defs": chunk,
+                },
+            )
+
+    def _write_chunked_agg(self, output, entries):
+        for chunk in batched(entries, _CHUNK_SIZE):
+            self._write_message(
+                output,
+                {
+                    "type": "agg",
+                    "v": 1,
+                    "run_id": self.run_id,
+                    "kind": "frame",
+                    "scope": "final",
+                    "samples_total": self._samples_total,
+                    "entries": chunk,
+                },
+            )
+
+    @staticmethod
+    def _write_message(output, record):
+        output.write(json.dumps(record, separators=(",", ":")))
+        output.write("\n")

From dff2ead5e96a49230bb5b4388ae66a035ab4fbd3 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Sat, 21 Mar 2026 21:08:18 +0100
Subject: [PATCH 02/41] Rename the NDJSON collector to JSONL (s/ndjson/jsonl/)

---
 Lib/profiling/sampling/__init__.py               |  4 ++--
 Lib/profiling/sampling/binary_reader.py          |  6 +++---
 Lib/profiling/sampling/cli.py                    | 16 ++++++++--------
 .../{ndjson_collector.py => jsonl_collector.py}  |  8 ++++----
 4 files changed, 17 insertions(+), 17 deletions(-)
 rename Lib/profiling/sampling/{ndjson_collector.py => jsonl_collector.py} (97%)

diff --git a/Lib/profiling/sampling/__init__.py b/Lib/profiling/sampling/__init__.py
index 21d3a773a2ba63..71579a3903253e 100644
--- a/Lib/profiling/sampling/__init__.py
+++ b/Lib/profiling/sampling/__init__.py
@@ -9,7 +9,7 @@
 from .stack_collector import CollapsedStackCollector
 from .heatmap_collector import HeatmapCollector
 from .gecko_collector import GeckoCollector
-from .ndjson_collector import NdjsonCollector
+from .jsonl_collector import JsonlCollector
 from .string_table import StringTable
 
 __all__ = (
@@ -18,6 +18,6 @@
     "CollapsedStackCollector",
     "HeatmapCollector",
     "GeckoCollector",
-    "NdjsonCollector",
+    "JsonlCollector",
     "StringTable",
 )
diff --git a/Lib/profiling/sampling/binary_reader.py b/Lib/profiling/sampling/binary_reader.py
index d5bfc0d6130f1a..8d1d8eef9155eb 100644
--- a/Lib/profiling/sampling/binary_reader.py
+++ b/Lib/profiling/sampling/binary_reader.py
@@ -4,7 +4,7 @@
 
 from .gecko_collector import GeckoCollector
 from .stack_collector import FlamegraphCollector, CollapsedStackCollector
-from .ndjson_collector import NdjsonCollector
+from .jsonl_collector import JsonlCollector
 from .pstats_collector import PstatsCollector
 
 
@@ -118,8 +118,8 @@ def convert_binary_to_format(input_file, output_file, output_format,
             collector = PstatsCollector(interval)
         elif output_format == 'gecko':
             collector = GeckoCollector(interval)
-        elif output_format == 'ndjson':
-            collector = NdjsonCollector(interval)
+        elif output_format == 'jsonl':
+            collector = JsonlCollector(interval)
         else:
             raise ValueError(f"Unknown output format: {output_format}")
 
diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py
index 655d5b51cea901..ccefd2402edc8e 100644
--- a/Lib/profiling/sampling/cli.py
+++ b/Lib/profiling/sampling/cli.py
@@ -20,7 +20,7 @@
 from .stack_collector import CollapsedStackCollector, FlamegraphCollector, DiffFlamegraphCollector
 from .heatmap_collector import HeatmapCollector
 from .gecko_collector import GeckoCollector
-from .ndjson_collector import NdjsonCollector
+from .jsonl_collector import JsonlCollector
 from .binary_collector import BinaryCollector
 from .binary_reader import BinaryReader
 from .constants import (
@@ -102,7 +102,7 @@ def __call__(self, parser, namespace, values, option_string=None):
     "diff_flamegraph": "html",
     "gecko": "json",
     "heatmap": "html",
-    "ndjson": "ndjson",
+    "jsonl": "jsonl",
     "binary": "bin",
 }
 
@@ -113,7 +113,7 @@ def __call__(self, parser, namespace, values, option_string=None):
     "diff_flamegraph": DiffFlamegraphCollector,
     "gecko": GeckoCollector,
     "heatmap": HeatmapCollector,
-    "ndjson": NdjsonCollector,
+    "jsonl": JsonlCollector,
     "binary": BinaryCollector,
 }
 
@@ -492,11 +492,11 @@ def _add_format_options(parser, include_compression=True, include_binary=True):
         help="Generate differential flamegraph comparing current profile to `BASELINE` binary file",
     )
     format_group.add_argument(
-        "--ndjson",
+        "--jsonl",
         action="store_const",
-        const="ndjson",
+        const="jsonl",
         dest="format",
-        help="Generate NDJSON snapshot output for external consumers",
+        help="Generate JSONL snapshot output for external consumers",
     )
     if include_binary:
         format_group.add_argument(
@@ -627,7 +627,7 @@ def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=Fals
 
     Args:
         format_type: The output format ('pstats', 'collapsed', 'flamegraph',
-                    'gecko', 'heatmap', 'ndjson', 'binary', 'diff_flamegraph')
+                    'gecko', 'heatmap', 'jsonl', 'binary', 'diff_flamegraph')
         sample_interval_usec: Sampling interval in microseconds
         skip_idle: Whether to skip idle samples
         opcodes: Whether to collect opcode information (only used by gecko format
@@ -668,7 +668,7 @@ def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=Fals
         skip_idle = False
         return collector_class(sample_interval_usec, skip_idle=skip_idle, opcodes=opcodes)
 
-    if format_type == "ndjson":
+    if format_type == "jsonl":
         return collector_class(
             sample_interval_usec, skip_idle=skip_idle, mode=mode
         )
diff --git a/Lib/profiling/sampling/ndjson_collector.py b/Lib/profiling/sampling/jsonl_collector.py
similarity index 97%
rename from Lib/profiling/sampling/ndjson_collector.py
rename to Lib/profiling/sampling/jsonl_collector.py
index 123ec1c5ea9a1c..1d6575425c2616 100644
--- a/Lib/profiling/sampling/ndjson_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -1,4 +1,4 @@
-"""NDJSON collector."""
+"""JSONL collector."""
 
 import json
 import uuid
@@ -25,8 +25,8 @@
 }
 
 
-class NdjsonCollector(StackTraceCollector):
-    """Collector that exports finalized profiling data as NDJSON."""
+class JsonlCollector(StackTraceCollector):
+    """Collector that exports finalized profiling data as JSONL."""
 
     def __init__(self, sample_interval_usec, *, skip_idle=False, mode=None):
         super().__init__(sample_interval_usec, skip_idle=skip_idle)
@@ -81,7 +81,7 @@ def export(self, filename):
                 },
             )
 
-        print(f"NDJSON profile written to {filename}")
+        print(f"JSONL profile written to {filename}")
 
     def _build_meta_record(self):
         record = {

From 23b5df1d374d0a954160ae75d4cee44acea244d9 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Sun, 22 Mar 2026 02:51:12 +0100
Subject: [PATCH 03/41] Stop printing to stdout from JsonlCollector.export()

---
 Lib/profiling/sampling/jsonl_collector.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 1d6575425c2616..3333b7352c9411 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -81,8 +81,6 @@ def export(self, filename):
                 },
             )
 
-        print(f"JSONL profile written to {filename}")
-
     def _build_meta_record(self):
         record = {
             "type": "meta",

From 9cdb9710b8b6854e5b78b3b207fae6ffe8c1c943 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Mon, 30 Mar 2026 22:50:59 +0200
Subject: [PATCH 04/41] Add a basic export test for JsonlCollector

---
 .../test_sampling_profiler/test_collectors.py | 81 +++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 240ec8a195c43b..bac81bafde7533 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -16,6 +16,7 @@
         CollapsedStackCollector,
         FlamegraphCollector,
     )
+    from profiling.sampling.jsonl_collector import JsonlCollector
     from profiling.sampling.gecko_collector import GeckoCollector
     from profiling.sampling.collector import extract_lineno, normalize_location
     from profiling.sampling.opcode_utils import get_opcode_info, format_opcode
@@ -1669,6 +1670,86 @@ def test_diff_flamegraph_load_baseline(self):
         self.assertAlmostEqual(cold_node["diff"], -1.0)
         self.assertAlmostEqual(cold_node["diff_pct"], -50.0)
 
+    def test_jsonl_collector_basic(self):
+        collapsed_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, collapsed_out)
+
+        collector = JsonlCollector(1000)
+        run_id = collector.run_id
+
+        self.assertIsNotNone(run_id)
+
+        test_frames1 = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")]
+                    )
+                ],
+            )
+        ]
+        test_frames2 = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")]
+                    )
+                ],
+            )
+        ]  # Same stack
+        test_frames3 = [
+            MockInterpreterInfo(
+                0, [MockThreadInfo(1, [MockFrameInfo("other.py", 5, "other_func")])]
+            )
+        ]
+
+        collector.collect(test_frames1)
+        collector.collect(test_frames2)
+        collector.collect(test_frames3)
+
+        with captured_stdout(), captured_stderr():
+            collector.export(collapsed_out.name)
+
+        # Check file contents
+        with open(collapsed_out.name, "r") as f:
+            content = f.read()
+
+        lines = content.strip().split("\n")
+        self.assertEqual(len(lines), 5)
+
+        def jsonl(obj):
+            return json.dumps(obj, separators=(",", ":"))
+
+        expected = [
+            jsonl({"type": "meta", "v": 1, "run_id": run_id,
+                   "sample_interval_usec": 1000}),
+            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
+                   "defs": [{"str_id": 1, "value": "func1"},
+                            {"str_id": 2, "value": "file.py"},
+                            {"str_id": 3, "value": "func2"},
+                            {"str_id": 4, "value": "other_func"},
+                            {"str_id": 5, "value": "other.py"}]}),
+            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
+                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
+                             "line": 10, "end_line": 10},
+                            {"frame_id": 2, "path_str_id": 2, "func_str_id": 3,
+                             "line": 20, "end_line": 20},
+                            {"frame_id": 3, "path_str_id": 5, "func_str_id": 4,
+                             "line": 5, "end_line": 5}]}),
+            jsonl({"type": "agg", "v": 1, "run_id": run_id,
+                   "kind": "frame", "scope": "final", "samples_total": 3,
+                   "entries": [{"frame_id": 1, "self": 2, "cumulative": 2},
+                               {"frame_id": 2, "self": 0, "cumulative": 2},
+                               {"frame_id": 3, "self": 1, "cumulative": 1}]}),
+            jsonl({"type": "end", "v": 1, "run_id": run_id,
+                   "samples_total": 3}),
+        ]
+
+        for exp in expected:
+            self.assertIn(exp, lines)
+
 
 class TestRecursiveFunctionHandling(unittest.TestCase):
     """Tests for correct handling of recursive functions in cumulative stats."""

From 5920559f3e2484bb6ef2ccaf3f5b3a133467e446 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Mon, 30 Mar 2026 22:54:22 +0200
Subject: [PATCH 05/41] Move end record construction into _build_end_record()

---
 Lib/profiling/sampling/jsonl_collector.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 3333b7352c9411..59ab3b865c182c 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -71,15 +71,7 @@ def export(self, filename):
             self._write_chunked_defs(output, "str_def", self._strings)
             self._write_chunked_defs(output, "frame_def", self._frames)
             self._write_chunked_agg(output, self._iter_agg_entries())
-            self._write_message(
-                output,
-                {
-                    "type": "end",
-                    "v": 1,
-                    "run_id": self.run_id,
-                    "samples_total": self._samples_total,
-                },
-            )
+            self._write_message(output, self._build_end_record())
 
     def _build_meta_record(self):
         record = {
@@ -94,6 +86,16 @@ def _build_meta_record(self):
 
         return record
 
+    def _build_end_record(self):
+        record = {
+            "type": "end",
+            "v": 1,
+            "run_id": self.run_id,
+            "samples_total": self._samples_total,
+        }
+
+        return record
+
     def _get_or_create_frame_id(self, filename, location, funcname):
         synthetic = location is None
         location_fields = self._normalize_export_location(location)

From 28ebd2a64b2ca060c064e6fd207ed03a3af7556f Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Mon, 30 Mar 2026 23:14:18 +0200
Subject: [PATCH 06/41] Rename test_jsonl_collector_basic to test_jsonl_collector_export

---
 .../test_profiling/test_sampling_profiler/test_collectors.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index bac81bafde7533..2afb9b5a2ca459 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -1670,7 +1670,7 @@ def test_diff_flamegraph_load_baseline(self):
         self.assertAlmostEqual(cold_node["diff"], -1.0)
         self.assertAlmostEqual(cold_node["diff_pct"], -50.0)
 
-    def test_jsonl_collector_basic(self):
+    def test_jsonl_collector_export(self):
         collapsed_out = tempfile.NamedTemporaryFile(delete=False)
         self.addCleanup(close_and_unlink, collapsed_out)
 

From bc3370b083fe42a2687539ff937c263aca5629fd Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 09:48:26 +0200
Subject: [PATCH 07/41] Add test_jsonl_collector_with_location_info

---
 .../test_sampling_profiler/test_collectors.py | 50 +++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 2afb9b5a2ca459..14744be75a0e9b 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2149,6 +2149,56 @@ def test_gecko_collector_with_location_info(self):
         # Verify function name is in string table
         self.assertIn("handle_request", string_array)
 
+    def test_jsonl_collector_with_location_info(self):
+        """Test JsonlCollector handles LocationInfo properly."""
+        collapsed_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, collapsed_out)
+
+        collector = JsonlCollector(sample_interval_usec=1000)
+        run_id = collector.run_id
+
+        # Frame with LocationInfo
+        frame = MockFrameInfo("test.py", 42, "my_function")
+        frames = [
+            MockInterpreterInfo(
+                0, [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+            )
+        ]
+        collector.collect(frames)
+
+        # Should extract lineno from location
+        with captured_stdout(), captured_stderr():
+            collector.export(collapsed_out.name)
+
+        # Check file contents
+        with open(collapsed_out.name, "r") as f:
+            content = f.read()
+
+        lines = content.strip().split("\n")
+        self.assertEqual(len(lines), 5)
+
+        def jsonl(obj):
+            return json.dumps(obj, separators=(",", ":"))
+
+        expected = [
+            jsonl({"type": "meta", "v": 1, "run_id": run_id,
+                   "sample_interval_usec": 1000}),
+            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
+                   "defs": [{"str_id": 1, "value": "my_function"},
+                            {"str_id": 2, "value": "test.py"}]}),
+            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
+                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
+                             "line": 42, "end_line": 42}]}),
+            jsonl({"type": "agg", "v": 1, "run_id": run_id,
+                   "kind": "frame", "scope": "final", "samples_total": 1,
+                   "entries": [{"frame_id": 1, "self": 1, "cumulative": 1}]}),
+            jsonl({"type": "end", "v": 1, "run_id": run_id,
+                   "samples_total": 1}),
+        ]
+
+        for exp in expected:
+            self.assertIn(exp, lines)
+
 
 class TestOpcodeHandling(unittest.TestCase):
     """Tests for opcode field handling in collectors."""

From a151578460b53530ca86c00d7c93a2be00d840d9 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 09:52:15 +0200
Subject: [PATCH 08/41] Add JsonlCollector test for synthetic (None-location) frames

---
 .../test_sampling_profiler/test_collectors.py | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 14744be75a0e9b..9d88c5283a44e8 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2149,6 +2149,7 @@ def test_gecko_collector_with_location_info(self):
         # Verify function name is in string table
         self.assertIn("handle_request", string_array)
 
+
     def test_jsonl_collector_with_location_info(self):
         """Test JsonlCollector handles LocationInfo properly."""
         collapsed_out = tempfile.NamedTemporaryFile(delete=False)
@@ -2200,6 +2201,59 @@ def jsonl(obj):
             self.assertIn(exp, lines)
 
 
+    def test_jsonl_collector_with_none_location(self):
+        """Test JsonlCollector handles None location (synthetic frames)."""
+        collapsed_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, collapsed_out)
+
+        collector = JsonlCollector(sample_interval_usec=1000)
+        run_id = collector.run_id
+
+        # Create frame with None location (like GC frame)
+        frame = MockFrameInfo("~", 0, "<GC>")
+        frame.location = None  # Synthetic frame has no location
+        frames = [
+            MockInterpreterInfo(
+                0,
+                [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+            )
+        ]
+        collector.collect(frames)
+
+        # Should handle None location as synthetic frame
+        with captured_stdout(), captured_stderr():
+            collector.export(collapsed_out.name)
+
+        # Check file contents
+        with open(collapsed_out.name, "r") as f:
+            content = f.read()
+
+        lines = content.strip().split("\n")
+        self.assertEqual(len(lines), 5)
+
+        def jsonl(obj):
+            return json.dumps(obj, separators=(",", ":"))
+
+        expected = [
+            jsonl({"type": "meta", "v": 1, "run_id": run_id,
+                   "sample_interval_usec": 1000}),
+            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
+                   "defs": [{"str_id": 1, "value": "<GC>"},
+                            {"str_id": 2, "value": "~"}]}),
+            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
+                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
+                             "line": 0, "synthetic": True}]}),
+            jsonl({"type": "agg", "v": 1, "run_id": run_id,
+                   "kind": "frame", "scope": "final", "samples_total": 1,
+                   "entries": [{"frame_id": 1, "self": 1, "cumulative": 1}]}),
+            jsonl({"type": "end", "v": 1, "run_id": run_id,
+                   "samples_total": 1}),
+        ]
+
+        for exp in expected:
+            self.assertIn(exp, lines)
+
+
 class TestOpcodeHandling(unittest.TestCase):
     """Tests for opcode field handling in collectors."""
 

From f851de9c9490ac970296ddc97d143efe59393c0a Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 10:02:08 +0200
Subject: [PATCH 09/41] too many new lines

---
 .../test_profiling/test_sampling_profiler/test_collectors.py    | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 9d88c5283a44e8..4c73c42a811076 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2149,7 +2149,6 @@ def test_gecko_collector_with_location_info(self):
         # Verify function name is in string table
         self.assertIn("handle_request", string_array)
 
-
     def test_jsonl_collector_with_location_info(self):
         """Test JsonlCollector handles LocationInfo properly."""
         collapsed_out = tempfile.NamedTemporaryFile(delete=False)
@@ -2200,7 +2199,6 @@ def jsonl(obj):
         for exp in expected:
             self.assertIn(exp, lines)
 
-
     def test_jsonl_collector_with_none_location(self):
         """Test JsonlCollector handles None location (synthetic frames)."""
         collapsed_out = tempfile.NamedTemporaryFile(delete=False)

From e5831a8cb7c38db37bae5d3211669277595236b5 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 10:21:42 +0200
Subject: [PATCH 10/41] BUG? confusing... two ways to set skip_idle?

---
 .../test_sampling_profiler/test_modes.py      | 157 +++++++++++++++++-
 1 file changed, 155 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
index 0b38fb4ad4bcf6..67b82eff091d08 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
@@ -9,6 +9,7 @@
     import profiling.sampling
     import profiling.sampling.sample
     from profiling.sampling.pstats_collector import PstatsCollector
+    from profiling.sampling.jsonl_collector import JsonlCollector
     from profiling.sampling.cli import main, _parse_mode
     from profiling.sampling.constants import PROFILING_MODE_EXCEPTION
     from _remote_debugging import (
@@ -20,9 +21,13 @@
         "Test only runs when _remote_debugging is available"
     )
 
-from test.support import requires_remote_subprocess_debugging
+from test.support import (
+    captured_stdout,
+    captured_stderr,
+    requires_remote_subprocess_debugging,
+)
 
-from .helpers import test_subprocess
+from .helpers import close_and_unlink, test_subprocess
 from .mocks import MockFrameInfo, MockInterpreterInfo
 
 
@@ -228,6 +233,154 @@ def test_cpu_mode_with_no_samples(self):
         self.assertIn("No samples were collected", output)
         self.assertIn("CPU mode", output)
 
+    def test_jsonl_collector_rspects_skip_idle(self):
+        """Test that frames are actually filtered when skip_idle=True."""
+        import tempfile
+        import json
+
+        collapsed_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, collapsed_out)
+
+        # Create mock frames with different thread statuses
+        class MockThreadInfoWithStatus:
+            def __init__(self, thread_id, frame_info, status):
+                self.thread_id = thread_id
+                self.frame_info = frame_info
+                self.status = status
+
+        # Create test data: active thread (HAS_GIL | ON_CPU), idle thread (neither), and another active thread
+        ACTIVE_STATUS = (
+            THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU
+        )  # Has GIL and on CPU
+        IDLE_STATUS = 0  # Neither has GIL nor on CPU
+
+        test_frames = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfoWithStatus(
+                        1,
+                        [MockFrameInfo("active1.py", 10, "active_func1")],
+                        ACTIVE_STATUS,
+                    ),
+                    MockThreadInfoWithStatus(
+                        2,
+                        [MockFrameInfo("idle.py", 20, "idle_func")],
+                        IDLE_STATUS,
+                    ),
+                    MockThreadInfoWithStatus(
+                        3,
+                        [MockFrameInfo("active2.py", 30, "active_func2")],
+                        ACTIVE_STATUS,
+                    ),
+                ],
+            )
+        ]
+
+        # Test with skip_idle=True - should only process running threads
+        collector_skip = JsonlCollector(
+            sample_interval_usec=1000, skip_idle=True
+        )
+        collector_skip.collect(test_frames)
+
+        run_id = collector_skip.run_id
+
+        # Should only have functions from running threads (status 0)
+        with captured_stdout(), captured_stderr():
+            collector_skip.export(collapsed_out.name)
+
+        # Check file contents
+        with open(collapsed_out.name, "r") as f:
+            content = f.read()
+
+        lines = content.strip().split("\n")
+        self.assertEqual(len(lines), 5)
+
+        def jsonl(obj):
+            return json.dumps(obj, separators=(",", ":"))
+
+        expected = [
+            jsonl({"type": "meta", "v": 1, "run_id": run_id,
+                   "sample_interval_usec": 1000}),
+            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
+                   "defs": [{"str_id": 1, "value": "active_func1"},
+                            {"str_id": 2, "value": "active1.py"},
+                            {"str_id": 3, "value": "idle_func"},
+                            {"str_id": 4, "value": "idle.py"},
+                            {"str_id": 5, "value": "active_func2"},
+                            {"str_id": 6, "value": "active2.py"}]}),
+            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
+                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
+                             "line": 10, "end_line": 10},
+                            {"frame_id": 2, "path_str_id": 4, "func_str_id": 3,
+                             "line": 20, "end_line": 20},
+                            {"frame_id": 3, "path_str_id": 6, "func_str_id": 5,
+                             "line": 30, "end_line": 30}]}),
+            jsonl({"type": "agg", "v": 1, "run_id": run_id,
+                   "kind": "frame", "scope": "final", "samples_total": 3,
+                   "entries": [{"frame_id": 1, "self": 1, "cumulative": 1},
+                               {"frame_id": 2, "self": 1, "cumulative": 1},
+                               {"frame_id": 3, "self": 1, "cumulative": 1}]}),
+            jsonl({"type": "end", "v": 1, "run_id": run_id,
+                   "samples_total": 3}),
+        ]
+
+        for exp in expected:
+            self.assertIn(exp, lines)
+
+        # Test with skip_idle=False - should process all threads
+        collector_no_skip = JsonlCollector(
+            sample_interval_usec=1000, skip_idle=False
+        )
+        collector_no_skip.collect(test_frames)
+
+        run_id = collector_no_skip.run_id
+
+        # Should have functions from all threads
+        with captured_stdout(), captured_stderr():
+            collector_no_skip.export(collapsed_out.name)
+
+        # Check file contents
+        with open(collapsed_out.name, "r") as f:
+            content = f.read()
+
+        lines = content.strip().split("\n")
+        self.assertEqual(len(lines), 5)
+
+        expected = [
+            jsonl({"type": "meta", "v": 1, "run_id": run_id,
+                   "sample_interval_usec": 1000}),
+            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
+                   "defs": [{"str_id": 1, "value": "active_func1"},
+                            {"str_id": 2, "value": "active1.py"},
+                            {"str_id": 3, "value": "idle_func"},
+                            {"str_id": 4, "value": "idle.py"},
+                            {"str_id": 5, "value": "active_func2"},
+                            {"str_id": 6, "value": "active2.py"}]}),
+            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
+                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
+                             "line": 10, "end_line": 10},
+                            {"frame_id": 2, "path_str_id": 4, "func_str_id": 3,
+                             "line": 20, "end_line": 20},
+                            {"frame_id": 3, "path_str_id": 6, "func_str_id": 5,
+                             "line": 30, "end_line": 30}]}),
+            jsonl({"type": "agg", "v": 1, "run_id": run_id,
+                   "kind": "frame", "scope": "final", "samples_total": 3,
+                   "entries": [{"frame_id": 1, "self": 1, "cumulative": 1},
+                               {"frame_id": 2, "self": 1, "cumulative": 1},
+                               {"frame_id": 3, "self": 1, "cumulative": 1}]}),
+            jsonl({"type": "end", "v": 1, "run_id": run_id,
+                   "samples_total": 3}),
+        ]
+
+        for exp in expected:
+            self.assertIn(exp, lines)
+
+        # self.assertIn(active1_key, collector_no_skip.result)
+        # self.assertIn(active2_key, collector_no_skip.result)
+        # self.assertIn(
+        #     idle_key, collector_no_skip.result
+        # )  # Idle thread should be included
 
 @requires_remote_subprocess_debugging()
 class TestGilModeFiltering(unittest.TestCase):

From 9c106a4075ec885fa7e3e89b04d97417bece2e34 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 12:51:16 +0200
Subject: [PATCH 11/41] ok, thx b4fac15613a16f9cd7b2ee32840523b399f4621f

---
 .../test_sampling_profiler/test_modes.py          | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
index 67b82eff091d08..9d792b8d6f20ab 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
@@ -305,24 +305,19 @@ def jsonl(obj):
             jsonl({"type": "str_def", "v": 1, "run_id": run_id,
                    "defs": [{"str_id": 1, "value": "active_func1"},
                             {"str_id": 2, "value": "active1.py"},
-                            {"str_id": 3, "value": "idle_func"},
-                            {"str_id": 4, "value": "idle.py"},
-                            {"str_id": 5, "value": "active_func2"},
-                            {"str_id": 6, "value": "active2.py"}]}),
+                            {"str_id": 3, "value": "active_func2"},
+                            {"str_id": 4, "value": "active2.py"}]}),
             jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
                    "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
                              "line": 10, "end_line": 10},
                             {"frame_id": 2, "path_str_id": 4, "func_str_id": 3,
-                             "line": 20, "end_line": 20},
-                            {"frame_id": 3, "path_str_id": 6, "func_str_id": 5,
                              "line": 30, "end_line": 30}]}),
             jsonl({"type": "agg", "v": 1, "run_id": run_id,
-                   "kind": "frame", "scope": "final", "samples_total": 3,
+                   "kind": "frame", "scope": "final", "samples_total": 2,
                    "entries": [{"frame_id": 1, "self": 1, "cumulative": 1},
-                               {"frame_id": 2, "self": 1, "cumulative": 1},
-                               {"frame_id": 3, "self": 1, "cumulative": 1}]}),
+                               {"frame_id": 2, "self": 1, "cumulative": 1}]}),
             jsonl({"type": "end", "v": 1, "run_id": run_id,
-                   "samples_total": 3}),
+                   "samples_total": 2}),
         ]
 
         for exp in expected:

From 727f05f3eb16f0d0d373d2fc373ae8a0bcdd8910 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:16:36 +0200
Subject: [PATCH 12/41] check if it works fine with (file, loc, func, op)

---
 .../test_sampling_profiler/test_collectors.py | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 4c73c42a811076..9982403c329091 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2471,6 +2471,38 @@ def test_gecko_collector_frame_format(self):
         # Should have recorded 3 functions
         self.assertEqual(thread["funcTable"]["length"], 3)
 
+    def test_jsonl_collector_frame_format(self):
+        """Test JsonlCollector with 4-element frame format."""
+        collector = JsonlCollector(sample_interval_usec=1000)
+        collector.collect(self._make_sample_frames())
+
+        with tempfile.NamedTemporaryFile(delete=False) as f:
+            self.addClassCleanup(close_and_unlink, f)
+            collector.export(f.name)
+
+        with open(f.name, "r", encoding="utf-8") as fp:
+            records = [json.loads(line) for line in fp]
+
+        str_defs = {
+            item["str_id"]: item["value"]
+            for record in records
+            if record["type"] == "str_def"
+            for item in record["defs"]
+        }
+        frame_defs = [
+            item
+            for record in records
+            if record["type"] == "frame_def"
+            for item in record["defs"]
+        ]
+
+        self.assertEqual(len(frame_defs), 3)
+
+        paths = {str_defs[item["path_str_id"]] for item in frame_defs}
+        funcs = {str_defs[item["func_str_id"]] for item in frame_defs}
+
+        self.assertEqual(paths, {"app.py", "utils.py", "lib.py"})
+        self.assertEqual(funcs, {"main", "helper", "process"})
 
 class TestInternalFrameFiltering(unittest.TestCase):
     """Tests for filtering internal profiler frames from output."""

From 1c6f81a45d61b6b53c558343c0edc9589c649fb2 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:19:44 +0200
Subject: [PATCH 13/41] missing new line

---
 .../test_profiling/test_sampling_profiler/test_collectors.py     | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 9982403c329091..a407e16b9a61be 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2504,6 +2504,7 @@ def test_jsonl_collector_frame_format(self):
         self.assertEqual(paths, {"app.py", "utils.py", "lib.py"})
         self.assertEqual(funcs, {"main", "helper", "process"})
 
+
 class TestInternalFrameFiltering(unittest.TestCase):
     """Tests for filtering internal profiler frames from output."""
 

From c278f83476b3c412e47721d06040e3cc61c288b5 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:25:20 +0200
Subject: [PATCH 14/41] filter out sync coordinator

---
 .../test_sampling_profiler/test_collectors.py | 51 +++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index a407e16b9a61be..3937fe3b8aa018 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2631,3 +2631,54 @@ def test_collapsed_stack_collector_filters_internal_frames(self):
         for (call_tree, _), _ in collector.stack_counter.items():
             for filename, _, _ in call_tree:
                 self.assertNotIn("_sync_coordinator", filename)
+
+    def test_jsonl_collector_filters_internal_frames(self):
+        """Test that JsonlCollector filters out internal frames."""
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
+
+        collector = JsonlCollector(sample_interval_usec=1000)
+
+        frames = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1,
+                        [
+                            MockFrameInfo("app.py", 50, "run"),
+                            MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
+                            MockFrameInfo("<frozen runpy>", 87, "_run_code"),
+                        ],
+                        status=THREAD_STATUS_HAS_GIL,
+                    )
+                ],
+            )
+        ]
+
+        collector.collect(frames)
+        collector.export(jsonl_out.name)
+
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
+
+        str_defs = {
+            item["str_id"]: item["value"]
+            for record in records
+            if record["type"] == "str_def"
+            for item in record["defs"]
+        }
+        frame_defs = [
+            item
+            for record in records
+            if record["type"] == "frame_def"
+            for item in record["defs"]
+        ]
+
+        paths = {str_defs[item["path_str_id"]] for item in frame_defs}
+
+        self.assertIn("app.py", paths)
+        self.assertIn("<frozen runpy>", paths)
+
+        for path in paths:
+            self.assertNotIn("_sync_coordinator", path)

From bf6b4547dca2cc40ecd0535b8081e7a82a1fa950 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:30:12 +0200
Subject: [PATCH 15/41] s/collapsed_out/jsonl_out/, less copying :D

---
 .../test_sampling_profiler/test_collectors.py | 24 +++++++++----------
 .../test_sampling_profiler/test_modes.py      | 12 +++++-----
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 3937fe3b8aa018..56cc7a2bd1190c 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -1671,8 +1671,8 @@ def test_diff_flamegraph_load_baseline(self):
         self.assertAlmostEqual(cold_node["diff_pct"], -50.0)
 
     def test_jsonl_collector_export(self):
-        collapsed_out = tempfile.NamedTemporaryFile(delete=False)
-        self.addCleanup(close_and_unlink, collapsed_out)
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
 
         collector = JsonlCollector(1000)
         run_id = collector.run_id
@@ -1710,10 +1710,10 @@ def test_jsonl_collector_export(self):
         collector.collect(test_frames3)
 
         with captured_stdout(), captured_stderr():
-            collector.export(collapsed_out.name)
+            collector.export(jsonl_out.name)
 
         # Check file contents
-        with open(collapsed_out.name, "r") as f:
+        with open(jsonl_out.name, "r") as f:
             content = f.read()
 
         lines = content.strip().split("\n")
@@ -2151,8 +2151,8 @@ def test_gecko_collector_with_location_info(self):
 
     def test_jsonl_collector_with_location_info(self):
         """Test JsonlCollector handles LocationInfo properly."""
-        collapsed_out = tempfile.NamedTemporaryFile(delete=False)
-        self.addCleanup(close_and_unlink, collapsed_out)
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
 
         collector = JsonlCollector(sample_interval_usec=1000)
         run_id = collector.run_id
@@ -2168,10 +2168,10 @@ def test_jsonl_collector_with_location_info(self):
 
         # Should extract lineno from location
         with captured_stdout(), captured_stderr():
-            collector.export(collapsed_out.name)
+            collector.export(jsonl_out.name)
 
         # Check file contents
-        with open(collapsed_out.name, "r") as f:
+        with open(jsonl_out.name, "r") as f:
             content = f.read()
 
         lines = content.strip().split("\n")
@@ -2201,8 +2201,8 @@ def jsonl(obj):
 
     def test_jsonl_collector_with_none_location(self):
         """Test JsonlCollector handles None location (synthetic frames)."""
-        collapsed_out = tempfile.NamedTemporaryFile(delete=False)
-        self.addCleanup(close_and_unlink, collapsed_out)
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
 
         collector = JsonlCollector(sample_interval_usec=1000)
         run_id = collector.run_id
@@ -2220,10 +2220,10 @@ def test_jsonl_collector_with_none_location(self):
 
         # Should handle None location as synthetic frame
         with captured_stdout(), captured_stderr():
-            collector.export(collapsed_out.name)
+            collector.export(jsonl_out.name)
 
         # Check file contents
-        with open(collapsed_out.name, "r") as f:
+        with open(jsonl_out.name, "r") as f:
             content = f.read()
 
         lines = content.strip().split("\n")
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
index 9d792b8d6f20ab..a4c7ed857ce7fb 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
@@ -238,8 +238,8 @@ def test_jsonl_collector_rspects_skip_idle(self):
         import tempfile
         import json
 
-        collapsed_out = tempfile.NamedTemporaryFile(delete=False)
-        self.addCleanup(close_and_unlink, collapsed_out)
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
 
         # Create mock frames with different thread statuses
         class MockThreadInfoWithStatus:
@@ -287,10 +287,10 @@ def __init__(self, thread_id, frame_info, status):
 
         # Should only have functions from running threads (status 0)
         with captured_stdout(), captured_stderr():
-            collector_skip.export(collapsed_out.name)
+            collector_skip.export(jsonl_out.name)
 
         # Check file contents
-        with open(collapsed_out.name, "r") as f:
+        with open(jsonl_out.name, "r") as f:
             content = f.read()
 
         lines = content.strip().split("\n")
@@ -333,10 +333,10 @@ def jsonl(obj):
 
         # Should have functions from all threads
         with captured_stdout(), captured_stderr():
-            collector_no_skip.export(collapsed_out.name)
+            collector_no_skip.export(jsonl_out.name)
 
         # Check file contents
-        with open(collapsed_out.name, "r") as f:
+        with open(jsonl_out.name, "r") as f:
             content = f.read()
 
         lines = content.strip().split("\n")

From 9ef96f82e7c7711fa05e0b114a39c46e8f58601b Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:31:24 +0200
Subject: [PATCH 16/41] nicer reading

---
 .../test_profiling/test_sampling_profiler/test_collectors.py   | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 56cc7a2bd1190c..4411bae62fc7ee 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2637,8 +2637,6 @@ def test_jsonl_collector_filters_internal_frames(self):
         jsonl_out = tempfile.NamedTemporaryFile(delete=False)
         self.addCleanup(close_and_unlink, jsonl_out)
 
-        collector = JsonlCollector(sample_interval_usec=1000)
-
         frames = [
             MockInterpreterInfo(
                 0,
@@ -2656,6 +2654,7 @@ def test_jsonl_collector_filters_internal_frames(self):
             )
         ]
 
+        collector = JsonlCollector(sample_interval_usec=1000)
         collector.collect(frames)
         collector.export(jsonl_out.name)
 

From 13dd0f2b03e5b9707d6a250f74a57f73bb366d2a Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:32:23 +0200
Subject: [PATCH 17/41] typo

---
 Lib/test/test_profiling/test_sampling_profiler/test_modes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
index a4c7ed857ce7fb..37cb6c3a5c5ab2 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
@@ -233,7 +233,7 @@ def test_cpu_mode_with_no_samples(self):
         self.assertIn("No samples were collected", output)
         self.assertIn("CPU mode", output)
 
-    def test_jsonl_collector_rspects_skip_idle(self):
+    def test_jsonl_collector_respects_skip_idle(self):
         """Test that frames are actually filtered when skip_idle=True."""
         import tempfile
         import json

From ae7afe1fa4ddb415ead8dc11833cb7dbcb84ad3e Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:37:39 +0200
Subject: [PATCH 18/41] too much copying, left-over

---
 Lib/test/test_profiling/test_sampling_profiler/test_modes.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
index 37cb6c3a5c5ab2..2bac26c37091b0 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
@@ -371,11 +371,6 @@ def jsonl(obj):
         for exp in expected:
             self.assertIn(exp, lines)
 
-        # self.assertIn(active1_key, collector_no_skip.result)
-        # self.assertIn(active2_key, collector_no_skip.result)
-        # self.assertIn(
-        #     idle_key, collector_no_skip.result
-        # )  # Idle thread should be included
 
 @requires_remote_subprocess_debugging()
 class TestGilModeFiltering(unittest.TestCase):

From a8eb46d51ddafd04937cf8b2691fda191a620fd1 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:49:39 +0200
Subject: [PATCH 19/41] just Counter

---
 Lib/profiling/sampling/jsonl_collector.py | 87 +++++++++++------------
 1 file changed, 40 insertions(+), 47 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 59ab3b865c182c..56539c2a9e2232 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -1,5 +1,6 @@
 """JSONL collector."""
 
+from collections import Counter
 import json
 import uuid
 from itertools import batched
@@ -38,8 +39,8 @@ def __init__(self, sample_interval_usec, *, skip_idle=False, mode=None):
         self._frame_to_id = {}
         self._frames = []
 
-        self._frame_self = {}
-        self._frame_cumulative = {}
+        self._frame_self = Counter()
+        self._frame_cumulative = Counter()
         self._samples_total = 0
 
         self._mode = mode
@@ -56,21 +57,39 @@ def process_frames(self, frames, _thread_id, weight=1):
         ]
         leaf_frame_id = frame_ids[0]
 
-        self._frame_self[leaf_frame_id] = (
-            self._frame_self.get(leaf_frame_id, 0) + weight
-        )
+        self._frame_self[leaf_frame_id] += weight
 
         for frame_id in set(frame_ids):
-            self._frame_cumulative[frame_id] = (
-                self._frame_cumulative.get(frame_id, 0) + weight
-            )
+            self._frame_cumulative[frame_id] += weight
 
     def export(self, filename):
         with open(filename, "w", encoding="utf-8") as output:
             self._write_message(output, self._build_meta_record())
-            self._write_chunked_defs(output, "str_def", self._strings)
-            self._write_chunked_defs(output, "frame_def", self._frames)
-            self._write_chunked_agg(output, self._iter_agg_entries())
+            self._write_chunked_records(
+                output,
+                {"type": "str_def", "v": 1, "run_id": self.run_id},
+                "defs",
+                self._strings,
+            )
+            self._write_chunked_records(
+                output,
+                {"type": "frame_def", "v": 1, "run_id": self.run_id},
+                "defs",
+                self._frames,
+            )
+            self._write_chunked_records(
+                output,
+                {
+                    "type": "agg",
+                    "v": 1,
+                    "run_id": self.run_id,
+                    "kind": "frame",
+                    "scope": "final",
+                    "samples_total": self._samples_total,
+                },
+                "entries",
+                self._iter_agg_entries(),
+            )
             self._write_message(output, self._build_end_record())
 
     def _build_meta_record(self):
@@ -171,44 +190,18 @@ def _normalize_export_location(location):
         return normalized
 
     def _iter_agg_entries(self):
-        entries = []
-        for frame_record in self._frames:
-            frame_id = frame_record["frame_id"]
-            entries.append(
-                {
-                    "frame_id": frame_id,
-                    "self": self._frame_self.get(frame_id, 0),
-                    "cumulative": self._frame_cumulative.get(frame_id, 0),
-                }
-            )
-        return entries
-
-    def _write_chunked_defs(self, output, record_type, entries):
-        for chunk in batched(entries, _CHUNK_SIZE):
-            self._write_message(
-                output,
-                {
-                    "type": record_type,
-                    "v": 1,
-                    "run_id": self.run_id,
-                    "defs": chunk,
-                },
-            )
+        return [
+            {
+                "frame_id": frame_record["frame_id"],
+                "self": self._frame_self[frame_record["frame_id"]],
+                "cumulative": self._frame_cumulative[frame_record["frame_id"]],
+            }
+            for frame_record in self._frames
+        ]
 
-    def _write_chunked_agg(self, output, entries):
+    def _write_chunked_records(self, output, base_record, chunk_field, entries):
         for chunk in batched(entries, _CHUNK_SIZE):
-            self._write_message(
-                output,
-                {
-                    "type": "agg",
-                    "v": 1,
-                    "run_id": self.run_id,
-                    "kind": "frame",
-                    "scope": "final",
-                    "samples_total": self._samples_total,
-                    "entries": chunk,
-                },
-            )
+            self._write_message(output, {**base_record, chunk_field: chunk})
 
     @staticmethod
     def _write_message(output, record):

From 9982bb4283798578c2dcbadb0c3752177ee8593a Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:52:11 +0200
Subject: [PATCH 20/41] ruff

---
 Lib/profiling/sampling/jsonl_collector.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 56539c2a9e2232..244501ba446f07 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -199,7 +199,9 @@ def _iter_agg_entries(self):
             for frame_record in self._frames
         ]
 
-    def _write_chunked_records(self, output, base_record, chunk_field, entries):
+    def _write_chunked_records(
+        self, output, base_record, chunk_field, entries
+    ):
         for chunk in batched(entries, _CHUNK_SIZE):
             self._write_message(output, {**base_record, chunk_field: chunk})
 

From fe298881656505963b2e9d49d3ab3362b7ee3abe Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:56:14 +0200
Subject: [PATCH 21/41] future-proof name

---
 Lib/profiling/sampling/jsonl_collector.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 244501ba446f07..7d7b44c8d89407 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -88,7 +88,7 @@ def export(self, filename):
                     "samples_total": self._samples_total,
                 },
                 "entries",
-                self._iter_agg_entries(),
+                self._iter_final_agg_entries(),
             )
             self._write_message(output, self._build_end_record())
 
@@ -189,7 +189,7 @@ def _normalize_export_location(location):
             normalized["end_col"] = end_col_offset
         return normalized
 
-    def _iter_agg_entries(self):
+    def _iter_final_agg_entries(self):
         return [
             {
                 "frame_id": frame_record["frame_id"],

From a5192b77d2f56391290780a48771906bc21f64d5 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:56:58 +0200
Subject: [PATCH 22/41] future-proof iter for streaming

---
 Lib/profiling/sampling/jsonl_collector.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 7d7b44c8d89407..1b318573425edf 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -190,14 +190,13 @@ def _normalize_export_location(location):
         return normalized
 
     def _iter_final_agg_entries(self):
-        return [
-            {
-                "frame_id": frame_record["frame_id"],
-                "self": self._frame_self[frame_record["frame_id"]],
-                "cumulative": self._frame_cumulative[frame_record["frame_id"]],
+        for frame_record in self._frames:
+            frame_id = frame_record["frame_id"]
+            yield {
+                "frame_id": frame_id,
+                "self": self._frame_self[frame_id],
+                "cumulative": self._frame_cumulative[frame_id],
             }
-            for frame_record in self._frames
-        ]
 
     def _write_chunked_records(
         self, output, base_record, chunk_field, entries

From 1d53e16b5a634fdea156649cd624b702c5947cfe Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 14:13:23 +0200
Subject: [PATCH 23/41] truth be told, this should be a layer above

---
 Lib/profiling/sampling/collector.py           |  5 ++-
 Lib/profiling/sampling/jsonl_collector.py     | 42 +++++++------------
 .../test_sampling_profiler/test_collectors.py |  5 +++
 3 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py
index 08759b611696b7..ad5be46821c096 100644
--- a/Lib/profiling/sampling/collector.py
+++ b/Lib/profiling/sampling/collector.py
@@ -20,13 +20,16 @@ def normalize_location(location):
     """Normalize location to a 4-tuple format.
 
     Args:
-        location: tuple (lineno, end_lineno, col_offset, end_col_offset) or None
+        location: tuple (lineno, end_lineno, col_offset, end_col_offset),
+            an integer line number, or None
 
     Returns:
         tuple: (lineno, end_lineno, col_offset, end_col_offset)
     """
     if location is None:
         return DEFAULT_LOCATION
+    if isinstance(location, int):
+        return (location, location, -1, -1)
     return location
 
 
diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 1b318573425edf..6c8f2bc2fd3135 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -12,6 +12,7 @@
     PROFILING_MODE_GIL,
     PROFILING_MODE_WALL,
 )
+from .collector import normalize_location
 from .stack_collector import StackTraceCollector
 
 
@@ -117,7 +118,7 @@ def _build_end_record(self):
 
     def _get_or_create_frame_id(self, filename, location, funcname):
         synthetic = location is None
-        location_fields = self._normalize_export_location(location)
+        location_fields = self._location_to_export_fields(location)
         func_str_id = self._intern_string(funcname)
         path_str_id = self._intern_string(filename)
 
@@ -160,34 +161,19 @@ def _intern_string(self, value):
         return string_id
 
     @staticmethod
-    def _normalize_export_location(location):
-        if location is None:
-            return {"line": 0}
-
-        if isinstance(location, int):
-            return {"line": max(location, 0)}
-
-        if not isinstance(location, tuple):
-            lineno = getattr(location, "lineno", 0)
-            location = (
-                lineno,
-                getattr(location, "end_lineno", lineno),
-                getattr(location, "col_offset", -1),
-                getattr(location, "end_col_offset", -1),
-            )
+    def _location_to_export_fields(location):
+        lineno, end_lineno, col_offset, end_col_offset = normalize_location(
+            location
+        )
 
-        lineno, end_lineno, col_offset, end_col_offset = location
-        if not isinstance(lineno, int) or lineno <= 0:
-            return {"line": 0}
-
-        normalized = {"line": lineno}
-        if isinstance(end_lineno, int) and end_lineno > 0:
-            normalized["end_line"] = end_lineno
-        if isinstance(col_offset, int) and col_offset >= 0:
-            normalized["col"] = col_offset
-        if isinstance(end_col_offset, int) and end_col_offset >= 0:
-            normalized["end_col"] = end_col_offset
-        return normalized
+        fields = {"line": lineno}
+        if end_lineno > 0:
+            fields["end_line"] = end_lineno
+        if col_offset >= 0:
+            fields["col"] = col_offset
+        if end_col_offset >= 0:
+            fields["end_col"] = end_col_offset
+        return fields
 
     def _iter_final_agg_entries(self):
         for frame_record in self._frames:
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 4411bae62fc7ee..a458475fc46d58 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -1959,6 +1959,11 @@ def test_extract_lineno_from_none(self):
         """Test extracting lineno from None (synthetic frames)."""
         self.assertEqual(extract_lineno(None), 0)
 
+    def test_normalize_location_with_int(self):
+        """Test normalize_location expands a legacy integer line number."""
+        result = normalize_location(42)
+        self.assertEqual(result, (42, 42, -1, -1))
+
     def test_normalize_location_with_location_info(self):
         """Test normalize_location passes through LocationInfo."""
         loc = LocationInfo(10, 15, 0, 5)

From 4b477c0dacc1eb0ab93342de4757a6fae41c0ebd Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 15:20:48 +0200
Subject: [PATCH 24/41] helper

---
 .../test_sampling_profiler/test_collectors.py | 255 +++++++++---------
 .../test_sampling_profiler/test_modes.py      | 148 +---------
 2 files changed, 130 insertions(+), 273 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index a458475fc46d58..f1d005dff8c976 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -58,6 +58,25 @@ def find_child_by_name(children, strings, substr):
     return None
 
 
+def _jsonl_tables(records):
+    meta = next(record for record in records if record["type"] == "meta")
+    end = next(record for record in records if record["type"] == "end")
+    agg = next(record for record in records if record["type"] == "agg")
+    str_defs = {
+        item["str_id"]: item["value"]
+        for record in records
+        if record["type"] == "str_def"
+        for item in record["defs"]
+    }
+    frame_defs = [
+        item
+        for record in records
+        if record["type"] == "frame_def"
+        for item in record["defs"]
+    ]
+    return meta, str_defs, frame_defs, agg, end
+
+
 class TestSampleProfilerComponents(unittest.TestCase):
     """Unit tests for individual profiler components."""
 
@@ -1670,14 +1689,12 @@ def test_diff_flamegraph_load_baseline(self):
         self.assertAlmostEqual(cold_node["diff"], -1.0)
         self.assertAlmostEqual(cold_node["diff_pct"], -50.0)
 
-    def test_jsonl_collector_export(self):
+    def test_jsonl_collector_export_exact_output(self):
         jsonl_out = tempfile.NamedTemporaryFile(delete=False)
         self.addCleanup(close_and_unlink, jsonl_out)
 
         collector = JsonlCollector(1000)
-        run_id = collector.run_id
-
-        self.assertIsNotNone(run_id)
+        collector.run_id = "run-123"
 
         test_frames1 = [
             MockInterpreterInfo(
@@ -1709,46 +1726,74 @@ def test_jsonl_collector_export(self):
         collector.collect(test_frames2)
         collector.collect(test_frames3)
 
-        with captured_stdout(), captured_stderr():
-            collector.export(jsonl_out.name)
+        collector.export(jsonl_out.name)
 
-        # Check file contents
-        with open(jsonl_out.name, "r") as f:
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
             content = f.read()
 
-        lines = content.strip().split("\n")
-        self.assertEqual(len(lines), 5)
-
-        def jsonl(obj):
-            return json.dumps(obj, separators=(",", ":"))
-
-        expected = [
-            jsonl({"type": "meta", "v": 1, "run_id": run_id,
-                   "sample_interval_usec": 1000}),
-            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
-                   "defs": [{"str_id": 1, "value": "func1"},
-                            {"str_id": 2, "value": "file.py"},
-                            {"str_id": 3, "value": "func2"},
-                            {"str_id": 4, "value": "other_func"},
-                            {"str_id": 5, "value": "other.py"}]}),
-            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
-                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
-                             "line": 10, "end_line": 10},
-                            {"frame_id": 2, "path_str_id": 2, "func_str_id": 3,
-                             "line": 20, "end_line": 20},
-                            {"frame_id": 3, "path_str_id": 5, "func_str_id": 4,
-                             "line": 5, "end_line": 5}]}),
-            jsonl({"type": "agg", "v": 1, "run_id": run_id,
-                   "kind": "frame", "scope": "final", "samples_total": 3,
-                   "entries": [{"frame_id": 1, "self": 2, "cumulative": 2},
-                               {"frame_id": 2, "self": 0, "cumulative": 2},
-                               {"frame_id": 3, "self": 1, "cumulative": 1}]}),
-            jsonl({"type": "end", "v": 1, "run_id": run_id,
-                   "samples_total": 3}),
-        ]
-
-        for exp in expected:
-            self.assertIn(exp, lines)
+        self.assertEqual(
+            content,
+            (
+                '{"type":"meta","v":1,"run_id":"run-123","sample_interval_usec":1000}\n'
+                '{"type":"str_def","v":1,"run_id":"run-123","defs":[{"str_id":1,"value":"func1"},{"str_id":2,"value":"file.py"},{"str_id":3,"value":"func2"},{"str_id":4,"value":"other_func"},{"str_id":5,"value":"other.py"}]}\n'
+                '{"type":"frame_def","v":1,"run_id":"run-123","defs":[{"frame_id":1,"path_str_id":2,"func_str_id":1,"line":10,"end_line":10},{"frame_id":2,"path_str_id":2,"func_str_id":3,"line":20,"end_line":20},{"frame_id":3,"path_str_id":5,"func_str_id":4,"line":5,"end_line":5}]}\n'
+                '{"type":"agg","v":1,"run_id":"run-123","kind":"frame","scope":"final","samples_total":3,"entries":[{"frame_id":1,"self":2,"cumulative":2},{"frame_id":2,"self":0,"cumulative":2},{"frame_id":3,"self":1,"cumulative":1}]}\n'
+                '{"type":"end","v":1,"run_id":"run-123","samples_total":3}\n'
+            ),
+        )
+
+    def test_jsonl_collector_skip_idle_filters_threads(self):
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
+
+        active_status = THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU
+        frames = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1,
+                        [MockFrameInfo("active1.py", 10, "active_func1")],
+                        status=active_status,
+                    ),
+                    MockThreadInfo(
+                        2,
+                        [MockFrameInfo("idle.py", 20, "idle_func")],
+                        status=0,
+                    ),
+                    MockThreadInfo(
+                        3,
+                        [MockFrameInfo("active2.py", 30, "active_func2")],
+                        status=active_status,
+                    ),
+                ],
+            )
+        ]
+
+        def export_summary(skip_idle):
+            collector = JsonlCollector(1000, skip_idle=skip_idle)
+            collector.collect(frames)
+            collector.export(jsonl_out.name)
+
+            with open(jsonl_out.name, "r", encoding="utf-8") as f:
+                records = [json.loads(line) for line in f]
+
+            _, str_defs, frame_defs, agg_record, _ = _jsonl_tables(records)
+            paths = {str_defs[item["path_str_id"]] for item in frame_defs}
+            funcs = {str_defs[item["func_str_id"]] for item in frame_defs}
+            return paths, funcs, agg_record["samples_total"]
+
+        paths, funcs, samples_total = export_summary(skip_idle=True)
+        self.assertEqual(paths, {"active1.py", "active2.py"})
+        self.assertEqual(funcs, {"active_func1", "active_func2"})
+        self.assertEqual(samples_total, 2)
+
+        paths, funcs, samples_total = export_summary(skip_idle=False)
+        self.assertEqual(paths, {"active1.py", "idle.py", "active2.py"})
+        self.assertEqual(
+            funcs, {"active_func1", "idle_func", "active_func2"}
+        )
+        self.assertEqual(samples_total, 3)
 
 
 class TestRecursiveFunctionHandling(unittest.TestCase):
@@ -2160,7 +2205,6 @@ def test_jsonl_collector_with_location_info(self):
         self.addCleanup(close_and_unlink, jsonl_out)
 
         collector = JsonlCollector(sample_interval_usec=1000)
-        run_id = collector.run_id
 
         # Frame with LocationInfo
         frame = MockFrameInfo("test.py", 42, "my_function")
@@ -2171,38 +2215,28 @@ def test_jsonl_collector_with_location_info(self):
         ]
         collector.collect(frames)
 
-        # Should extract lineno from location
-        with captured_stdout(), captured_stderr():
-            collector.export(jsonl_out.name)
+        collector.export(jsonl_out.name)
 
-        # Check file contents
-        with open(jsonl_out.name, "r") as f:
-            content = f.read()
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
 
-        lines = content.strip().split("\n")
-        self.assertEqual(len(lines), 5)
-
-        def jsonl(obj):
-            return json.dumps(obj, separators=(",", ":"))
-
-        expected = [
-            jsonl({"type": "meta", "v": 1, "run_id": run_id,
-                   "sample_interval_usec": 1000}),
-            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
-                   "defs": [{"str_id": 1, "value": "my_function"},
-                            {"str_id": 2, "value": "test.py"}]}),
-            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
-                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
-                             "line": 42, "end_line": 42}]}),
-            jsonl({"type": "agg", "v": 1, "run_id": run_id,
-                   "kind": "frame", "scope": "final", "samples_total": 1,
-                   "entries": [{"frame_id": 1, "self": 1, "cumulative": 1}]}),
-            jsonl({"type": "end", "v": 1, "run_id": run_id,
-                   "samples_total": 1}),
-        ]
-
-        for exp in expected:
-            self.assertIn(exp, lines)
+        meta, str_defs, frame_defs, agg, end = _jsonl_tables(records)
+        self.assertEqual(meta["sample_interval_usec"], 1000)
+        self.assertEqual(agg["samples_total"], 1)
+        self.assertEqual(end["samples_total"], 1)
+        self.assertEqual(len(frame_defs), 1)
+        self.assertEqual(str_defs[frame_defs[0]["path_str_id"]], "test.py")
+        self.assertEqual(str_defs[frame_defs[0]["func_str_id"]], "my_function")
+        self.assertEqual(
+            frame_defs[0],
+            {
+                "frame_id": 1,
+                "path_str_id": frame_defs[0]["path_str_id"],
+                "func_str_id": frame_defs[0]["func_str_id"],
+                "line": 42,
+                "end_line": 42,
+            },
+        )
 
     def test_jsonl_collector_with_none_location(self):
         """Test JsonlCollector handles None location (synthetic frames)."""
@@ -2210,7 +2244,6 @@ def test_jsonl_collector_with_none_location(self):
         self.addCleanup(close_and_unlink, jsonl_out)
 
         collector = JsonlCollector(sample_interval_usec=1000)
-        run_id = collector.run_id
 
         # Create frame with None location (like GC frame)
         frame = MockFrameInfo("~", 0, "<GC>")
@@ -2223,38 +2256,28 @@ def test_jsonl_collector_with_none_location(self):
         ]
         collector.collect(frames)
 
-        # Should handle None location as synthetic frame
-        with captured_stdout(), captured_stderr():
-            collector.export(jsonl_out.name)
-
-        # Check file contents
-        with open(jsonl_out.name, "r") as f:
-            content = f.read()
-
-        lines = content.strip().split("\n")
-        self.assertEqual(len(lines), 5)
-
-        def jsonl(obj):
-            return json.dumps(obj, separators=(",", ":"))
+        collector.export(jsonl_out.name)
 
-        expected = [
-            jsonl({"type": "meta", "v": 1, "run_id": run_id,
-                   "sample_interval_usec": 1000}),
-            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
-                   "defs": [{"str_id": 1, "value": "<GC>"},
-                            {"str_id": 2, "value": "~"}]}),
-            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
-                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
-                             "line": 0, "synthetic": True}]}),
-            jsonl({"type": "agg", "v": 1, "run_id": run_id,
-                   "kind": "frame", "scope": "final", "samples_total": 1,
-                   "entries": [{"frame_id": 1, "self": 1, "cumulative": 1}]}),
-            jsonl({"type": "end", "v": 1, "run_id": run_id,
-                   "samples_total": 1}),
-        ]
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
 
-        for exp in expected:
-            self.assertIn(exp, lines)
+        meta, str_defs, frame_defs, agg, end = _jsonl_tables(records)
+        self.assertEqual(meta["sample_interval_usec"], 1000)
+        self.assertEqual(agg["samples_total"], 1)
+        self.assertEqual(end["samples_total"], 1)
+        self.assertEqual(len(frame_defs), 1)
+        self.assertEqual(str_defs[frame_defs[0]["path_str_id"]], "~")
+        self.assertEqual(str_defs[frame_defs[0]["func_str_id"]], "<GC>")
+        self.assertEqual(
+            frame_defs[0],
+            {
+                "frame_id": 1,
+                "path_str_id": frame_defs[0]["path_str_id"],
+                "func_str_id": frame_defs[0]["func_str_id"],
+                "line": 0,
+                "synthetic": True,
+            },
+        )
 
 
 class TestOpcodeHandling(unittest.TestCase):
@@ -2488,18 +2511,7 @@ def test_jsonl_collector_frame_format(self):
         with open(f.name, "r", encoding="utf-8") as fp:
             records = [json.loads(line) for line in fp]
 
-        str_defs = {
-            item["str_id"]: item["value"]
-            for record in records
-            if record["type"] == "str_def"
-            for item in record["defs"]
-        }
-        frame_defs = [
-            item
-            for record in records
-            if record["type"] == "frame_def"
-            for item in record["defs"]
-        ]
+        _, str_defs, frame_defs, _, _ = _jsonl_tables(records)
 
         self.assertEqual(len(frame_defs), 3)
 
@@ -2666,18 +2678,7 @@ def test_jsonl_collector_filters_internal_frames(self):
         with open(jsonl_out.name, "r", encoding="utf-8") as f:
             records = [json.loads(line) for line in f]
 
-        str_defs = {
-            item["str_id"]: item["value"]
-            for record in records
-            if record["type"] == "str_def"
-            for item in record["defs"]
-        }
-        frame_defs = [
-            item
-            for record in records
-            if record["type"] == "frame_def"
-            for item in record["defs"]
-        ]
+        _, str_defs, frame_defs, _, _ = _jsonl_tables(records)
 
         paths = {str_defs[item["path_str_id"]] for item in frame_defs}
 
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
index 2bac26c37091b0..6cd636593e3db1 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
@@ -9,7 +9,6 @@
     import profiling.sampling
     import profiling.sampling.sample
     from profiling.sampling.pstats_collector import PstatsCollector
-    from profiling.sampling.jsonl_collector import JsonlCollector
     from profiling.sampling.cli import main, _parse_mode
     from profiling.sampling.constants import PROFILING_MODE_EXCEPTION
     from _remote_debugging import (
@@ -21,13 +20,9 @@
         "Test only runs when _remote_debugging is available"
     )
 
-from test.support import (
-    captured_stdout,
-    captured_stderr,
-    requires_remote_subprocess_debugging,
-)
+from test.support import requires_remote_subprocess_debugging
 
-from .helpers import close_and_unlink, test_subprocess
+from .helpers import test_subprocess
 from .mocks import MockFrameInfo, MockInterpreterInfo
 
 
@@ -233,145 +228,6 @@ def test_cpu_mode_with_no_samples(self):
         self.assertIn("No samples were collected", output)
         self.assertIn("CPU mode", output)
 
-    def test_jsonl_collector_respects_skip_idle(self):
-        """Test that frames are actually filtered when skip_idle=True."""
-        import tempfile
-        import json
-
-        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
-        self.addCleanup(close_and_unlink, jsonl_out)
-
-        # Create mock frames with different thread statuses
-        class MockThreadInfoWithStatus:
-            def __init__(self, thread_id, frame_info, status):
-                self.thread_id = thread_id
-                self.frame_info = frame_info
-                self.status = status
-
-        # Create test data: active thread (HAS_GIL | ON_CPU), idle thread (neither), and another active thread
-        ACTIVE_STATUS = (
-            THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU
-        )  # Has GIL and on CPU
-        IDLE_STATUS = 0  # Neither has GIL nor on CPU
-
-        test_frames = [
-            MockInterpreterInfo(
-                0,
-                [
-                    MockThreadInfoWithStatus(
-                        1,
-                        [MockFrameInfo("active1.py", 10, "active_func1")],
-                        ACTIVE_STATUS,
-                    ),
-                    MockThreadInfoWithStatus(
-                        2,
-                        [MockFrameInfo("idle.py", 20, "idle_func")],
-                        IDLE_STATUS,
-                    ),
-                    MockThreadInfoWithStatus(
-                        3,
-                        [MockFrameInfo("active2.py", 30, "active_func2")],
-                        ACTIVE_STATUS,
-                    ),
-                ],
-            )
-        ]
-
-        # Test with skip_idle=True - should only process running threads
-        collector_skip = JsonlCollector(
-            sample_interval_usec=1000, skip_idle=True
-        )
-        collector_skip.collect(test_frames)
-
-        run_id = collector_skip.run_id
-
-        # Should only have functions from running threads (status 0)
-        with captured_stdout(), captured_stderr():
-            collector_skip.export(jsonl_out.name)
-
-        # Check file contents
-        with open(jsonl_out.name, "r") as f:
-            content = f.read()
-
-        lines = content.strip().split("\n")
-        self.assertEqual(len(lines), 5)
-
-        def jsonl(obj):
-            return json.dumps(obj, separators=(",", ":"))
-
-        expected = [
-            jsonl({"type": "meta", "v": 1, "run_id": run_id,
-                   "sample_interval_usec": 1000}),
-            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
-                   "defs": [{"str_id": 1, "value": "active_func1"},
-                            {"str_id": 2, "value": "active1.py"},
-                            {"str_id": 3, "value": "active_func2"},
-                            {"str_id": 4, "value": "active2.py"}]}),
-            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
-                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
-                             "line": 10, "end_line": 10},
-                            {"frame_id": 2, "path_str_id": 4, "func_str_id": 3,
-                             "line": 30, "end_line": 30}]}),
-            jsonl({"type": "agg", "v": 1, "run_id": run_id,
-                   "kind": "frame", "scope": "final", "samples_total": 2,
-                   "entries": [{"frame_id": 1, "self": 1, "cumulative": 1},
-                               {"frame_id": 2, "self": 1, "cumulative": 1}]}),
-            jsonl({"type": "end", "v": 1, "run_id": run_id,
-                   "samples_total": 2}),
-        ]
-
-        for exp in expected:
-            self.assertIn(exp, lines)
-
-        # Test with skip_idle=False - should process all threads
-        collector_no_skip = JsonlCollector(
-            sample_interval_usec=1000, skip_idle=False
-        )
-        collector_no_skip.collect(test_frames)
-
-        run_id = collector_no_skip.run_id
-
-        # Should have functions from all threads
-        with captured_stdout(), captured_stderr():
-            collector_no_skip.export(jsonl_out.name)
-
-        # Check file contents
-        with open(jsonl_out.name, "r") as f:
-            content = f.read()
-
-        lines = content.strip().split("\n")
-        self.assertEqual(len(lines), 5)
-
-        expected = [
-            jsonl({"type": "meta", "v": 1, "run_id": run_id,
-                   "sample_interval_usec": 1000}),
-            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
-                   "defs": [{"str_id": 1, "value": "active_func1"},
-                            {"str_id": 2, "value": "active1.py"},
-                            {"str_id": 3, "value": "idle_func"},
-                            {"str_id": 4, "value": "idle.py"},
-                            {"str_id": 5, "value": "active_func2"},
-                            {"str_id": 6, "value": "active2.py"}]}),
-            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
-                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
-                             "line": 10, "end_line": 10},
-                            {"frame_id": 2, "path_str_id": 4, "func_str_id": 3,
-                             "line": 20, "end_line": 20},
-                            {"frame_id": 3, "path_str_id": 6, "func_str_id": 5,
-                             "line": 30, "end_line": 30}]}),
-            jsonl({"type": "agg", "v": 1, "run_id": run_id,
-                   "kind": "frame", "scope": "final", "samples_total": 3,
-                   "entries": [{"frame_id": 1, "self": 1, "cumulative": 1},
-                               {"frame_id": 2, "self": 1, "cumulative": 1},
-                               {"frame_id": 3, "self": 1, "cumulative": 1}]}),
-            jsonl({"type": "end", "v": 1, "run_id": run_id,
-                   "samples_total": 3}),
-        ]
-
-        for exp in expected:
-            self.assertIn(exp, lines)
-
-
 @requires_remote_subprocess_debugging()
 class TestGilModeFiltering(unittest.TestCase):
     """Test GIL mode filtering functionality (--mode=gil)."""

From e14f6f17a1c2062519db71f8d21ffda159c28a41 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 15:31:15 +0200
Subject: [PATCH 25/41] reorder

---
 Lib/profiling/sampling/jsonl_collector.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 6c8f2bc2fd3135..372205a566afc6 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -116,6 +116,15 @@ def _build_end_record(self):
 
         return record
 
+    def _iter_final_agg_entries(self):
+        for frame_record in self._frames:
+            frame_id = frame_record["frame_id"]
+            yield {
+                "frame_id": frame_id,
+                "self": self._frame_self[frame_id],
+                "cumulative": self._frame_cumulative[frame_id],
+            }
+
     def _get_or_create_frame_id(self, filename, location, funcname):
         synthetic = location is None
         location_fields = self._location_to_export_fields(location)
@@ -175,15 +184,6 @@ def _location_to_export_fields(location):
             fields["end_col"] = end_col_offset
         return fields
 
-    def _iter_final_agg_entries(self):
-        for frame_record in self._frames:
-            frame_id = frame_record["frame_id"]
-            yield {
-                "frame_id": frame_id,
-                "self": self._frame_self[frame_id],
-                "cumulative": self._frame_cumulative[frame_id],
-            }
-
     def _write_chunked_records(
         self, output, base_record, chunk_field, entries
     ):

From cf6aa9e296d42b19ba496bae23ee44b4e26a077f Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 15:46:44 +0200
Subject: [PATCH 26/41] eh, just copy from heatmap

---
 Lib/profiling/sampling/jsonl_collector.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 372205a566afc6..146075b00b121c 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -43,6 +43,7 @@ def __init__(self, sample_interval_usec, *, skip_idle=False, mode=None):
         self._frame_self = Counter()
         self._frame_cumulative = Counter()
         self._samples_total = 0
+        self._seen_frame_ids = set()
 
         self._mode = mode
 
@@ -51,17 +52,21 @@ def process_frames(self, frames, _thread_id, weight=1):
             return
 
         self._samples_total += weight
+        self._seen_frame_ids.clear()
 
-        frame_ids = [
-            self._get_or_create_frame_id(filename, location, funcname)
-            for filename, location, funcname, _opcode in frames
-        ]
-        leaf_frame_id = frame_ids[0]
+        for i, (filename, location, funcname, _opcode) in enumerate(frames):
+            frame_id = self._get_or_create_frame_id(filename, location, funcname)
+            is_leaf = (i == 0)
+            count_cumulative = frame_id not in self._seen_frame_ids
 
-        self._frame_self[leaf_frame_id] += weight
+            if count_cumulative:
+                self._seen_frame_ids.add(frame_id)
 
-        for frame_id in set(frame_ids):
-            self._frame_cumulative[frame_id] += weight
+            if is_leaf:
+                self._frame_self[frame_id] += weight
+
+            if count_cumulative:
+                self._frame_cumulative[frame_id] += weight
 
     def export(self, filename):
         with open(filename, "w", encoding="utf-8") as output:

From 1f4c7660eabae2dbc8b07e72cbf3e71f47006deb Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 15:54:14 +0200
Subject: [PATCH 27/41] smaller chunk; matter of taste

---
 Lib/profiling/sampling/jsonl_collector.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 146075b00b121c..12d8e4b3e2e77b 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -16,7 +16,7 @@
 from .stack_collector import StackTraceCollector
 
 
-_CHUNK_SIZE = 1000
+_CHUNK_SIZE = 256
 
 _MODE_NAMES = {
     PROFILING_MODE_WALL: "wall",

From ba5712e2d47eb8b9ffaac180619a7b3dc3ccbd4b Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 15:54:27 +0200
Subject: [PATCH 28/41] test actual chunking

---
 .../test_sampling_profiler/test_collectors.py | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index f1d005dff8c976..d34cb33e21b81b 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -1795,6 +1795,49 @@ def export_summary(skip_idle):
         )
         self.assertEqual(samples_total, 3)
 
+    def test_jsonl_collector_splits_large_exports_into_chunks(self):
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
+
+        collector = JsonlCollector(1000)
+
+        for i in range(257):
+            collector.collect(
+                [
+                    MockInterpreterInfo(
+                        0,
+                        [
+                            MockThreadInfo(
+                                1,
+                                [MockFrameInfo(f"file{i}.py", i + 1, f"func{i}")],
+                            )
+                        ],
+                    )
+                ]
+            )
+
+        collector.export(jsonl_out.name)
+
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
+
+        run_ids = {record["run_id"] for record in records}
+        self.assertEqual(len(run_ids), 1)
+        self.assertRegex(next(iter(run_ids)), r"^[0-9a-f]{32}$")
+
+        _, str_defs, frame_defs, agg_record, end_record = _jsonl_tables(records)
+        str_chunks = [record for record in records if record["type"] == "str_def"]
+        frame_chunks = [record for record in records if record["type"] == "frame_def"]
+        agg_chunks = [record for record in records if record["type"] == "agg"]
+
+        self.assertEqual([len(record["defs"]) for record in str_chunks], [256, 256, 2])
+        self.assertEqual([len(record["defs"]) for record in frame_chunks], [256, 1])
+        self.assertEqual([len(record["entries"]) for record in agg_chunks], [256, 1])
+        self.assertEqual(len(str_defs), 514)
+        self.assertEqual(len(frame_defs), 257)
+        self.assertEqual(agg_record["samples_total"], 257)
+        self.assertEqual(end_record["samples_total"], 257)
+
 
 class TestRecursiveFunctionHandling(unittest.TestCase):
     """Tests for correct handling of recursive functions in cumulative stats."""

From 3cacc309140da450a9963b7d77c8c0e7ebe55ca9 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 16:00:29 +0200
Subject: [PATCH 29/41] test edge cases

---
 .../test_sampling_profiler/test_collectors.py | 74 +++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index d34cb33e21b81b..8d80ad16982d7d 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -1742,6 +1742,80 @@ def test_jsonl_collector_export_exact_output(self):
             ),
         )
 
+    def test_jsonl_collector_export_includes_mode_in_meta(self):
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
+
+        collector = JsonlCollector(1000, mode=PROFILING_MODE_CPU)
+        collector.collect(
+            [
+                MockInterpreterInfo(
+                    0,
+                    [MockThreadInfo(1, [MockFrameInfo("file.py", 10, "func")])],
+                )
+            ]
+        )
+        collector.export(jsonl_out.name)
+
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
+
+        meta_record = next(record for record in records if record["type"] == "meta")
+        self.assertEqual(meta_record["mode"], "cpu")
+
+    def test_jsonl_collector_export_empty_profile(self):
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
+
+        collector = JsonlCollector(1000)
+        collector.run_id = "run-123"
+        collector.export(jsonl_out.name)
+
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
+
+        self.assertEqual([record["type"] for record in records], ["meta", "end"])
+        self.assertEqual(records[0]["sample_interval_usec"], 1000)
+        self.assertEqual(records[0]["run_id"], "run-123")
+        self.assertEqual(records[1]["samples_total"], 0)
+        self.assertEqual(records[1]["run_id"], "run-123")
+
+    def test_jsonl_collector_recursive_frames_counted_once_per_sample(self):
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
+
+        collector = JsonlCollector(1000)
+        collector.collect(
+            [
+                MockInterpreterInfo(
+                    0,
+                    [
+                        MockThreadInfo(
+                            1,
+                            [
+                                MockFrameInfo("recursive.py", 10, "recursive_func"),
+                                MockFrameInfo("recursive.py", 10, "recursive_func"),
+                                MockFrameInfo("recursive.py", 10, "recursive_func"),
+                            ],
+                        )
+                    ],
+                )
+            ]
+        )
+        collector.export(jsonl_out.name)
+
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
+
+        _, _, frame_defs, agg_record, end_record = _jsonl_tables(records)
+        self.assertEqual(len(frame_defs), 1)
+        self.assertEqual(
+            agg_record["entries"],
+            [{"frame_id": frame_defs[0]["frame_id"], "self": 1, "cumulative": 1}],
+        )
+        self.assertEqual(agg_record["samples_total"], 1)
+        self.assertEqual(end_record["samples_total"], 1)
+
     def test_jsonl_collector_skip_idle_filters_threads(self):
         jsonl_out = tempfile.NamedTemporaryFile(delete=False)
         self.addCleanup(close_and_unlink, jsonl_out)

From 4d48f5838dc826c93682683a3799d96504810090 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 16:05:30 +0200
Subject: [PATCH 30/41] ruff

---
 Lib/profiling/sampling/jsonl_collector.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 12d8e4b3e2e77b..a1d37df85c2672 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -55,8 +55,10 @@ def process_frames(self, frames, _thread_id, weight=1):
         self._seen_frame_ids.clear()
 
         for i, (filename, location, funcname, _opcode) in enumerate(frames):
-            frame_id = self._get_or_create_frame_id(filename, location, funcname)
-            is_leaf = (i == 0)
+            frame_id = self._get_or_create_frame_id(
+                filename, location, funcname
+            )
+            is_leaf = i == 0
             count_cumulative = frame_id not in self._seen_frame_ids
 
             if count_cumulative:

From 3ea253bba7bcb0b3cc0cc607a817873375e233d0 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 16:16:37 +0200
Subject: [PATCH 31/41] match pep8

---
 Lib/test/test_profiling/test_sampling_profiler/test_modes.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
index 6cd636593e3db1..0b38fb4ad4bcf6 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
@@ -228,6 +228,7 @@ def test_cpu_mode_with_no_samples(self):
         self.assertIn("No samples were collected", output)
         self.assertIn("CPU mode", output)
 
+
 @requires_remote_subprocess_debugging()
 class TestGilModeFiltering(unittest.TestCase):
     """Test GIL mode filtering functionality (--mode=gil)."""

From 308ca86e221d7ab1a289a490bbe341e89a98f051 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 16:44:27 +0200
Subject: [PATCH 32/41] style

---
 Lib/profiling/sampling/binary_reader.py       |   2 +-
 .../test_sampling_profiler/test_collectors.py | 124 +++++++++++++-----
 2 files changed, 94 insertions(+), 32 deletions(-)

diff --git a/Lib/profiling/sampling/binary_reader.py b/Lib/profiling/sampling/binary_reader.py
index 8d1d8eef9155eb..a29dad91ae339d 100644
--- a/Lib/profiling/sampling/binary_reader.py
+++ b/Lib/profiling/sampling/binary_reader.py
@@ -118,7 +118,7 @@ def convert_binary_to_format(input_file, output_file, output_format,
             collector = PstatsCollector(interval)
         elif output_format == 'gecko':
             collector = GeckoCollector(interval)
-        elif output_format == 'jsonl':
+        elif output_format == "jsonl":
             collector = JsonlCollector(interval)
         else:
             raise ValueError(f"Unknown output format: {output_format}")
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 8d80ad16982d7d..b6d09301240e65 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -1701,7 +1701,11 @@ def test_jsonl_collector_export_exact_output(self):
                 0,
                 [
                     MockThreadInfo(
-                        1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")]
+                        1,
+                        [
+                            MockFrameInfo("file.py", 10, "func1"),
+                            MockFrameInfo("file.py", 20, "func2"),
+                        ],
                     )
                 ],
             )
@@ -1711,14 +1715,23 @@ def test_jsonl_collector_export_exact_output(self):
                 0,
                 [
                     MockThreadInfo(
-                        1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")]
+                        1,
+                        [
+                            MockFrameInfo("file.py", 10, "func1"),
+                            MockFrameInfo("file.py", 20, "func2"),
+                        ],
                     )
                 ],
             )
         ]  # Same stack
         test_frames3 = [
             MockInterpreterInfo(
-                0, [MockThreadInfo(1, [MockFrameInfo("other.py", 5, "other_func")])]
+                0,
+                [
+                    MockThreadInfo(
+                        1, [MockFrameInfo("other.py", 5, "other_func")]
+                    )
+                ],
             )
         ]
 
@@ -1751,7 +1764,11 @@ def test_jsonl_collector_export_includes_mode_in_meta(self):
             [
                 MockInterpreterInfo(
                     0,
-                    [MockThreadInfo(1, [MockFrameInfo("file.py", 10, "func")])],
+                    [
+                        MockThreadInfo(
+                            1, [MockFrameInfo("file.py", 10, "func")]
+                        )
+                    ],
                 )
             ]
         )
@@ -1760,7 +1777,9 @@ def test_jsonl_collector_export_includes_mode_in_meta(self):
         with open(jsonl_out.name, "r", encoding="utf-8") as f:
             records = [json.loads(line) for line in f]
 
-        meta_record = next(record for record in records if record["type"] == "meta")
+        meta_record = next(
+            record for record in records if record["type"] == "meta"
+        )
         self.assertEqual(meta_record["mode"], "cpu")
 
     def test_jsonl_collector_export_empty_profile(self):
@@ -1774,7 +1793,9 @@ def test_jsonl_collector_export_empty_profile(self):
         with open(jsonl_out.name, "r", encoding="utf-8") as f:
             records = [json.loads(line) for line in f]
 
-        self.assertEqual([record["type"] for record in records], ["meta", "end"])
+        self.assertEqual(
+            [record["type"] for record in records], ["meta", "end"]
+        )
         self.assertEqual(records[0]["sample_interval_usec"], 1000)
         self.assertEqual(records[0]["run_id"], "run-123")
         self.assertEqual(records[1]["samples_total"], 0)
@@ -1793,9 +1814,15 @@ def test_jsonl_collector_recursive_frames_counted_once_per_sample(self):
                         MockThreadInfo(
                             1,
                             [
-                                MockFrameInfo("recursive.py", 10, "recursive_func"),
-                                MockFrameInfo("recursive.py", 10, "recursive_func"),
-                                MockFrameInfo("recursive.py", 10, "recursive_func"),
+                                MockFrameInfo(
+                                    "recursive.py", 10, "recursive_func"
+                                ),
+                                MockFrameInfo(
+                                    "recursive.py", 10, "recursive_func"
+                                ),
+                                MockFrameInfo(
+                                    "recursive.py", 10, "recursive_func"
+                                ),
                             ],
                         )
                     ],
@@ -1811,7 +1838,13 @@ def test_jsonl_collector_recursive_frames_counted_once_per_sample(self):
         self.assertEqual(len(frame_defs), 1)
         self.assertEqual(
             agg_record["entries"],
-            [{"frame_id": frame_defs[0]["frame_id"], "self": 1, "cumulative": 1}],
+            [
+                {
+                    "frame_id": frame_defs[0]["frame_id"],
+                    "self": 1,
+                    "cumulative": 1,
+                }
+            ],
         )
         self.assertEqual(agg_record["samples_total"], 1)
         self.assertEqual(end_record["samples_total"], 1)
@@ -1864,9 +1897,7 @@ def export_summary(skip_idle):
 
         paths, funcs, samples_total = export_summary(skip_idle=False)
         self.assertEqual(paths, {"active1.py", "idle.py", "active2.py"})
-        self.assertEqual(
-            funcs, {"active_func1", "idle_func", "active_func2"}
-        )
+        self.assertEqual(funcs, {"active_func1", "idle_func", "active_func2"})
         self.assertEqual(samples_total, 3)
 
     def test_jsonl_collector_splits_large_exports_into_chunks(self):
@@ -1883,7 +1914,11 @@ def test_jsonl_collector_splits_large_exports_into_chunks(self):
                         [
                             MockThreadInfo(
                                 1,
-                                [MockFrameInfo(f"file{i}.py", i + 1, f"func{i}")],
+                                [
+                                    MockFrameInfo(
+                                        f"file{i}.py", i + 1, f"func{i}"
+                                    )
+                                ],
                             )
                         ],
                     )
@@ -1899,14 +1934,26 @@ def test_jsonl_collector_splits_large_exports_into_chunks(self):
         self.assertEqual(len(run_ids), 1)
         self.assertRegex(next(iter(run_ids)), r"^[0-9a-f]{32}$")
 
-        _, str_defs, frame_defs, agg_record, end_record = _jsonl_tables(records)
-        str_chunks = [record for record in records if record["type"] == "str_def"]
-        frame_chunks = [record for record in records if record["type"] == "frame_def"]
+        _, str_defs, frame_defs, agg_record, end_record = _jsonl_tables(
+            records
+        )
+        str_chunks = [
+            record for record in records if record["type"] == "str_def"
+        ]
+        frame_chunks = [
+            record for record in records if record["type"] == "frame_def"
+        ]
         agg_chunks = [record for record in records if record["type"] == "agg"]
 
-        self.assertEqual([len(record["defs"]) for record in str_chunks], [256, 256, 2])
-        self.assertEqual([len(record["defs"]) for record in frame_chunks], [256, 1])
-        self.assertEqual([len(record["entries"]) for record in agg_chunks], [256, 1])
+        self.assertEqual(
+            [len(record["defs"]) for record in str_chunks], [256, 256, 2]
+        )
+        self.assertEqual(
+            [len(record["defs"]) for record in frame_chunks], [256, 1]
+        )
+        self.assertEqual(
+            [len(record["entries"]) for record in agg_chunks], [256, 1]
+        )
         self.assertEqual(len(str_defs), 514)
         self.assertEqual(len(frame_defs), 257)
         self.assertEqual(agg_record["samples_total"], 257)
@@ -2075,7 +2122,9 @@ def test_pstats_collector_cumulative_percentage_cannot_exceed_100(self):
         cumulative_calls = stats[1]
         self.assertEqual(cumulative_calls, 10)
 
-    def test_pstats_collector_different_lines_same_function_counted_separately(self):
+    def test_pstats_collector_different_lines_same_function_counted_separately(
+        self,
+    ):
         """Test that different line numbers in same function are tracked separately."""
         collector = PstatsCollector(sample_interval_usec=1000)
 
@@ -2282,8 +2331,7 @@ def test_flamegraph_collector_with_location_info(self):
         frame = MockFrameInfo("app.py", 100, "process_data")
         frames = [
             MockInterpreterInfo(
-                0,
-                [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+                0, [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
             )
         ]
         collector.collect(frames)
@@ -2291,8 +2339,15 @@ def test_flamegraph_collector_with_location_info(self):
         data = collector._convert_to_flamegraph_format()
         # Verify the function name includes lineno from location
         strings = data.get("strings", [])
-        name_found = any("process_data" in s and "100" in s for s in strings if isinstance(s, str))
-        self.assertTrue(name_found, f"Expected to find 'process_data' with line 100 in {strings}")
+        name_found = any(
+            "process_data" in s and "100" in s
+            for s in strings
+            if isinstance(s, str)
+        )
+        self.assertTrue(
+            name_found,
+            f"Expected to find 'process_data' with line 100 in {strings}",
+        )
 
     def test_gecko_collector_with_location_info(self):
         """Test GeckoCollector handles LocationInfo properly."""
@@ -2301,8 +2356,7 @@ def test_gecko_collector_with_location_info(self):
         frame = MockFrameInfo("server.py", 50, "handle_request")
         frames = [
             MockInterpreterInfo(
-                0,
-                [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+                0, [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
             )
         ]
         collector.collect(frames)
@@ -2565,8 +2619,12 @@ def _make_sample_frames(self):
                         1,
                         [
                             MockFrameInfo("app.py", 100, "main", opcode=90),
-                            MockFrameInfo("utils.py", 50, "helper", opcode=100),
-                            MockFrameInfo("lib.py", 25, "process", opcode=None),
+                            MockFrameInfo(
+                                "utils.py", 50, "helper", opcode=100
+                            ),
+                            MockFrameInfo(
+                                "lib.py", 25, "process", opcode=None
+                            ),
                         ],
                         status=THREAD_STATUS_HAS_GIL,
                     )
@@ -2724,7 +2782,9 @@ def test_flamegraph_collector_filters_internal_frames(self):
                         1,
                         [
                             MockFrameInfo("app.py", 50, "run"),
-                            MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
+                            MockFrameInfo(
+                                "/lib/_sync_coordinator.py", 100, "main"
+                            ),
                             MockFrameInfo("<frozen runpy>", 87, "_run_code"),
                         ],
                         status=THREAD_STATUS_HAS_GIL,
@@ -2752,7 +2812,9 @@ def test_collapsed_stack_collector_filters_internal_frames(self):
                         1,
                         [
                             MockFrameInfo("app.py", 50, "run"),
-                            MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
+                            MockFrameInfo(
+                                "/lib/_sync_coordinator.py", 100, "main"
+                            ),
                         ],
                         status=THREAD_STATUS_HAS_GIL,
                     )

From 0db38a1bbe2f11702638c78f50a97c59efb5b68a Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 16:49:26 +0200
Subject: [PATCH 33/41] too defensive

---
 Lib/profiling/sampling/jsonl_collector.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index a1d37df85c2672..187c4175da6816 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -48,9 +48,6 @@ def __init__(self, sample_interval_usec, *, skip_idle=False, mode=None):
         self._mode = mode
 
     def process_frames(self, frames, _thread_id, weight=1):
-        if not frames:
-            return
-
         self._samples_total += weight
         self._seen_frame_ids.clear()
 

From 4c768b4f135cfc07549d30220c6a04e160a04a2f Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 17:16:15 +0200
Subject: [PATCH 34/41] too many style changes

---
 .../test_sampling_profiler/test_collectors.py | 21 +++++++------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index b6d09301240e65..3134bc1a946f9e 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2122,9 +2122,7 @@ def test_pstats_collector_cumulative_percentage_cannot_exceed_100(self):
         cumulative_calls = stats[1]
         self.assertEqual(cumulative_calls, 10)
 
-    def test_pstats_collector_different_lines_same_function_counted_separately(
-        self,
-    ):
+    def test_pstats_collector_different_lines_same_function_counted_separately(self):
         """Test that different line numbers in same function are tracked separately."""
         collector = PstatsCollector(sample_interval_usec=1000)
 
@@ -2331,7 +2329,8 @@ def test_flamegraph_collector_with_location_info(self):
         frame = MockFrameInfo("app.py", 100, "process_data")
         frames = [
             MockInterpreterInfo(
-                0, [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+                0,
+                [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
             )
         ]
         collector.collect(frames)
@@ -2339,15 +2338,8 @@ def test_flamegraph_collector_with_location_info(self):
         data = collector._convert_to_flamegraph_format()
         # Verify the function name includes lineno from location
         strings = data.get("strings", [])
-        name_found = any(
-            "process_data" in s and "100" in s
-            for s in strings
-            if isinstance(s, str)
-        )
-        self.assertTrue(
-            name_found,
-            f"Expected to find 'process_data' with line 100 in {strings}",
-        )
+        name_found = any("process_data" in s and "100" in s for s in strings if isinstance(s, str))
+        self.assertTrue(name_found, f"Expected to find 'process_data' with line 100 in {strings}")
 
     def test_gecko_collector_with_location_info(self):
         """Test GeckoCollector handles LocationInfo properly."""
@@ -2356,7 +2348,8 @@ def test_gecko_collector_with_location_info(self):
         frame = MockFrameInfo("server.py", 50, "handle_request")
         frames = [
             MockInterpreterInfo(
-                0, [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+                0,
+                [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
             )
         ]
         collector.collect(frames)

From 5e86f4f8e7cbeeaaf139347a99f0df60d649277e Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 17:18:14 +0200
Subject: [PATCH 35/41] less style

---
 .../test_sampling_profiler/test_collectors.py        | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 3134bc1a946f9e..1bb9884c1e0ed8 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2612,12 +2612,8 @@ def _make_sample_frames(self):
                         1,
                         [
                             MockFrameInfo("app.py", 100, "main", opcode=90),
-                            MockFrameInfo(
-                                "utils.py", 50, "helper", opcode=100
-                            ),
-                            MockFrameInfo(
-                                "lib.py", 25, "process", opcode=None
-                            ),
+                            MockFrameInfo("utils.py", 50, "helper", opcode=100),
+                            MockFrameInfo("lib.py", 25, "process", opcode=None),
                         ],
                         status=THREAD_STATUS_HAS_GIL,
                     )
@@ -2805,9 +2801,7 @@ def test_collapsed_stack_collector_filters_internal_frames(self):
                         1,
                         [
                             MockFrameInfo("app.py", 50, "run"),
-                            MockFrameInfo(
-                                "/lib/_sync_coordinator.py", 100, "main"
-                            ),
+                            MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
                         ],
                         status=THREAD_STATUS_HAS_GIL,
                     )

From 25eb558e98a16ca35a0e66e89219d601200955b7 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 17:20:22 +0200
Subject: [PATCH 36/41] ha! even less style...

---
 .../test_profiling/test_sampling_profiler/test_collectors.py  | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 1bb9884c1e0ed8..51c2eb73a20764 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2771,9 +2771,7 @@ def test_flamegraph_collector_filters_internal_frames(self):
                         1,
                         [
                             MockFrameInfo("app.py", 50, "run"),
-                            MockFrameInfo(
-                                "/lib/_sync_coordinator.py", 100, "main"
-                            ),
+                            MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
                             MockFrameInfo("<frozen runpy>", 87, "_run_code"),
                         ],
                         status=THREAD_STATUS_HAS_GIL,

From d25b4d506b9c93123389ba9b7c65fdb5e673a84f Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 17:33:18 +0200
Subject: [PATCH 37/41] news

---
 .../Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst     | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst

diff --git a/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
new file mode 100644
index 00000000000000..d2d7e0d98d158b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
@@ -0,0 +1,3 @@
+The ``profiling.sampling`` module now supports JSONL output format via
+`--jsonl`. Each run emits newline-delimited JSON records suitable for
+streaming or agents.

From 0c0089afa6f506242ff9d5699466181714e2fbc9 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 17:33:34 +0200
Subject: [PATCH 38/41] news: proper formatting

---
 .../next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
index d2d7e0d98d158b..d270cc14288d8a 100644
--- a/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
+++ b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
@@ -1,3 +1,3 @@
 The ``profiling.sampling`` module now supports JSONL output format via
-`--jsonl`. Each run emits newline-delimited JSON records suitable for
+``--jsonl``. Each run emits newline-delimited JSON records suitable for
 streaming or agents.

From 5690ddf8354667aa904e6647faef07ca722afbb0 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Sat, 4 Apr 2026 14:42:54 +0200
Subject: [PATCH 39/41] claim credit!

---
 .../next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
index d270cc14288d8a..f75102eeae16e6 100644
--- a/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
+++ b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
@@ -1,3 +1,3 @@
 The ``profiling.sampling`` module now supports JSONL output format via
 ``--jsonl``. Each run emits newline-delimited JSON records suitable for
-streaming or agents.
+streaming or agents. Patch by Maurycy Pawłowski-Wieroński.

From 8e1d83bb97942c7b8c0087847eb6e3714c6b7741 Mon Sep 17 00:00:00 2001
From: Pablo Galindo Salgado <pablogsal@gmail.com>
Date: Mon, 4 May 2026 23:51:03 +0100
Subject: [PATCH 40/41] fixup! claim credit!

---
 Lib/profiling/sampling/cli.py                 |   2 +-
 Lib/profiling/sampling/collector.py           |   5 +-
 Lib/profiling/sampling/jsonl_collector.py     |  75 +++++++-
 .../test_sampling_profiler/helpers.py         |  26 +++
 .../test_binary_format.py                     |  70 +++++++-
 .../test_sampling_profiler/test_cli.py        |  77 ++++++++-
 .../test_sampling_profiler/test_collectors.py | 160 +++++++++++++++---
 ...-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst |   5 +-
 Modules/_remote_debugging/binary_io_reader.c  |   6 +-
 9 files changed, 387 insertions(+), 39 deletions(-)

diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py
index ccefd2402edc8e..0648713edc52af 100644
--- a/Lib/profiling/sampling/cli.py
+++ b/Lib/profiling/sampling/cli.py
@@ -496,7 +496,7 @@ def _add_format_options(parser, include_compression=True, include_binary=True):
         action="store_const",
         const="jsonl",
         dest="format",
-        help="Generate JSONL snapshot output for external consumers",
+        help="Generate newline-delimited JSON (JSONL) for programmatic consumers",
     )
     if include_binary:
         format_group.add_argument(
diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py
index ad5be46821c096..81ec6344ebdea4 100644
--- a/Lib/profiling/sampling/collector.py
+++ b/Lib/profiling/sampling/collector.py
@@ -37,13 +37,16 @@ def extract_lineno(location):
     """Extract lineno from location.
 
     Args:
-        location: tuple (lineno, end_lineno, col_offset, end_col_offset) or None
+        location: tuple (lineno, end_lineno, col_offset, end_col_offset),
+            an integer line number, or None
 
     Returns:
         int: The line number (0 for synthetic frames)
     """
     if location is None:
         return 0
+    if isinstance(location, int):
+        return location
     return location[0]
 
 def _is_internal_frame(frame):
diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 187c4175da6816..bcb98e75d91164 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -1,4 +1,70 @@
-"""JSONL collector."""
+"""JSON Lines (JSONL) collector for the sampling profiler.
+
+Emits a normalized newline-delimited JSON record stream suitable for
+programmatic consumption by external tools, scripts, and agents. Each line
+is one JSON object; consumers can parse the file incrementally line by
+line, but the producer writes the whole file at the end of the run (it is
+not a live/streaming producer).
+
+Record schema
+=============
+
+Every record is a JSON object with at least ``"type"``, ``"v"`` (record
+schema version), and ``"run_id"`` (UUID4 hex tagging the run; allows
+demultiplexing concatenated streams). Records appear in this fixed order:
+
+1. ``meta`` (exactly one, first line)::
+
+      {"type":"meta","v":1,"run_id":"<hex>",
+       "sample_interval_usec":<int>,"mode":"wall|cpu|gil|all|exception"}
+
+   ``mode`` is omitted when not provided.
+
+2. ``str_def`` (zero or more)::
+
+      {"type":"str_def","v":1,"run_id":"<hex>",
+       "defs":[{"str_id":<int>,"value":"<str>"}, ...]}
+
+   Strings (filenames, function names) are interned to keep repeated values
+   compact. Each chunk holds up to ``_CHUNK_SIZE`` entries.
+
+3. ``frame_def`` (zero or more)::
+
+      {"type":"frame_def","v":1,"run_id":"<hex>",
+       "defs":[{"frame_id":<int>,"path_str_id":<int>,"func_str_id":<int>,
+                "line":<int>,"end_line":<int>,"col":<int>,"end_col":<int>,
+                "synthetic":true}, ...]}
+
+   ``end_line``/``col``/``end_col`` are *omitted* when source location data
+   is unavailable (a missing key means "not available", not zero or null).
+   ``synthetic`` is present only on synthetic frames (for example, internal
+   marker frames whose source location is None) and absent otherwise.
+
+4. ``agg`` (zero or more)::
+
+      {"type":"agg","v":1,"run_id":"<hex>","kind":"frame","scope":"final",
+       "samples_total":<int>,
+       "entries":[{"frame_id":<int>,"self":<int>,"cumulative":<int>}, ...]}
+
+   ``self`` counts samples where the frame was the leaf (currently
+   executing); ``cumulative`` counts samples where the frame appeared
+   anywhere in the stack (deduped per sample so recursion does not
+   double-count). ``samples_total`` is the run-wide total, repeated on
+   each chunk so a streaming consumer always knows the denominator.
+
+5. ``end`` (exactly one, last line)::
+
+      {"type":"end","v":1,"run_id":"<hex>","samples_total":<int>}
+
+   Presence of ``end`` is the consumer's signal that the file is complete.
+
+Forward compatibility
+=====================
+
+Consumers MUST ignore unknown record ``"type"`` values and unknown object
+fields. New fields will be introduced as optional keys; an incompatible
+schema change will bump the per-record ``"v"``.
+"""
 
 from collections import Counter
 import json
@@ -28,7 +94,12 @@
 
 
 class JsonlCollector(StackTraceCollector):
-    """Collector that exports finalized profiling data as JSONL."""
+    """Collector that exports finalized profiling data as JSONL.
+
+    See the module docstring for the full record schema. The collector
+    accumulates samples in memory and writes the complete file at
+    ``export()`` time.
+    """
 
     def __init__(self, sample_interval_usec, *, skip_idle=False, mode=None):
         super().__init__(sample_interval_usec, skip_idle=skip_idle)
diff --git a/Lib/test/test_profiling/test_sampling_profiler/helpers.py b/Lib/test/test_profiling/test_sampling_profiler/helpers.py
index 0e32d8dd9eabef..87bdf2c7f778a2 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/helpers.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/helpers.py
@@ -174,3 +174,29 @@ def close_and_unlink(file):
     """Close a file and unlink it from the filesystem."""
     file.close()
     unlink(file.name)
+
+
+def jsonl_tables(records):
+    """Extract the canonical sections of a parsed JSONL profile.
+
+    Returns ``(meta, str_defs, frame_defs, agg, end)`` where ``str_defs`` is a
+    ``{str_id: value}`` dict, ``frame_defs`` is a flat list of all frame
+    definitions across chunks, and ``agg`` is the first agg record (sufficient
+    for tests that only emit one chunk).
+    """
+    meta = next(record for record in records if record["type"] == "meta")
+    end = next(record for record in records if record["type"] == "end")
+    agg = next(record for record in records if record["type"] == "agg")
+    str_defs = {
+        item["str_id"]: item["value"]
+        for record in records
+        if record["type"] == "str_def"
+        for item in record["defs"]
+    }
+    frame_defs = [
+        item
+        for record in records
+        if record["type"] == "frame_def"
+        for item in record["defs"]
+    ]
+    return meta, str_defs, frame_defs, agg, end
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
index 29f83c843561cd..2d8b705be8c2ea 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
@@ -1,5 +1,6 @@
 """Tests for binary format round-trip functionality."""
 
+import json
 import os
 import random
 import tempfile
@@ -21,7 +22,7 @@
         THREAD_STATUS_MAIN_THREAD,
     )
     from profiling.sampling.binary_collector import BinaryCollector
-    from profiling.sampling.binary_reader import BinaryReader
+    from profiling.sampling.binary_reader import BinaryReader, convert_binary_to_format
     from profiling.sampling.gecko_collector import GeckoCollector
 
     ZSTD_AVAILABLE = _remote_debugging.zstd_available()
@@ -30,6 +31,8 @@
         "Test only runs when _remote_debugging is available"
     )
 
+from .helpers import jsonl_tables
+
 
 def make_frame(filename, lineno, funcname, end_lineno=None, column=None,
                end_column=None, opcode=None):
@@ -1211,5 +1214,70 @@ def test_timestamp_preservation_with_rle(self):
         self.assertEqual(ts_collector.all_timestamps, expected_timestamps)
 
 
+class TestBinaryReplayToJsonl(BinaryFormatTestBase):
+    """Tests for binary -> JSONL replay via convert_binary_to_format."""
+
+    def _replay_to_jsonl(self, samples, interval=1000):
+        bin_path = self.create_binary_file(samples, interval=interval)
+        with tempfile.NamedTemporaryFile(suffix=".jsonl", delete=False) as f:
+            jsonl_path = f.name
+        self.temp_files.append(jsonl_path)
+
+        convert_binary_to_format(bin_path, jsonl_path, "jsonl")
+
+        with open(jsonl_path, "r", encoding="utf-8") as f:
+            return [json.loads(line) for line in f]
+
+    def test_binary_replay_to_jsonl_basic(self):
+        """Replay a small .bin to JSONL: meta/end shape, samples_total, run_id."""
+        frame = make_frame("hot.py", 99, "hot_func")
+        samples = [
+            [make_interpreter(0, [make_thread(1, [frame])])]
+            for _ in range(5)
+        ]
+        records = self._replay_to_jsonl(samples, interval=2000)
+        meta, _, frame_defs, _, end = jsonl_tables(records)
+
+        self.assertEqual(meta["sample_interval_usec"], 2000)
+        self.assertEqual(end["samples_total"], 5)
+
+        run_ids = {r["run_id"] for r in records}
+        self.assertEqual(len(run_ids), 1)
+        self.assertRegex(next(iter(run_ids)), r"^[0-9a-f]{32}$")
+
+        self.assertEqual(len(frame_defs), 1)
+        self.assertEqual(frame_defs[0]["line"], 99)
+
+    def test_binary_replay_to_jsonl_rle_weight_propagation(self):
+        """RLE-batched identical samples land as a single agg entry with the right total."""
+        frame = make_frame("rle.py", 42, "rle_func")
+        samples = [
+            [make_interpreter(0, [make_thread(1, [frame])])]
+            for _ in range(50)
+        ]
+        records = self._replay_to_jsonl(samples)
+        _, _, _, agg, end = jsonl_tables(records)
+
+        self.assertEqual(end["samples_total"], 50)
+        self.assertEqual(agg["entries"], [
+            {"frame_id": 1, "self": 50, "cumulative": 50},
+        ])
+
+    def test_binary_replay_to_jsonl_omits_unavailable_columns(self):
+        """Columns the binary recorder did not capture are omitted, not 0."""
+        # make_frame defaults column/end_column to 0; pass column=-1 / end_column=-1
+        # so the binary side records LOCATION_NOT_AVAILABLE.
+        frame = make_frame("nocol.py", 7, "no_col", column=-1, end_column=-1)
+        samples = [[make_interpreter(0, [make_thread(1, [frame])])]]
+        records = self._replay_to_jsonl(samples)
+        _, _, frame_defs, _, _ = jsonl_tables(records)
+
+        self.assertEqual(len(frame_defs), 1)
+        fd = frame_defs[0]
+        self.assertEqual(fd["line"], 7)
+        self.assertNotIn("col", fd)
+        self.assertNotIn("end_col", fd)
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py
index c522c50d1fd5fa..9c0734ac804e1b 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py
@@ -1,6 +1,7 @@
 """Tests for sampling profiler CLI argument parsing and functionality."""
 
 import io
+import json
 import os
 import subprocess
 import sys
@@ -21,9 +22,19 @@
     requires_remote_subprocess_debugging,
 )
 
-from profiling.sampling.cli import main
-from profiling.sampling.constants import PROFILING_MODE_ALL, PROFILING_MODE_WALL
+from profiling.sampling.cli import (
+    FORMAT_EXTENSIONS,
+    _create_collector,
+    _generate_output_filename,
+    main,
+)
+from profiling.sampling.constants import (
+    PROFILING_MODE_ALL,
+    PROFILING_MODE_CPU,
+    PROFILING_MODE_WALL,
+)
 from profiling.sampling.errors import SamplingScriptNotFoundError, SamplingModuleNotFoundError, SamplingUnknownProcessError
+from profiling.sampling.jsonl_collector import JsonlCollector
 
 class TestSampleProfilerCLI(unittest.TestCase):
     def _setup_sync_mocks(self, mock_socket, mock_popen):
@@ -912,3 +923,65 @@ def test_cli_replay_reader_errors_exit_cleanly(self):
             str(cm.exception),
             "Error: Unsupported format version 2",
         )
+
+    def test_cli_jsonl_format_mutually_exclusive_with_pstats(self):
+        """--jsonl and --pstats cannot be combined (mutually exclusive group)."""
+        with (
+            mock.patch(
+                "sys.argv",
+                [
+                    "profiling.sampling.cli",
+                    "attach",
+                    "12345",
+                    "--jsonl",
+                    "--pstats",
+                ],
+            ),
+            mock.patch("sys.stderr", io.StringIO()),
+        ):
+            with self.assertRaises(SystemExit):
+                main()
+
+    def test_cli_jsonl_extension_in_format_extensions(self):
+        """FORMAT_EXTENSIONS maps 'jsonl' -> 'jsonl' so default filenames work."""
+        self.assertEqual(FORMAT_EXTENSIONS["jsonl"], "jsonl")
+        self.assertEqual(_generate_output_filename("jsonl", 12345), "jsonl_12345.jsonl")
+
+    def test_cli_jsonl_create_collector_propagates_mode(self):
+        """_create_collector('jsonl', ..., mode=X) lands X in the meta record."""
+        collector = _create_collector(
+            "jsonl",
+            sample_interval_usec=1000,
+            skip_idle=False,
+            mode=PROFILING_MODE_CPU,
+        )
+        self.assertIsInstance(collector, JsonlCollector)
+
+        with tempfile.NamedTemporaryFile(suffix=".jsonl", delete=False) as f:
+            jsonl_path = f.name
+        self.addCleanup(os.unlink, jsonl_path)
+        collector.export(jsonl_path)
+        with open(jsonl_path, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
+        meta = next(r for r in records if r["type"] == "meta")
+        self.assertEqual(meta["mode"], "cpu")
+
+    def test_cli_jsonl_rejects_opcodes_combination(self):
+        """--opcodes is incompatible with --jsonl per opcodes_compatible_formats."""
+        test_args = [
+            "profiling.sampling.cli",
+            "attach",
+            "12345",
+            "--jsonl",
+            "--opcodes",
+        ]
+        with (
+            mock.patch("sys.argv", test_args),
+            mock.patch("sys.stderr", io.StringIO()) as mock_stderr,
+            mock.patch("profiling.sampling.cli.sample"),
+            self.assertRaises(SystemExit) as cm,
+        ):
+            main()
+
+        self.assertEqual(cm.exception.code, 2)
+        self.assertIn("--opcodes", mock_stderr.getvalue())
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 51c2eb73a20764..bd3461885281e8 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -39,7 +39,7 @@
 from test.support import captured_stdout, captured_stderr
 
 from .mocks import MockFrameInfo, MockThreadInfo, MockInterpreterInfo, LocationInfo, make_diff_collector_with_mock_baseline
-from .helpers import close_and_unlink
+from .helpers import close_and_unlink, jsonl_tables
 
 
 def resolve_name(node, strings):
@@ -58,25 +58,6 @@ def find_child_by_name(children, strings, substr):
     return None
 
 
-def _jsonl_tables(records):
-    meta = next(record for record in records if record["type"] == "meta")
-    end = next(record for record in records if record["type"] == "end")
-    agg = next(record for record in records if record["type"] == "agg")
-    str_defs = {
-        item["str_id"]: item["value"]
-        for record in records
-        if record["type"] == "str_def"
-        for item in record["defs"]
-    }
-    frame_defs = [
-        item
-        for record in records
-        if record["type"] == "frame_def"
-        for item in record["defs"]
-    ]
-    return meta, str_defs, frame_defs, agg, end
-
-
 class TestSampleProfilerComponents(unittest.TestCase):
     """Unit tests for individual profiler components."""
 
@@ -1834,7 +1815,7 @@ def test_jsonl_collector_recursive_frames_counted_once_per_sample(self):
         with open(jsonl_out.name, "r", encoding="utf-8") as f:
             records = [json.loads(line) for line in f]
 
-        _, _, frame_defs, agg_record, end_record = _jsonl_tables(records)
+        _, _, frame_defs, agg_record, end_record = jsonl_tables(records)
         self.assertEqual(len(frame_defs), 1)
         self.assertEqual(
             agg_record["entries"],
@@ -1885,7 +1866,7 @@ def export_summary(skip_idle):
             with open(jsonl_out.name, "r", encoding="utf-8") as f:
                 records = [json.loads(line) for line in f]
 
-            _, str_defs, frame_defs, agg_record, _ = _jsonl_tables(records)
+            _, str_defs, frame_defs, agg_record, _ = jsonl_tables(records)
             paths = {str_defs[item["path_str_id"]] for item in frame_defs}
             funcs = {str_defs[item["func_str_id"]] for item in frame_defs}
             return paths, funcs, agg_record["samples_total"]
@@ -1934,7 +1915,7 @@ def test_jsonl_collector_splits_large_exports_into_chunks(self):
         self.assertEqual(len(run_ids), 1)
         self.assertRegex(next(iter(run_ids)), r"^[0-9a-f]{32}$")
 
-        _, str_defs, frame_defs, agg_record, end_record = _jsonl_tables(
+        _, str_defs, frame_defs, agg_record, end_record = jsonl_tables(
             records
         )
         str_chunks = [
@@ -1959,6 +1940,122 @@ def test_jsonl_collector_splits_large_exports_into_chunks(self):
         self.assertEqual(agg_record["samples_total"], 257)
         self.assertEqual(end_record["samples_total"], 257)
 
+    def test_jsonl_collector_respects_weight_for_rle_batched_samples(self):
+        """weight>1 (from binary replay RLE) is honored in self/cumulative."""
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
+
+        collector = JsonlCollector(1000)
+        leaf = MockFrameInfo("file.py", 10, "leaf")
+        non_leaf = MockFrameInfo("file.py", 20, "non_leaf")
+
+        collector.process_frames([leaf, non_leaf], _thread_id=1, weight=5)
+        collector.export(jsonl_out.name)
+
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
+
+        _, str_defs, frame_defs, agg, end = jsonl_tables(records)
+        self.assertEqual(end["samples_total"], 5)
+        self.assertEqual(agg["samples_total"], 5)
+        self.assertEqual(
+            {str_defs[fd["func_str_id"]]: fd["frame_id"] for fd in frame_defs},
+            {"leaf": 1, "non_leaf": 2},
+        )
+        self.assertEqual(agg["entries"], [
+            {"frame_id": 1, "self": 5, "cumulative": 5},
+            {"frame_id": 2, "self": 0, "cumulative": 5},
+        ])
+
+    def test_jsonl_collector_recursion_with_weight(self):
+        """Recursion dedup respects weight, not occurrence count."""
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
+
+        collector = JsonlCollector(1000)
+        recursive = MockFrameInfo("rec.py", 10, "f")
+
+        collector.process_frames([recursive] * 3, _thread_id=1, weight=3)
+        collector.export(jsonl_out.name)
+
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
+
+        _, _, frame_defs, agg, _ = jsonl_tables(records)
+        self.assertEqual(len(frame_defs), 1)
+        self.assertEqual(agg["entries"], [
+            {"frame_id": 1, "self": 3, "cumulative": 3},
+        ])
+
+    def test_jsonl_collector_emits_col_and_end_col_when_present(self):
+        """All four location fields are emitted when col/end_col are >= 0."""
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
+
+        collector = JsonlCollector(1000)
+        frame = MockFrameInfo("test.py", 0, "f")
+        frame.location = LocationInfo(42, 45, 4, 12)
+        frames = [
+            MockInterpreterInfo(
+                0, [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+            )
+        ]
+        collector.collect(frames)
+        collector.export(jsonl_out.name)
+
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
+
+        _, str_defs, frame_defs, _, _ = jsonl_tables(records)
+        self.assertEqual(frame_defs, [
+            {
+                "frame_id": 1,
+                "path_str_id": 2,
+                "func_str_id": 1,
+                "line": 42,
+                "end_line": 45,
+                "col": 4,
+                "end_col": 12,
+            },
+        ])
+        self.assertEqual(str_defs, {1: "f", 2: "test.py"})
+
+    def test_jsonl_collector_partial_location_elision(self):
+        """Negative col/end_col/end_line fields are individually elided."""
+        # _get_or_create_frame_id interns funcname before filename, so
+        # func_str_id=1 ("f") and path_str_id=2 ("test.py").
+        common = {"frame_id": 1, "path_str_id": 2, "func_str_id": 1}
+        cases = [
+            (LocationInfo(42, 45, -1, 12),
+             {**common, "line": 42, "end_line": 45, "end_col": 12}),
+            (LocationInfo(42, 45, 4, -1),
+             {**common, "line": 42, "end_line": 45, "col": 4}),
+            (LocationInfo(42, 0, 4, 8),
+             {**common, "line": 42, "col": 4, "end_col": 8}),
+        ]
+        for loc, expected_frame_def in cases:
+            with self.subTest(location=loc):
+                jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+                self.addCleanup(close_and_unlink, jsonl_out)
+
+                collector = JsonlCollector(1000)
+                frame = MockFrameInfo("test.py", 0, "f")
+                frame.location = loc
+                frames = [
+                    MockInterpreterInfo(
+                        0,
+                        [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)],
+                    )
+                ]
+                collector.collect(frames)
+                collector.export(jsonl_out.name)
+
+                with open(jsonl_out.name, "r", encoding="utf-8") as f:
+                    records = [json.loads(line) for line in f]
+
+                _, _, frame_defs, _, _ = jsonl_tables(records)
+                self.assertEqual(frame_defs, [expected_frame_def])
+
 
 class TestRecursiveFunctionHandling(unittest.TestCase):
     """Tests for correct handling of recursive functions in cumulative stats."""
@@ -2168,6 +2265,15 @@ def test_extract_lineno_from_none(self):
         """Test extracting lineno from None (synthetic frames)."""
         self.assertEqual(extract_lineno(None), 0)
 
+    def test_extract_lineno_from_int(self):
+        """Test extracting lineno from a bare integer line number.
+
+        Mirrors normalize_location's int contract so callers like the
+        collapsed/flamegraph collectors do not crash on a bare-int location.
+        """
+        self.assertEqual(extract_lineno(42), 42)
+        self.assertEqual(extract_lineno(0), 0)
+
     def test_normalize_location_with_int(self):
         """Test normalize_location expands a legacy integer line number."""
         result = normalize_location(42)
@@ -2384,7 +2490,7 @@ def test_jsonl_collector_with_location_info(self):
         with open(jsonl_out.name, "r", encoding="utf-8") as f:
             records = [json.loads(line) for line in f]
 
-        meta, str_defs, frame_defs, agg, end = _jsonl_tables(records)
+        meta, str_defs, frame_defs, agg, end = jsonl_tables(records)
         self.assertEqual(meta["sample_interval_usec"], 1000)
         self.assertEqual(agg["samples_total"], 1)
         self.assertEqual(end["samples_total"], 1)
@@ -2425,7 +2531,7 @@ def test_jsonl_collector_with_none_location(self):
         with open(jsonl_out.name, "r", encoding="utf-8") as f:
             records = [json.loads(line) for line in f]
 
-        meta, str_defs, frame_defs, agg, end = _jsonl_tables(records)
+        meta, str_defs, frame_defs, agg, end = jsonl_tables(records)
         self.assertEqual(meta["sample_interval_usec"], 1000)
         self.assertEqual(agg["samples_total"], 1)
         self.assertEqual(end["samples_total"], 1)
@@ -2675,7 +2781,7 @@ def test_jsonl_collector_frame_format(self):
         with open(f.name, "r", encoding="utf-8") as fp:
             records = [json.loads(line) for line in fp]
 
-        _, str_defs, frame_defs, _, _ = _jsonl_tables(records)
+        _, str_defs, frame_defs, _, _ = jsonl_tables(records)
 
         self.assertEqual(len(frame_defs), 3)
 
@@ -2842,7 +2948,7 @@ def test_jsonl_collector_filters_internal_frames(self):
         with open(jsonl_out.name, "r", encoding="utf-8") as f:
             records = [json.loads(line) for line in f]
 
-        _, str_defs, frame_defs, _, _ = _jsonl_tables(records)
+        _, str_defs, frame_defs, _, _ = jsonl_tables(records)
 
         paths = {str_defs[item["path_str_id"]] for item in frame_defs}
 
diff --git a/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
index f75102eeae16e6..636f45ae8d6c70 100644
--- a/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
+++ b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
@@ -1,3 +1,4 @@
 The ``profiling.sampling`` module now supports JSONL output format via
-``--jsonl``. Each run emits newline-delimited JSON records suitable for
-streaming or agents. Patch by Maurycy Pawłowski-Wieroński.
+``--jsonl``. Each run emits a newline-delimited JSON file that is
+sequentially parseable by external tools, scripts, and programmatic
+consumers. Patch by Maurycy Pawłowski-Wieroński.
diff --git a/Modules/_remote_debugging/binary_io_reader.c b/Modules/_remote_debugging/binary_io_reader.c
index 6c32ef70ac3f65..a3364ce913923e 100644
--- a/Modules/_remote_debugging/binary_io_reader.c
+++ b/Modules/_remote_debugging/binary_io_reader.c
@@ -785,9 +785,9 @@ build_frame_list(RemoteDebuggingState *state, BinaryReader *reader,
         if (frame->lineno != LOCATION_NOT_AVAILABLE) {
             location = Py_BuildValue("(iiii)",
                 frame->lineno,
-                frame->end_lineno != LOCATION_NOT_AVAILABLE ? frame->end_lineno : frame->lineno,
-                frame->column != LOCATION_NOT_AVAILABLE ? frame->column : 0,
-                frame->end_column != LOCATION_NOT_AVAILABLE ? frame->end_column : 0);
+                frame->end_lineno,
+                frame->column,
+                frame->end_column);
             if (!location) {
                 Py_DECREF(frame_info);
                 goto error;

From fb4a7c85cecbad71ac983a3639ebddde34999f4d Mon Sep 17 00:00:00 2001
From: Pablo Galindo Salgado <pablogsal@gmail.com>
Date: Mon, 4 May 2026 23:57:36 +0100
Subject: [PATCH 41/41] fixup! fixup! claim credit!

---
 Lib/profiling/sampling/constants.py           |  8 ++
 Lib/profiling/sampling/jsonl_collector.py     | 81 +++++++++----------
 .../test_sampling_profiler/helpers.py         |  8 +-
 .../test_binary_format.py                     |  2 +-
 .../test_sampling_profiler/test_collectors.py | 44 +++++-----
 5 files changed, 73 insertions(+), 70 deletions(-)

diff --git a/Lib/profiling/sampling/constants.py b/Lib/profiling/sampling/constants.py
index a364d0b8fde1e0..d7c710f943b1b7 100644
--- a/Lib/profiling/sampling/constants.py
+++ b/Lib/profiling/sampling/constants.py
@@ -11,6 +11,14 @@
 PROFILING_MODE_ALL = 3  # Combines GIL + CPU checks
 PROFILING_MODE_EXCEPTION = 4  # Only samples when thread has an active exception
 
+PROFILING_MODE_NAMES = {
+    PROFILING_MODE_WALL: "wall",
+    PROFILING_MODE_CPU: "cpu",
+    PROFILING_MODE_GIL: "gil",
+    PROFILING_MODE_ALL: "all",
+    PROFILING_MODE_EXCEPTION: "exception",
+}
+
 # Sort mode constants
 SORT_MODE_NSAMPLES = 0
 SORT_MODE_TOTTIME = 1
diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index bcb98e75d91164..7d26129b80de86 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -15,34 +15,36 @@
 
 1. ``meta`` (exactly one, first line)::
 
-      {"type":"meta","v":1,"run_id":"<hex>",
+      {"type":"meta","v":0,"run_id":"<hex>",
        "sample_interval_usec":<int>,"mode":"wall|cpu|gil|all|exception"}
 
    ``mode`` is omitted when not provided.
 
-2. ``str_def`` (zero or more)::
+2. ``string_table`` (zero or more)::
 
-      {"type":"str_def","v":1,"run_id":"<hex>",
-       "defs":[{"str_id":<int>,"value":"<str>"}, ...]}
+      {"type":"string_table","v":0,"run_id":"<hex>",
+       "strings":[{"str_id":<int>,"value":"<str>"}, ...]}
 
    Strings (filenames, function names) are interned to keep repeated values
-   compact. Each chunk holds up to ``_CHUNK_SIZE`` entries.
+   compact. IDs are zero-based. Each chunk holds up to ``_CHUNK_SIZE``
+   entries, and each entry carries its explicit ``str_id`` so consumers do
+   not need to infer offsets across chunks.
 
-3. ``frame_def`` (zero or more)::
+3. ``frame_table`` (zero or more)::
 
-      {"type":"frame_def","v":1,"run_id":"<hex>",
-       "defs":[{"frame_id":<int>,"path_str_id":<int>,"func_str_id":<int>,
-                "line":<int>,"end_line":<int>,"col":<int>,"end_col":<int>,
-                "synthetic":true}, ...]}
+      {"type":"frame_table","v":0,"run_id":"<hex>",
+       "frames":[{"frame_id":<int>,"path_str_id":<int>,"func_str_id":<int>,
+                  "line":<int>,"end_line":<int>,"col":<int>,
+                  "end_col":<int>}, ...]}
 
    ``end_line``/``col``/``end_col`` are *omitted* when source location data
    is unavailable (a missing key means "not available", not zero or null).
-   ``synthetic`` is present only on synthetic frames (for example, internal
-   marker frames whose source location is None) and absent otherwise.
+   ``line`` is ``0`` for synthetic frames (for example, internal marker
+   frames whose source location is None). Frame IDs are zero-based.
 
 4. ``agg`` (zero or more)::
 
-      {"type":"agg","v":1,"run_id":"<hex>","kind":"frame","scope":"final",
+      {"type":"agg","v":0,"run_id":"<hex>","kind":"frame","scope":"final",
        "samples_total":<int>,
        "entries":[{"frame_id":<int>,"self":<int>,"cumulative":<int>}, ...]}
 
@@ -54,7 +56,7 @@
 
 5. ``end`` (exactly one, last line)::
 
-      {"type":"end","v":1,"run_id":"<hex>","samples_total":<int>}
+      {"type":"end","v":0,"run_id":"<hex>","samples_total":<int>}
 
    Presence of ``end`` is the consumer's signal that the file is complete.
 
@@ -71,26 +73,13 @@
 import uuid
 from itertools import batched
 
-from .constants import (
-    PROFILING_MODE_ALL,
-    PROFILING_MODE_CPU,
-    PROFILING_MODE_EXCEPTION,
-    PROFILING_MODE_GIL,
-    PROFILING_MODE_WALL,
-)
+from .constants import PROFILING_MODE_NAMES
 from .collector import normalize_location
 from .stack_collector import StackTraceCollector
 
 
 _CHUNK_SIZE = 256
-
-_MODE_NAMES = {
-    PROFILING_MODE_WALL: "wall",
-    PROFILING_MODE_CPU: "cpu",
-    PROFILING_MODE_GIL: "gil",
-    PROFILING_MODE_ALL: "all",
-    PROFILING_MODE_EXCEPTION: "exception",
-}
+_SCHEMA_VERSION = 0
 
 
 class JsonlCollector(StackTraceCollector):
@@ -143,21 +132,29 @@ def export(self, filename):
             self._write_message(output, self._build_meta_record())
             self._write_chunked_records(
                 output,
-                {"type": "str_def", "v": 1, "run_id": self.run_id},
-                "defs",
+                {
+                    "type": "string_table",
+                    "v": _SCHEMA_VERSION,
+                    "run_id": self.run_id,
+                },
+                "strings",
                 self._strings,
             )
             self._write_chunked_records(
                 output,
-                {"type": "frame_def", "v": 1, "run_id": self.run_id},
-                "defs",
+                {
+                    "type": "frame_table",
+                    "v": _SCHEMA_VERSION,
+                    "run_id": self.run_id,
+                },
+                "frames",
                 self._frames,
             )
             self._write_chunked_records(
                 output,
                 {
                     "type": "agg",
-                    "v": 1,
+                    "v": _SCHEMA_VERSION,
                     "run_id": self.run_id,
                     "kind": "frame",
                     "scope": "final",
@@ -171,20 +168,22 @@ def export(self, filename):
     def _build_meta_record(self):
         record = {
             "type": "meta",
-            "v": 1,
+            "v": _SCHEMA_VERSION,
             "run_id": self.run_id,
             "sample_interval_usec": self.sample_interval_usec,
         }
 
         if self._mode is not None:
-            record["mode"] = _MODE_NAMES.get(self._mode, str(self._mode))
+            record["mode"] = PROFILING_MODE_NAMES.get(
+                self._mode, str(self._mode)
+            )
 
         return record
 
     def _build_end_record(self):
         record = {
             "type": "end",
-            "v": 1,
+            "v": _SCHEMA_VERSION,
             "run_id": self.run_id,
             "samples_total": self._samples_total,
         }
@@ -201,7 +200,6 @@ def _iter_final_agg_entries(self):
             }
 
     def _get_or_create_frame_id(self, filename, location, funcname):
-        synthetic = location is None
         location_fields = self._location_to_export_fields(location)
         func_str_id = self._intern_string(funcname)
         path_str_id = self._intern_string(filename)
@@ -213,21 +211,18 @@ def _get_or_create_frame_id(self, filename, location, funcname):
             location_fields.get("end_line"),
             location_fields.get("col"),
             location_fields.get("end_col"),
-            synthetic,
         )
 
         if (frame_id := self._frame_to_id.get(frame_key)) is not None:
             return frame_id
 
-        frame_id = len(self._frames) + 1
+        frame_id = len(self._frames)
         frame_record = {
             "frame_id": frame_id,
             "path_str_id": path_str_id,
             "func_str_id": func_str_id,
             **location_fields,
         }
-        if synthetic:
-            frame_record["synthetic"] = True
 
         self._frame_to_id[frame_key] = frame_id
         self._frames.append(frame_record)
@@ -239,7 +234,7 @@ def _intern_string(self, value):
         if (string_id := self._string_to_id.get(value)) is not None:
             return string_id
 
-        string_id = len(self._strings) + 1
+        string_id = len(self._strings)
         self._string_to_id[value] = string_id
         self._strings.append({"str_id": string_id, "value": value})
         return string_id
diff --git a/Lib/test/test_profiling/test_sampling_profiler/helpers.py b/Lib/test/test_profiling/test_sampling_profiler/helpers.py
index 87bdf2c7f778a2..b07776d415bb29 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/helpers.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/helpers.py
@@ -190,13 +190,13 @@ def jsonl_tables(records):
     str_defs = {
         item["str_id"]: item["value"]
         for record in records
-        if record["type"] == "str_def"
-        for item in record["defs"]
+        if record["type"] == "string_table"
+        for item in record["strings"]
     }
     frame_defs = [
         item
         for record in records
-        if record["type"] == "frame_def"
-        for item in record["defs"]
+        if record["type"] == "frame_table"
+        for item in record["frames"]
     ]
     return meta, str_defs, frame_defs, agg, end
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
index 2d8b705be8c2ea..fd674589b3135d 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
@@ -1260,7 +1260,7 @@ def test_binary_replay_to_jsonl_rle_weight_propagation(self):
 
         self.assertEqual(end["samples_total"], 50)
         self.assertEqual(agg["entries"], [
-            {"frame_id": 1, "self": 50, "cumulative": 50},
+            {"frame_id": 0, "self": 50, "cumulative": 50},
         ])
 
     def test_binary_replay_to_jsonl_omits_unavailable_columns(self):
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index bd3461885281e8..b42e7aa579f40c 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -1728,11 +1728,11 @@ def test_jsonl_collector_export_exact_output(self):
         self.assertEqual(
             content,
             (
-                '{"type":"meta","v":1,"run_id":"run-123","sample_interval_usec":1000}\n'
-                '{"type":"str_def","v":1,"run_id":"run-123","defs":[{"str_id":1,"value":"func1"},{"str_id":2,"value":"file.py"},{"str_id":3,"value":"func2"},{"str_id":4,"value":"other_func"},{"str_id":5,"value":"other.py"}]}\n'
-                '{"type":"frame_def","v":1,"run_id":"run-123","defs":[{"frame_id":1,"path_str_id":2,"func_str_id":1,"line":10,"end_line":10},{"frame_id":2,"path_str_id":2,"func_str_id":3,"line":20,"end_line":20},{"frame_id":3,"path_str_id":5,"func_str_id":4,"line":5,"end_line":5}]}\n'
-                '{"type":"agg","v":1,"run_id":"run-123","kind":"frame","scope":"final","samples_total":3,"entries":[{"frame_id":1,"self":2,"cumulative":2},{"frame_id":2,"self":0,"cumulative":2},{"frame_id":3,"self":1,"cumulative":1}]}\n'
-                '{"type":"end","v":1,"run_id":"run-123","samples_total":3}\n'
+                '{"type":"meta","v":0,"run_id":"run-123","sample_interval_usec":1000}\n'
+                '{"type":"string_table","v":0,"run_id":"run-123","strings":[{"str_id":0,"value":"func1"},{"str_id":1,"value":"file.py"},{"str_id":2,"value":"func2"},{"str_id":3,"value":"other_func"},{"str_id":4,"value":"other.py"}]}\n'
+                '{"type":"frame_table","v":0,"run_id":"run-123","frames":[{"frame_id":0,"path_str_id":1,"func_str_id":0,"line":10,"end_line":10},{"frame_id":1,"path_str_id":1,"func_str_id":2,"line":20,"end_line":20},{"frame_id":2,"path_str_id":4,"func_str_id":3,"line":5,"end_line":5}]}\n'
+                '{"type":"agg","v":0,"run_id":"run-123","kind":"frame","scope":"final","samples_total":3,"entries":[{"frame_id":0,"self":2,"cumulative":2},{"frame_id":1,"self":0,"cumulative":2},{"frame_id":2,"self":1,"cumulative":1}]}\n'
+                '{"type":"end","v":0,"run_id":"run-123","samples_total":3}\n'
             ),
         )
 
@@ -1919,18 +1919,19 @@ def test_jsonl_collector_splits_large_exports_into_chunks(self):
             records
         )
         str_chunks = [
-            record for record in records if record["type"] == "str_def"
+            record for record in records if record["type"] == "string_table"
         ]
         frame_chunks = [
-            record for record in records if record["type"] == "frame_def"
+            record for record in records if record["type"] == "frame_table"
         ]
         agg_chunks = [record for record in records if record["type"] == "agg"]
 
         self.assertEqual(
-            [len(record["defs"]) for record in str_chunks], [256, 256, 2]
+            [len(record["strings"]) for record in str_chunks],
+            [256, 256, 2],
         )
         self.assertEqual(
-            [len(record["defs"]) for record in frame_chunks], [256, 1]
+            [len(record["frames"]) for record in frame_chunks], [256, 1]
         )
         self.assertEqual(
             [len(record["entries"]) for record in agg_chunks], [256, 1]
@@ -1960,11 +1961,11 @@ def test_jsonl_collector_respects_weight_for_rle_batched_samples(self):
         self.assertEqual(agg["samples_total"], 5)
         self.assertEqual(
             {str_defs[fd["func_str_id"]]: fd["frame_id"] for fd in frame_defs},
-            {"leaf": 1, "non_leaf": 2},
+            {"leaf": 0, "non_leaf": 1},
         )
         self.assertEqual(agg["entries"], [
-            {"frame_id": 1, "self": 5, "cumulative": 5},
-            {"frame_id": 2, "self": 0, "cumulative": 5},
+            {"frame_id": 0, "self": 5, "cumulative": 5},
+            {"frame_id": 1, "self": 0, "cumulative": 5},
         ])
 
     def test_jsonl_collector_recursion_with_weight(self):
@@ -1984,7 +1985,7 @@ def test_jsonl_collector_recursion_with_weight(self):
         _, _, frame_defs, agg, _ = jsonl_tables(records)
         self.assertEqual(len(frame_defs), 1)
         self.assertEqual(agg["entries"], [
-            {"frame_id": 1, "self": 3, "cumulative": 3},
+            {"frame_id": 0, "self": 3, "cumulative": 3},
         ])
 
     def test_jsonl_collector_emits_col_and_end_col_when_present(self):
@@ -2009,22 +2010,22 @@ def test_jsonl_collector_emits_col_and_end_col_when_present(self):
         _, str_defs, frame_defs, _, _ = jsonl_tables(records)
         self.assertEqual(frame_defs, [
             {
-                "frame_id": 1,
-                "path_str_id": 2,
-                "func_str_id": 1,
+                "frame_id": 0,
+                "path_str_id": 1,
+                "func_str_id": 0,
                 "line": 42,
                 "end_line": 45,
                 "col": 4,
                 "end_col": 12,
             },
         ])
-        self.assertEqual(str_defs, {1: "f", 2: "test.py"})
+        self.assertEqual(str_defs, {0: "f", 1: "test.py"})
 
     def test_jsonl_collector_partial_location_elision(self):
         """Negative col/end_col/end_line fields are individually elided."""
         # _get_or_create_frame_id interns funcname before filename, so
-        # func_str_id=1 ("f") and path_str_id=2 ("test.py").
-        common = {"frame_id": 1, "path_str_id": 2, "func_str_id": 1}
+        # func_str_id=0 ("f") and path_str_id=1 ("test.py").
+        common = {"frame_id": 0, "path_str_id": 1, "func_str_id": 0}
         cases = [
             (LocationInfo(42, 45, -1, 12),
              {**common, "line": 42, "end_line": 45, "end_col": 12}),
@@ -2500,7 +2501,7 @@ def test_jsonl_collector_with_location_info(self):
         self.assertEqual(
             frame_defs[0],
             {
-                "frame_id": 1,
+                "frame_id": 0,
                 "path_str_id": frame_defs[0]["path_str_id"],
                 "func_str_id": frame_defs[0]["func_str_id"],
                 "line": 42,
@@ -2541,11 +2542,10 @@ def test_jsonl_collector_with_none_location(self):
         self.assertEqual(
             frame_defs[0],
             {
-                "frame_id": 1,
+                "frame_id": 0,
                 "path_str_id": frame_defs[0]["path_str_id"],
                 "func_str_id": frame_defs[0]["func_str_id"],
                 "line": 0,
-                "synthetic": True,
             },
         )