From d48dd67d570c524a5448d5d8d786b8a80cb5a810 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Wed, 29 Apr 2026 12:29:20 +0200 Subject: [PATCH 1/3] that's it? --- Modules/_remote_debugging/binary_io_reader.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Modules/_remote_debugging/binary_io_reader.c b/Modules/_remote_debugging/binary_io_reader.c index 6c32ef70ac3f65..b90d9a53cb18a2 100644 --- a/Modules/_remote_debugging/binary_io_reader.c +++ b/Modules/_remote_debugging/binary_io_reader.c @@ -563,6 +563,14 @@ reader_get_or_create_thread_state(BinaryReader *reader, uint64_t thread_id, } } + if (reader->thread_state_count >= reader->thread_count) { + PyErr_Format(PyExc_ValueError, + "Invalid thread count: sample data contains more unique threads than declared in header " + "(declared %u, found at least %zu)", + reader->thread_count, reader->thread_state_count + 1); + return NULL; + } + if (!reader->thread_states) { reader->thread_state_capacity = 16; reader->thread_states = PyMem_Calloc(reader->thread_state_capacity, sizeof(ReaderThreadState)); From a0488e0ccf18f07afee683bc045279b60531c18f Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Wed, 29 Apr 2026 13:04:01 +0200 Subject: [PATCH 2/3] test --- .../test_binary_format.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py index 29f83c843561cd..ba13dfe1f6f7bf 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py @@ -2,6 +2,7 @@ import os import random +import struct import tempfile import unittest from collections import defaultdict @@ -806,6 +807,35 @@ def test_invalid_file_path(self): reader.replay_samples(RawCollector()) +class TestBinaryFormatValidation(BinaryFormatTestBase): + """Tests for malformed binary files.""" + + HDR_OFF_THREADS = 32 + + def test_replay_rejects_more_threads_than_declared(self): + """Replay rejects files with more unique threads than the header declares.""" + threads = [ + make_thread(1, [make_frame("t1.py", 10, "t1")]), + make_thread(2, [make_frame("t2.py", 20, "t2")]), + ] + samples = [[make_interpreter(0, threads)]] + filename = self.create_binary_file(samples, compression="none") + + with open(filename, "r+b") as raw: + raw.seek(self.HDR_OFF_THREADS) + raw.write(struct.pack("=I", 1)) + + with BinaryReader(filename) as reader: + self.assertEqual(reader.get_info()["thread_count"], 1) + with self.assertRaises(ValueError) as cm: + reader.replay_samples(RawCollector()) + self.assertEqual( + str(cm.exception), + "Invalid thread count: sample data contains more unique " + "threads than declared in header (declared 1, found at least 2)", + ) + + class TestBinaryEncodings(BinaryFormatTestBase): """Tests specifically targeting different stack encodings.""" From 12b05f33c47b77b3bb85c01ac0afb72651b82c30 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Wed, 29 Apr 2026 13:08:48 +0200 Subject: [PATCH 3/3] blurb --- .../Library/2026-04-29-13-08-46.gh-issue-149009.rek3Tw.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2026-04-29-13-08-46.gh-issue-149009.rek3Tw.rst diff --git a/Misc/NEWS.d/next/Library/2026-04-29-13-08-46.gh-issue-149009.rek3Tw.rst b/Misc/NEWS.d/next/Library/2026-04-29-13-08-46.gh-issue-149009.rek3Tw.rst new file mode 100644 index 00000000000000..e2f078742760a5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-29-13-08-46.gh-issue-149009.rek3Tw.rst @@ -0,0 +1,3 @@ +Validate that :mod:`profiling.sampling` binary profiles do not contain more +unique (thread, interpreter) pairs than declared in the header. Patch by +Maurycy Pawłowski-Wieroński.