Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ingestify/infra/fetch/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def retrieve_http(
)
stream = BufferedStream.from_stream(BytesIO(content_bytes))
content_length = len(content_bytes)
content_compression_method = None
else:
# Stream response body directly into BufferedStream, hashing on the fly
raw_stream = BufferedStream()
Expand Down
31 changes: 31 additions & 0 deletions ingestify/tests/test_http_fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,34 @@ def test_plain_content_has_no_compression_method():
result = retrieve_http("https://example.com/data.json", **FILE_KWARGS)

assert result.content_compression_method is None


def make_pager_response(page_data):
    """Build a fake HTTP response whose ``json()`` returns ``page_data``.

    The stand-in reports status code 200 and carries headers that behave as
    if empty: ``headers.get`` always hands back the supplied default and any
    ``in`` membership check is false. ``raise_for_status`` is a plain mock,
    so calling it does not raise.
    """

    def _header_get(key, default=None):
        return default

    def _header_contains(self, key):
        return False

    def _json():
        return page_data

    headers = MagicMock()
    headers.get = _header_get
    # A magic method assigned as a plain function receives the mock as
    # ``self``, hence the two-argument signature above.
    headers.__contains__ = _header_contains

    response = MagicMock(status_code=200)
    response.headers = headers
    response.raise_for_status = MagicMock()
    response.json = _json
    return response


def test_pager_returns_draft_file_without_compression_method():
    """Regression test for the pager branch of ``retrieve_http``.

    The pager branch used to leave ``content_compression_method`` unbound,
    which raised UnboundLocalError when the DraftFile was built. Pager output
    is freshly JSON-encoded (never compressed), so ``None`` is the correct
    value.
    """
    expected_body = b'{"items": [{"id": 1}, {"id": 2}]}'
    fake_response = make_pager_response({"items": [{"id": 1}, {"id": 2}]})

    def _return_none(url, data):
        return None

    with patch("ingestify.infra.fetch.http.get_session") as session_factory:
        session_factory.return_value.get.return_value = fake_response

        draft = retrieve_http(
            "https://example.com/data.json",
            pager=("items", _return_none),
            **FILE_KWARGS,
        )

    assert draft.content_compression_method is None
    assert draft.stream.read() == expected_body
    assert draft.size == len(expected_body)
Loading