Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions parallel_web_tools/cli/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -871,7 +871,7 @@ def extract(

results_list = []
for r in result.results:
result_dict: dict[str, Any] = {"url": r.url, "title": r.title}
result_dict: dict[str, Any] = {"url": r.url, "title": r.title, "publish_date": r.publish_date}
if hasattr(r, "excerpts") and r.excerpts:
result_dict["excerpts"] = r.excerpts
if hasattr(r, "full_content") and r.full_content:
Expand All @@ -884,8 +884,9 @@ def extract(
errors_list.append(
{
"url": getattr(e, "url", None),
"error": str(getattr(e, "error", "")),
"status_code": getattr(e, "status_code", None),
"error_type": getattr(e, "error_type", None),
"http_status_code": getattr(e, "http_status_code", None),
"content": getattr(e, "content", None),
}
)

Expand All @@ -894,6 +895,11 @@ def extract(
"status": "ok",
"results": results_list,
"errors": errors_list,
"warnings": [
{"type": w.type, "message": w.message, "detail": getattr(w, "detail", None)} for w in result.warnings
]
if hasattr(result, "warnings") and result.warnings
else [],
}

write_json_output(output_data, output_file, output_json)
Expand Down
76 changes: 76 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1111,10 +1111,12 @@ def test_extract_successful_json_output(self, runner):
mock_page = mock.MagicMock()
mock_page.url = "https://example.com"
mock_page.title = "Example Page"
mock_page.publish_date = "2025-01-15"
mock_page.excerpts = ["Some excerpt"]
mock_page.full_content = None
mock_extract_result.results = [mock_page]
mock_extract_result.errors = []
mock_extract_result.warnings = None

with mock.patch("parallel_web_tools.cli.commands.get_api_key", return_value="test-key"):
with mock.patch.dict("sys.modules"):
Expand All @@ -1134,6 +1136,80 @@ def test_extract_successful_json_output(self, runner):
assert output["status"] == "ok"
assert len(output["results"]) == 1
assert output["results"][0]["url"] == "https://example.com"
assert output["results"][0]["publish_date"] == "2025-01-15"
assert output["warnings"] == []

def test_extract_warnings_serialized_in_json_output(self, runner):
    """Should serialize SDK Warning objects as dicts in JSON output.

    Builds a mocked extract result carrying one page and one warning,
    exposes a stub ``parallel`` module through ``sys.modules`` and invokes
    the ``extract`` command with ``--json``. The emitted JSON must contain
    the warning as a plain dict with ``type``, ``message`` and ``detail``.
    """
    page = mock.MagicMock()
    page.url = "https://example.com"
    page.title = "Example"
    page.publish_date = None
    page.excerpts = ["An excerpt"]
    page.full_content = None

    warning_obj = mock.MagicMock()
    warning_obj.type = "input_validation_warning"
    warning_obj.message = "Excerpts truncated"
    warning_obj.detail = {"max_chars_total": 500}

    extract_result = mock.MagicMock()
    extract_result.extract_id = "ext_456"
    extract_result.results = [page]
    extract_result.errors = []
    extract_result.warnings = [warning_obj]

    # Stub SDK module: Parallel(...) -> client whose beta.extract(...) returns the result.
    fake_client = mock.MagicMock()
    fake_client.beta.extract.return_value = extract_result
    fake_sdk = mock.MagicMock()
    fake_sdk.Parallel.return_value = fake_client

    api_key_patch = mock.patch("parallel_web_tools.cli.commands.get_api_key", return_value="test-key")
    # patch.dict installs the stub and restores sys.modules on exit, even if
    # invoke() raises, so no manual insertion/deletion of the key is needed.
    sdk_patch = mock.patch.dict("sys.modules", {"parallel": fake_sdk})
    with api_key_patch, sdk_patch:
        result = runner.invoke(main, ["extract", "https://example.com", "--json"])

    assert result.exit_code == 0
    output = json.loads(result.output)
    assert len(output["warnings"]) == 1
    warning = output["warnings"][0]
    assert warning["type"] == "input_validation_warning"
    assert warning["message"] == "Excerpts truncated"
    assert warning["detail"] == {"max_chars_total": 500}

def test_extract_errors_serialized_in_json_output(self, runner):
    """Should serialize extract errors with correct API field names.

    Builds a mocked extract result with no pages and a single error object,
    exposes a stub ``parallel`` module through ``sys.modules`` and invokes
    the ``extract`` command with ``--json``. The emitted JSON must report
    the error under the keys ``url``, ``error_type``, ``http_status_code``
    and ``content``.
    """
    failed_fetch = mock.MagicMock()
    failed_fetch.url = "https://example.com/broken"
    failed_fetch.error_type = "fetch_error"
    failed_fetch.http_status_code = 500
    failed_fetch.content = "Internal Server Error"

    extract_result = mock.MagicMock()
    extract_result.extract_id = "ext_789"
    extract_result.results = []
    extract_result.errors = [failed_fetch]
    extract_result.warnings = None

    # Stub SDK module: Parallel(...) -> client whose beta.extract(...) returns the result.
    fake_client = mock.MagicMock()
    fake_client.beta.extract.return_value = extract_result
    fake_sdk = mock.MagicMock()
    fake_sdk.Parallel.return_value = fake_client

    api_key_patch = mock.patch("parallel_web_tools.cli.commands.get_api_key", return_value="test-key")
    # patch.dict installs the stub and restores sys.modules on exit, even if
    # invoke() raises, so no manual insertion/deletion of the key is needed.
    sdk_patch = mock.patch.dict("sys.modules", {"parallel": fake_sdk})
    with api_key_patch, sdk_patch:
        result = runner.invoke(main, ["extract", "https://example.com/broken", "--json"])

    assert result.exit_code == 0
    output = json.loads(result.output)
    assert len(output["errors"]) == 1
    error = output["errors"][0]
    assert error["url"] == "https://example.com/broken"
    assert error["error_type"] == "fetch_error"
    assert error["http_status_code"] == 500
    assert error["content"] == "Internal Server Error"


class TestEnrichDeploySnowflake:
Expand Down
Loading