Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.9
39 changes: 39 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Developer task runner for the half_json package.
# Every target is a command alias, not a file to build, so each one is listed
# in .PHONY; otherwise a file or directory with the same name as a target
# would make `make` consider that target up to date and skip it.
.PHONY: install sync lint lint-fix format format-check type-check test check all

# Install dependencies
install:
	uv sync

# Re-sync the environment with the lockfile. This runs the same command as
# `install`: `uv sync` performs an exact sync by default, so packages that are
# no longer required are removed as well.
sync:
	uv sync

# Run ruff linter
lint:
	uv run ruff check half_json tests

# Run ruff linter with auto-fix
lint-fix:
	uv run ruff check half_json tests --fix

# Run ruff formatter
format:
	uv run ruff format half_json tests

# Run ruff formatter check (CI mode: report differences, do not rewrite files)
format-check:
	uv run ruff format half_json tests --check

# Run type checker
type-check:
	uv run mypy half_json

# Run tests
test:
	uv run pytest

# Run all checks (CI mode)
check: lint format-check type-check test

# Run all checks with auto-fix (local development)
all: lint-fix format type-check test
8 changes: 3 additions & 5 deletions half_json/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,16 @@ def build_bracket_stack(text: str, end: int | None = None) -> tuple[str, ...]:
if escape:
escape = False
continue
if ch == '\\' and in_string:
if ch == "\\" and in_string:
escape = True
continue
if ch == '"':
in_string = not in_string
continue
if in_string:
continue
if ch in ('{', '['):
if ch in ("{", "["):
stack.append(ch)
elif ch == '}' and stack and stack[-1] == '{':
stack.pop()
elif ch == ']' and stack and stack[-1] == '[':
elif ch == "}" and stack and stack[-1] == "{" or ch == "]" and stack and stack[-1] == "[":
stack.pop()
return tuple(stack)
26 changes: 18 additions & 8 deletions half_json/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,26 @@ def main(argv: list[str] | None = None) -> None:
prog="jsonfixer",
description="Fix invalid / truncated JSON.",
)
parser.add_argument("infile", nargs="?", type=argparse.FileType("r"),
default=sys.stdin, help="input file (default: stdin)")
parser.add_argument("outfile", nargs="?", type=argparse.FileType("w"),
default=sys.stdout, help="output file (default: stdout)")
parser.add_argument(
"infile",
nargs="?",
type=argparse.FileType("r"),
default=sys.stdin,
help="input file (default: stdin)",
)
parser.add_argument(
"outfile",
nargs="?",
type=argparse.FileType("w"),
default=sys.stdout,
help="output file (default: stdout)",
)
parser.add_argument("--strict", dest="strict", action="store_true", default=True)
parser.add_argument("--no-strict", dest="strict", action="store_false")
parser.add_argument("--js-style", action="store_true", default=False)
parser.add_argument("--single", action="store_true", default=False,
help="treat entire input as one JSON value")
parser.add_argument(
"--single", action="store_true", default=False, help="treat entire input as one JSON value"
)
args = parser.parse_args(argv)

fixer = JSONFixer(js_style=args.js_style)
Expand All @@ -45,8 +56,7 @@ def main(argv: list[str] | None = None) -> None:
else:
print(result, file=sys.stderr)
if total:
print(f"total is {total} and hit {hit} --> ratio:{hit * 1.0 / total}",
file=sys.stderr)
print(f"total is {total} and hit {hit} --> ratio:{hit * 1.0 / total}", file=sys.stderr)


# Backward-compatible entry point (same signature as old main.py:fixjson)
Expand Down
2 changes: 1 addition & 1 deletion half_json/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import NamedTuple

from half_json.diagnosis import diagnose
from half_json.rules import FixCandidate, RuleRegistry
from half_json.rules import RuleRegistry
from half_json.rules.array_rules import CloseOrCommaArray, FixArrayElement
from half_json.rules.js_rules import FixJSStyleKey
from half_json.rules.object_rules import (
Expand Down
64 changes: 43 additions & 21 deletions half_json/diagnosis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import json.decoder
from dataclasses import dataclass
from enum import Enum, auto
from json.decoder import JSONDecoder, py_scanstring
from json.scanner import py_make_scanner
from json.decoder import JSONDecoder, py_scanstring # type: ignore[attr-defined]
from json.scanner import py_make_scanner # type: ignore[attr-defined]
from typing import Any


Expand All @@ -31,7 +31,11 @@ class ErrorType(Enum):
("py_scanstring", "Invalid \\uXXXX escape", ErrorType.STRING_INVALID_UXXXX),
("py_scanstring", "Invalid \\escape", ErrorType.STRING_INVALID_ESCAPE),
("py_scanstring", "Invalid control character", ErrorType.STRING_INVALID_CONTROL),
("JSONObject", "Expecting property name enclosed in double quotes", ErrorType.OBJECT_EXPECT_KEY),
(
"JSONObject",
"Expecting property name enclosed in double quotes",
ErrorType.OBJECT_EXPECT_KEY,
),
("JSONObject", "Expecting ':' delimiter", ErrorType.OBJECT_EXPECT_COLON),
("JSONObject", "Expecting value", ErrorType.OBJECT_EXPECT_VALUE),
("JSONObject", "Expecting ',' delimiter", ErrorType.OBJECT_EXPECT_COMMA),
Expand All @@ -43,6 +47,7 @@ class ErrorType(Enum):
@dataclass(frozen=True)
class ParseContext:
"""All context a fix rule needs."""

input: str
error_type: ErrorType
pos: int
Expand All @@ -56,13 +61,15 @@ class ParseContext:

def _record_parser_name(parser: Any) -> Any:
"""Decorator that attaches parser name to exceptions."""

def wrapper(*args: Any, **kwargs: Any) -> Any:
try:
return parser(*args, **kwargs)
except Exception as e:
if "parser" not in e.__dict__:
e.__dict__["parser"] = parser.__name__
raise

wrapper.__name__ = parser.__name__
return wrapper

Expand All @@ -74,16 +81,16 @@ def _make_decoder(*, strict: bool = True) -> JSONDecoder:
JSONObject references it from module scope — no way to inject per-decoder.
"""
decoder = JSONDecoder(strict=strict)
decoder.parse_string = _record_parser_name(py_scanstring)
decoder.parse_object = _record_parser_name(decoder.parse_object)
decoder.parse_array = _record_parser_name(decoder.parse_array)
decoder.scan_once = py_make_scanner(decoder)
decoder.parse_string = _record_parser_name(py_scanstring) # type: ignore[attr-defined]
decoder.parse_object = _record_parser_name(decoder.parse_object) # type: ignore[attr-defined]
decoder.parse_array = _record_parser_name(decoder.parse_array) # type: ignore[attr-defined]
decoder.scan_once = py_make_scanner(decoder) # type: ignore[attr-defined]
return decoder


# Patch json.decoder.scanstring once so JSONObject uses our tracked version.
# This is unavoidable: JSONObject hard-references the module-level scanstring.
json.decoder.scanstring = _record_parser_name(py_scanstring)
json.decoder.scanstring = _record_parser_name(py_scanstring) # type: ignore[attr-defined]

_decoder_strict = _make_decoder(strict=True)
_decoder_unstrict = _make_decoder(strict=False)
Expand All @@ -102,40 +109,55 @@ def diagnose(text: str, *, strict: bool = True) -> ParseContext | None:

if not text.strip():
return ParseContext(
input=text, error_type=ErrorType.EMPTY_INPUT, pos=0,
message="empty input", bracket_stack=(), nextchar="", lastchar="",
input=text,
error_type=ErrorType.EMPTY_INPUT,
pos=0,
message="empty input",
bracket_stack=(),
nextchar="",
lastchar="",
)

decoder = _decoder_strict if strict else _decoder_unstrict
try:
obj, end = decoder.scan_once(text, 0)
obj, end = decoder.scan_once(text, 0) # type: ignore[attr-defined]
if end == len(text):
return None # valid JSON
# Partial parse — decoded something but there's leftover
remaining = text[end:].strip()
return ParseContext(
input=text, error_type=ErrorType.PARTIAL_PARSE, pos=end,
input=text,
error_type=ErrorType.PARTIAL_PARSE,
pos=end,
message="partial parse",
bracket_stack=build_bracket_stack(text, end),
nextchar=remaining[:1], lastchar=text[end - 1: end],
partial_result=obj, consumed_end=end,
nextchar=remaining[:1],
lastchar=text[end - 1 : end],
partial_result=obj,
consumed_end=end,
)
except StopIteration:
return ParseContext(
input=text, error_type=ErrorType.UNEXPECTED_TOKEN, pos=0,
input=text,
error_type=ErrorType.UNEXPECTED_TOKEN,
pos=0,
message="unexpected token",
bracket_stack=build_bracket_stack(text),
nextchar=text[:1], lastchar="",
nextchar=text[:1],
lastchar="",
)
except ValueError as e:
parser = e.__dict__.get("parser", "")
etype = _classify_error(parser, e.msg)
etype = _classify_error(parser, e.msg) # type: ignore[attr-defined]
if etype is None:
return None # unknown error, treat as unfixable
pos = e.pos
pos = e.pos # type: ignore[attr-defined]
return ParseContext(
input=text, error_type=etype, pos=pos,
message=e.msg,
input=text,
error_type=etype,
pos=pos,
message=e.msg, # type: ignore[attr-defined]
bracket_stack=build_bracket_stack(text, pos),
nextchar=text[pos: pos + 1], lastchar=text[pos - 1: pos],
nextchar=text[pos : pos + 1],
lastchar=text[pos - 1 : pos],
)
1 change: 1 addition & 0 deletions half_json/rules/js_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

class FixJSStyleKey:
"""Convert JS-style bare or single-quoted keys to double-quoted."""

name = "fix_js_style_key"

def applies_to(self, ctx: ParseContext) -> bool:
Expand Down
2 changes: 2 additions & 0 deletions half_json/rules/structural_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def _guess_left(line: str) -> str:

class PrependMissingBracket:
"""Handle StopIteration — the scanner couldn't start parsing at all."""

name = "prepend_missing_bracket"

def __init__(self) -> None:
Expand Down Expand Up @@ -67,6 +68,7 @@ def reset(self) -> None:

class WrapPartialParse:
"""Handle partial parse — decoded something but leftover remains."""

name = "wrap_partial_parse"

def __init__(self) -> None:
Expand Down
9 changes: 0 additions & 9 deletions pdm.lock

This file was deleted.

68 changes: 65 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

[project]
name = "jsonfixer"
version = "0.3.0"
Expand All @@ -23,6 +22,69 @@ Homepage = "https://github.com/half-pie/half-json"
jsonfixer = "half_json.cli:fixjson"

[build-system]
requires = ["pdm-backend"]
build-backend = "pdm.backend"
requires = ["hatchling"]
build-backend = "hatchling.build"

[dependency-groups]
dev = [
"pytest>=8.0",
"ruff>=0.9.0",
"mypy>=1.14.0",
]

# Ruff configuration
[tool.ruff]
target-version = "py39"
line-length = 100

[tool.ruff.lint]
select = [
"E", # pycodestyle errors
"W", # pycodestyle warnings
"F", # Pyflakes
"I", # isort
"N", # pep8-naming
"UP", # pyupgrade
"B", # flake8-bugbear
"C4", # flake8-comprehensions
"SIM", # flake8-simplify
]
ignore = [
"E501", # Line too long (handled by formatter)
]

[tool.ruff.lint.pydocstyle]
convention = "google"

[tool.ruff.format]
quote-style = "double"
indent-style = "space"
skip-magic-trailing-comma = false
line-ending = "auto"

# Mypy configuration
[tool.mypy]
python_version = "3.9"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
disallow_incomplete_defs = true
check_untyped_defs = true
warn_redundant_casts = true
warn_unused_ignores = true
show_error_codes = true

[[tool.mypy.overrides]]
module = "tests.*"
disallow_untyped_defs = false

# Hatch build configuration
[tool.hatch.build.targets.wheel]
packages = ["half_json"]

# Pytest configuration
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
Loading