Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
228 changes: 228 additions & 0 deletions .github/scripts/check_reference_conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
#!/usr/bin/env python3
"""Validate java-tron reference.conf key names and hierarchy depth.

Rules enforced:
1. Every user-defined segment of every key path must match ^[a-z][a-zA-Z0-9]*$
(lowerCamelCase: starts lowercase, letters/digits only).
2. Total path depth must be <= MAX_DEPTH (5). Each list/array step counts
as one additional level. For example `rate.limiter.http[].component`
is 5 levels deep (rate=1, limiter=2, http=3, []=4, component=5).
3. ALLOWLIST entries are exempt from the format rule (legacy keys that ship
in user configs; renaming would break compatibility).

Parsing strategy: delegated to pyhocon (https://github.com/chimpler/pyhocon),
the reference Python HOCON implementation. This avoids hand-rolled scanner
pitfalls (key = { ... } prefix loss, triple-strings, substitutions, includes,
+= operator, block comments). pyhocon returns a fully-merged ConfigTree where
dotted-form keys are expanded into nested objects — i.e. the same canonical
key set Typesafe Config / ConfigBeanFactory will see at runtime.

Array handling: keys inside object-elements of arrays are also user-defined
config keys (e.g. each entry in `rate.limiter.rpc = [{ component=..., ... }]`
is parsed by RateLimiterConfig). The walker recurses into list elements and
treats the array step as a synthetic `[]` segment that contributes to depth
but is not itself validated as a name. Element keys are deduplicated across
list entries because well-formed arrays use homogeneous object shapes.

Debug mode: pass `--debug` to print every parsed key with its depth, in
walk order (which mirrors the file top-to-bottom). Use this to eyeball the
parser's view against reference.conf.

Exit code: 0 if clean, 1 if any violation remains after allowlist filtering,
2 on usage or environment errors (bad arguments, missing pyhocon, file not
found, parse failure).

CI integration: invoked by the `Validate reference.conf key names and depth`
step of the `checkstyle` job in `.github/workflows/pr-check.yml`. The non-zero
exit on violations is what makes that step fail — there is intentionally NO
extra `exit 1` in the workflow shell wrapper. A single GHA `::error` workflow
command is also emitted unconditionally (not gated on the GITHUB_ACTIONS env
var) so local runs produce the same output as CI; the leading `::` is
harmless noise locally.
"""
import re
import sys
from pathlib import Path

try:
from pyhocon import ConfigFactory, ConfigTree
except ImportError:
print(
"error: pyhocon is required. Install with `pip install pyhocon`.",
file=sys.stderr,
)
sys.exit(2)

# Set at the current max depth of reference.conf (5). No buffer: a mature
# project should not allow silent drift, so any new key going deeper must
# bump MAX_DEPTH via an explicit, reviewed change (deeper trees hurt
# readability and complicate ConfigBeanFactory mapping).
MAX_DEPTH = 5
# Shape of one path segment: lowerCamelCase — a lowercase first letter
# followed by ASCII letters/digits only.
KEY_REGEX = re.compile(r'^[a-z][a-zA-Z0-9]*$')
# Legacy keys grandfathered to keep user `config.conf` files compatible.
# Do NOT extend this list for new keys — every new key must be lowerCamelCase.
# A future rename + deprecation cycle can shrink this set back to empty.
# Frozen so the allowlist cannot be mutated at runtime by accident; it is only
# ever used for membership tests.
ALLOWLIST = frozenset({
    "node.http.PBFTEnable",
    "node.http.PBFTPort",
    "node.rpc.PBFTEnable",
    "node.rpc.PBFTPort",
})


def walk(node, path, depth):
    """Yield ``(full_path, depth, is_leaf)`` for every reachable user-defined key.

    - A ConfigTree key adds one depth level and contributes a name segment
      (joined to the parent path with ``.``).
    - A list step adds one synthetic level rendered as ``[]``. Element-internal
      keys are yielded once per unique sub-path (homogeneous object arrays
      would otherwise yield each field N times).
    - Scalars / null / list-of-scalars produce no further keys.

    Args:
        node: The value to descend into (ConfigTree, list, or scalar).
        path: Dotted path of ``node`` itself; ``""`` for the root.
        depth: Depth of ``node`` itself; ``0`` for the root.

    Yields:
        ``(full_path, depth, is_leaf)`` tuples in walk order. ``depth``
        includes the array ``[]`` steps. ``is_leaf`` is True when the value at
        this path has no child keys of its own in the ConfigTree sense — a
        scalar, list, null, or an *empty* object — so callers can filter
        leaves vs namespace intermediates.
    """
    if isinstance(node, ConfigTree):
        for key, value in node.items():
            child_path = f"{path}.{key}" if path else key
            child_depth = depth + 1
            # An empty object (`key = {}`) has no children, so no deeper leaf
            # would ever carry its name to a leaf-only check. Report it as a
            # leaf so its name and depth are still validated by the caller.
            is_leaf = not isinstance(value, ConfigTree) or len(value) == 0
            yield child_path, child_depth, is_leaf
            yield from walk(value, child_path, child_depth)
    elif isinstance(node, list):
        array_path = f"{path}[]"
        array_depth = depth + 1
        seen = set()
        for elem in node:
            # Object element: walk its keys. Nested list element (HOCON allows
            # list-of-list, e.g. `a = [[{x=1}]]`): recurse so each inner []
            # step also contributes to depth. Scalar elements have no sub-keys.
            if not isinstance(elem, (ConfigTree, list)):
                continue
            for sub_path, sub_depth, sub_leaf in walk(elem, array_path, array_depth):
                if sub_path in seen:
                    continue
                seen.add(sub_path)
                yield sub_path, sub_depth, sub_leaf


def _gha_escape_data(value):
    """Escape *value* for the message part of a GitHub Actions workflow command.

    A raw newline would end the command early and a raw `%` could be decoded
    as an escape sequence, so both are percent-encoded (`%` first, so the
    encodings themselves are not re-escaped).
    """
    return (
        str(value)
        .replace("%", "%25")
        .replace("\r", "%0D")
        .replace("\n", "%0A")
    )


def _gha_escape_property(value):
    """Escape *value* for a `key=value` property of a workflow command.

    Properties are `,`-separated and terminated by `::`, so `:` and `,` are
    encoded in addition to the message-level escapes.
    """
    return _gha_escape_data(value).replace(":", "%3A").replace(",", "%2C")


def main(argv):
    """Validate one reference.conf file and return the process exit code.

    Args:
        argv: Full argument vector, program name first:
            ``[prog, [--debug], <path/to/reference.conf>]``. ``--debug`` is
            only recognised as the first argument after the program name.

    Returns:
        0 if the file is clean, 1 if any format or depth violation remains
        after allowlist filtering, 2 on usage errors, a missing file, or a
        parse failure.
    """
    debug = False
    args = list(argv[1:])
    if args and args[0] == "--debug":
        debug = True
        args = args[1:]
    if len(args) != 1:
        # Fall back to the script name so an empty argv still yields the
        # usage message instead of an IndexError.
        prog = argv[0] if argv else "check_reference_conf.py"
        print(f"usage: {prog} [--debug] <path/to/reference.conf>", file=sys.stderr)
        return 2
    path = Path(args[0])
    if not path.is_file():
        print(f"error: file not found: {path}", file=sys.stderr)
        return 2

    try:
        tree = ConfigFactory.parse_file(str(path))
    except Exception as e:
        # Top-level boundary: whatever the parser raises is reported and
        # mapped to the environment-error exit code.
        print(f"error: failed to parse {path}: {e}", file=sys.stderr)
        # Mirror the violation path: emit a single GHA annotation so the
        # parse failure surfaces in the PR check summary, not just the log.
        # The exception text is escaped because a multi-line message would
        # otherwise truncate the annotation at its first newline.
        print(
            f"::error file={_gha_escape_property(path)},title=reference.conf::"
            f"failed to parse: {_gha_escape_data(e)}"
        )
        return 2

    keys = list(walk(tree, "", 0))

    if debug:
        # Keys are yielded in pyhocon insertion order, which mirrors the
        # source file top-to-bottom. Eyeball this against reference.conf to
        # confirm coverage; the depth column makes the array `[]` steps
        # explicit so MAX_DEPTH math is verifiable by inspection. Trailing
        # `/` marks namespace intermediates (have children); bare names are
        # leaves — `grep -v '/$'` filters to just leaves.
        leaf_count = sum(1 for _, _, lf in keys if lf)
        print(
            f"DEBUG: {len(keys)} parsed keys "
            f"({leaf_count} leaves + {len(keys) - leaf_count} intermediates), "
            f"walk order:"
        )
        for full_path, depth, is_leaf in keys:
            label = full_path if is_leaf else full_path + "/"
            print(f" d={depth} {label}")
        print()

    format_violations = []
    depth_violations = []

    # Only check leaves: pyhocon expands a dotted-form declaration like
    # `a.b.c = X` into intermediate ConfigTree nodes for `a` and `a.b`. A
    # single user-written bad key would otherwise be reported once per
    # intermediate AND once as the leaf, multiplying noise. The leaf path
    # carries every segment, so checking just leaves covers all segments.
    for full_path, depth, is_leaf in keys:
        if not is_leaf:
            continue
        if full_path not in ALLOWLIST:
            for seg in full_path.split('.'):
                # Strip any number of trailing `[]` markers — nested arrays
                # produce segments like `a[][]`.
                while seg.endswith('[]'):
                    seg = seg[:-2]
                # fullmatch, not match: with match() the pattern's `$` would
                # also accept a segment that ends in a newline.
                if seg and not KEY_REGEX.fullmatch(seg):
                    format_violations.append((full_path, seg))
                    break  # one report per path: the first offending segment

        # The depth rule applies to allowlisted keys too.
        if depth > MAX_DEPTH:
            depth_violations.append((full_path, depth))

    format_violations.sort()
    depth_violations.sort()

    if format_violations or depth_violations:
        # Human-readable report for the job log.
        lines_out = []
        if format_violations:
            lines_out.append(
                f"Format violations ({len(format_violations)}) — "
                f"each segment must match {KEY_REGEX.pattern}:"
            )
            for full_path, seg in format_violations:
                lines_out.append(f" format: {full_path} (segment: '{seg}')")
        if depth_violations:
            if lines_out:
                lines_out.append("")
            lines_out.append(
                f"Depth violations ({len(depth_violations)}) — max depth is {MAX_DEPTH} "
                f"(each `[]` array step counts as one level):"
            )
            for full_path, depth in depth_violations:
                lines_out.append(
                    f" depth: {full_path} (depth={depth}, max={MAX_DEPTH})"
                )
        print("\n".join(lines_out))
        print()

        # Emit ONE consolidated GHA workflow annotation. All offending entries
        # are packed into the annotation body via %0A (GHA's newline escape)
        # so the entries are visible in the annotation summary, not just in
        # the job log. Each entry is escaped on its own so that only the
        # joining %0A sequences are interpreted as line breaks.
        entries = []
        for full_path, seg in format_violations:
            entries.append(
                _gha_escape_data(f"format: {full_path} (segment '{seg}')")
            )
        for full_path, depth in depth_violations:
            entries.append(
                _gha_escape_data(
                    f"depth: {full_path} (depth={depth}, max={MAX_DEPTH})"
                )
            )
        body = (
            f"reference.conf has {len(format_violations)} format + "
            f"{len(depth_violations)} depth violation(s):%0A"
            + "%0A".join(entries)
        )
        print(f"::error file={_gha_escape_property(path)},title=reference.conf::{body}")
        print(
            f"FAIL: {len(format_violations)} format + {len(depth_violations)} depth "
            f"violation(s) in {path}",
            file=sys.stderr,
        )
        return 1

    print(f"OK: {path} — {len(keys)} keys, all lowerCamelCase, depth <= {MAX_DEPTH}")
    return 0

if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the process exit status.
    raise SystemExit(main(sys.argv))
14 changes: 14 additions & 0 deletions .github/workflows/pr-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,20 @@ jobs:
steps:
- uses: actions/checkout@v5

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'

- name: Install pyhocon
run: pip install --quiet pyhocon

Comment thread
bladehan1 marked this conversation as resolved.
- name: Validate reference.conf key names and depth
shell: bash
run: |
python3 .github/scripts/check_reference_conf.py \
common/src/main/resources/reference.conf

- name: Set up JDK 17
uses: actions/setup-java@v5
with:
Expand Down
Loading