-
Notifications
You must be signed in to change notification settings - Fork 59
tools: ast-grep: add static analysis rules #502
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3dd68db
9553468
99d10aa
dc696f2
6e4c9ff
3b9d81c
2ca0a80
75a4fc2
d852cb1
a8761a4
1f4eb42
32687cd
a81bca0
680a782
781ccf9
4f83382
e3f4fe0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -110,6 +110,11 @@ jobs: | |
| key: tests-results-${{ github.run_id }} | ||
| - name: Mount bpffs | ||
| run: mount bpffs /sys/fs/bpf -t bpf | ||
| - name: Install ast-grep | ||
| run: | | ||
| dnf install -y pipx python3-pyyaml | ||
| pipx install --global ast-grep-cli | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Claude: suggestion: |
||
| ast-grep --version | ||
| - name: Configure the build | ||
| run: cmake -S $GITHUB_WORKSPACE -B $GITHUB_WORKSPACE/build -DWITH_COVERAGE=1 | ||
| - name: Build tests | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -51,6 +51,56 @@ set_tests_properties("check.checkstyle" PROPERTIES | |
| LABELS "check" | ||
| ) | ||
|
|
||
| find_program(SG_BIN NAMES ast-grep) | ||
|
|
||
| set(SG_PYYAML_RC 1) | ||
| if(SG_BIN) | ||
| execute_process( | ||
| COMMAND python3 -c "import yaml" | ||
| RESULT_VARIABLE SG_PYYAML_RC | ||
| OUTPUT_QUIET ERROR_QUIET | ||
| ) | ||
| if(NOT SG_PYYAML_RC EQUAL 0) | ||
| message(WARNING | ||
| "ast-grep: python3 cannot import yaml; " | ||
| "skipping check.astgrep. Install python3-pyyaml (dnf) or " | ||
| "python3-yaml (apt) to enable it.") | ||
| endif() | ||
| endif() | ||
|
|
||
| if(SG_BIN AND SG_PYYAML_RC EQUAL 0) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Claude: suggestion: When set(SG_PYYAML_RC 1)
if(SG_BIN)
execute_process(...)
endif() |
||
| set(SG_CONFIG ${CMAKE_SOURCE_DIR}/tools/ast-grep/sgconfig.yml) | ||
| set(SG_KNOWN_ISSUES ${CMAKE_SOURCE_DIR}/tools/ast-grep/known_issues) | ||
| set(SG_FILTER ${CMAKE_SOURCE_DIR}/tools/ast-grep/ignore_known_issues.py) | ||
|
|
||
| add_test(NAME "check.astgrep" | ||
| COMMAND bash -c "set -o pipefail; \ | ||
| ${SG_BIN} test --config ${SG_CONFIG} && \ | ||
| ${SG_BIN} scan --config ${SG_CONFIG} --json=stream \ | ||
| ${CMAKE_SOURCE_DIR}/src/libbpfilter \ | ||
| ${CMAKE_SOURCE_DIR}/src/bfcli \ | ||
| | python3 ${SG_FILTER} --check \ | ||
| --known-issues-dir ${SG_KNOWN_ISSUES} \ | ||
| --repo-root ${CMAKE_SOURCE_DIR}" | ||
| ) | ||
|
|
||
| set_tests_properties("check.astgrep" PROPERTIES | ||
| LABELS "check" | ||
| ) | ||
|
|
||
| add_custom_target(astgrep-known-issues | ||
| COMMAND bash -c "set -o pipefail; \ | ||
| ${SG_BIN} scan --config ${SG_CONFIG} --json=stream \ | ||
| ${CMAKE_SOURCE_DIR}/src/libbpfilter \ | ||
| ${CMAKE_SOURCE_DIR}/src/bfcli \ | ||
| | python3 ${SG_FILTER} --regen \ | ||
| --known-issues-dir ${SG_KNOWN_ISSUES} \ | ||
| --repo-root ${CMAKE_SOURCE_DIR}" | ||
| COMMENT "Regenerating ast-grep known_issues/ from current scan" | ||
| VERBATIM | ||
| ) | ||
| endif() | ||
|
|
||
| add_custom_target(fixstyle | ||
| COMMAND | ||
| ${CLANG_FORMAT_BIN} | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,189 @@ | ||
| #!/usr/bin/env python3 | ||
| # SPDX-License-Identifier: GPL-2.0-only | ||
| # Copyright (c) 2026 Meta Platforms, Inc. and affiliates. | ||
|
|
||
| """Filter ast-grep scan output against per-rule known-issues baselines. | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This script is vibecoded, but it is a temporary thing for migration. Once we fix all linter issues, I want to delete it. |
||
|
|
||
| Two modes: | ||
| --check (default): read ast-grep JSON stream from stdin, drop matches that | ||
| appear in tools/ast-grep/known_issues/<rule>.yml, print | ||
| the remainder to stderr, exit nonzero iff the remainder | ||
| is non-empty. | ||
| --regen : same input, but overwrite the known_issues tree with | ||
| exactly the set of currently-observed violations. | ||
|
|
||
| The identity key for an entry is (file, textSha, occurrence). Rule is implicit | ||
| from the filename stem. See the header comment emitted at the top of each YAML | ||
| file for the schema. | ||
| """ | ||
|
|
||
| import argparse | ||
| import hashlib | ||
| import json | ||
| import pathlib | ||
| import sys | ||
| from collections import defaultdict | ||
|
|
||
| try: | ||
| import yaml | ||
| except ImportError: | ||
| sys.stderr.write( | ||
| "ERROR: PyYAML is required to run ignore_known_issues.py.\n" | ||
| " Fedora/RHEL : dnf install -y python3-pyyaml\n" | ||
| " Debian/Ubuntu: apt-get install -y python3-yaml\n" | ||
| " pip : pip install pyyaml\n" | ||
| ) | ||
| sys.exit(2) | ||
|
|
||
|
|
||
| HEADER_TEMPLATE = """\ | ||
| # Known issues for ast-grep rule `{rule}`. | ||
| # | ||
| # Suppressed during check.astgrep so only NEW violations fail CI. Regenerate | ||
| # this file (and all siblings) by running: make -C build astgrep-known-issues | ||
| # | ||
| # Schema: list of | ||
| # file: path relative to repo root | ||
| # textSha: first 12 hex chars of sha256(match.text) | ||
| # occurrence: 0-based index among identical (file, textSha) matches | ||
| # | ||
| # Do not hand-edit — the directory is regenerated deterministically. | ||
|
|
||
| """ | ||
|
|
||
|
|
||
| def parse_args(argv): | ||
| p = argparse.ArgumentParser(description=__doc__.splitlines()[0]) | ||
| mode = p.add_mutually_exclusive_group() | ||
| mode.add_argument("--check", action="store_true", | ||
| help="filter stdin against known_issues/ (default)") | ||
| mode.add_argument("--regen", action="store_true", | ||
| help="rewrite known_issues/ from stdin") | ||
| p.add_argument("--known-issues-dir", required=True, type=pathlib.Path, | ||
| help="path to the known_issues/ directory") | ||
| p.add_argument("--repo-root", required=True, type=pathlib.Path, | ||
| help="repo root used to emit relative paths in ast-grep output") | ||
| args = p.parse_args(argv) | ||
| if not args.regen: | ||
| args.check = True | ||
| return args | ||
|
|
||
|
|
||
| def read_observations(stream, repo_root): | ||
| prefix = str(repo_root).rstrip("/") + "/" | ||
| obs = [] | ||
| for raw in stream: | ||
| raw = raw.strip() | ||
| if not raw: | ||
| continue | ||
| d = json.loads(raw) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Claude: suggestion: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Claude: suggestion: try:
d = json.loads(raw)
except json.JSONDecodeError:
sys.stderr.write(f"warning: skipping non-JSON line: {raw!r}\n")
continue |
||
| rule = d["ruleId"] | ||
| path = d["file"] | ||
| if path.startswith(prefix): | ||
| path = path[len(prefix):] | ||
| text = d["text"] | ||
| text_sha = hashlib.sha256(text.encode("utf-8")).hexdigest()[:12] | ||
| byte_off = d["range"]["byteOffset"]["start"] | ||
| line_num = d["range"]["start"]["line"] + 1 | ||
| message = d.get("message", "") | ||
| obs.append({ | ||
| "rule": rule, | ||
| "file": path, | ||
| "textSha": text_sha, | ||
| "byteOffset": byte_off, | ||
| "line": line_num, | ||
| "message": message, | ||
| }) | ||
| return obs | ||
|
|
||
|
|
||
| def assign_occurrences(observations): | ||
| groups = defaultdict(list) | ||
| for o in observations: | ||
| groups[(o["rule"], o["file"], o["textSha"])].append(o) | ||
| out = [] | ||
| for items in groups.values(): | ||
| items.sort(key=lambda o: o["byteOffset"]) | ||
| for i, o in enumerate(items): | ||
| o["occurrence"] = i | ||
| out.append(o) | ||
| return out | ||
|
|
||
|
|
||
| def load_all_known_issues(directory): | ||
| known = {} | ||
| if not directory.exists(): | ||
| return known | ||
| for path in sorted(directory.glob("*.yml")): | ||
| with open(path) as f: | ||
| data = yaml.safe_load(f) or [] | ||
| rule = path.stem | ||
| known[rule] = {(e["file"], e["textSha"], e["occurrence"]) for e in data} | ||
| return known | ||
|
|
||
|
|
||
| def write_known_issues_file(path, rule, entries): | ||
| header = HEADER_TEMPLATE.format(rule=rule) | ||
| body = yaml.safe_dump(entries, default_flow_style=False, sort_keys=False, | ||
| width=200, allow_unicode=True) | ||
| path.write_text(header + body) | ||
|
|
||
|
|
||
| def cmd_check(args): | ||
| known = load_all_known_issues(args.known_issues_dir) | ||
| observed = assign_occurrences(read_observations(sys.stdin, args.repo_root)) | ||
| leftovers = [] | ||
| for o in observed: | ||
| key = (o["file"], o["textSha"], o["occurrence"]) | ||
| if key in known.get(o["rule"], set()): | ||
| continue | ||
| leftovers.append(o) | ||
| if not leftovers: | ||
| return 0 | ||
| leftovers.sort(key=lambda o: (o["file"], o["line"], o["rule"])) | ||
| for o in leftovers: | ||
| sys.stderr.write( | ||
| f"{o['file']}:{o['line']} [{o['rule']}] {o['message']}\n" | ||
| ) | ||
| sys.stderr.write( | ||
| f"\n{len(leftovers)} new ast-grep violation(s) not in known_issues/. " | ||
| "Run `make -C build astgrep-known-issues` to refresh the baseline " | ||
| "once these are intentional.\n" | ||
| ) | ||
| return 1 | ||
|
|
||
|
|
||
| def cmd_regen(args): | ||
| observed = assign_occurrences(read_observations(sys.stdin, args.repo_root)) | ||
| by_rule = defaultdict(list) | ||
| for o in observed: | ||
| by_rule[o["rule"]].append({ | ||
| "file": o["file"], | ||
| "textSha": o["textSha"], | ||
| "occurrence": o["occurrence"], | ||
| }) | ||
| for rule in by_rule: | ||
| by_rule[rule].sort( | ||
| key=lambda e: (e["file"], e["textSha"], e["occurrence"]) | ||
| ) | ||
| args.known_issues_dir.mkdir(parents=True, exist_ok=True) | ||
| existing = {p.stem: p for p in args.known_issues_dir.glob("*.yml")} | ||
| for stem, path in existing.items(): | ||
| if stem not in by_rule: | ||
| path.unlink() | ||
| for rule, entries in by_rule.items(): | ||
| write_known_issues_file( | ||
| args.known_issues_dir / f"{rule}.yml", rule, entries | ||
| ) | ||
| return 0 | ||
|
|
||
|
|
||
| def main(argv): | ||
| args = parse_args(argv) | ||
| if args.regen: | ||
| return cmd_regen(args) | ||
| return cmd_check(args) | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| sys.exit(main(sys.argv[1:])) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| # ast-grep known issues | ||
|
|
||
| Per-rule baselines of currently-known violations. One YAML file per rule that | ||
| has at least one violation in the tree; filename is `<rule-id>.yml`. | ||
|
|
||
| `check.astgrep` subtracts these entries from each scan so only NEW violations | ||
| fail CI. To refresh the baseline after fixing (or intentionally introducing) | ||
| violations, run: | ||
|
|
||
| make -C build astgrep-known-issues | ||
|
|
||
| The directory is regenerated deterministically — do not hand-edit. Entries in | ||
| a file for which no current violation matches are silently ignored, so stale | ||
| entries never break CI; they are cleaned up on the next regeneration. | ||
|
|
||
| See `../ignore_known_issues.py` for the filter implementation. | ||
|
|
||
| ## Severity interaction | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Claude: nit: The README states that |
||
| This suppression mechanism only works for rules declared `severity: warning`. | ||
| `ast-grep scan` exits non-zero on any `severity: error` match regardless of | ||
| the baseline, and `set -o pipefail` on the CI command then fails the pipeline | ||
| before `ignore_known_issues.py` gets a chance to subtract known entries. A | ||
| rule promoted to `error` is therefore zero-tolerance: new violations must be | ||
| fixed (or the rule's exclusion regex extended), not baselined. | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Claude: suggestion: Consider baking
pipx,python3-pyyaml, andast-grep-cliinto the Fedora Dockerfile rather than installing at CI runtime. Other build dependencies (clang-format, cmocka, etc.) are already pre-installed in the container image. Installing at runtime adds network latency and a failure point on every run.