diff --git a/.github/workflows/allowed-symbols.yml b/.github/workflows/analysis.yml similarity index 68% rename from .github/workflows/allowed-symbols.yml rename to .github/workflows/analysis.yml index 9a341035..c0f0241e 100644 --- a/.github/workflows/allowed-symbols.yml +++ b/.github/workflows/analysis.yml @@ -1,4 +1,4 @@ -name: Allowed Symbols Verification +name: Static Analysis Verification # Use pull_request_target so the workflow definition always comes from the # base branch (main). This prevents a PR from editing the workflow file to @@ -14,8 +14,8 @@ permissions: pull-requests: read jobs: - check-allowed-symbols: - name: Allowed Symbols Label Check + check-analysis: + name: Static Analysis Label Check runs-on: ubuntu-latest timeout-minutes: 10 steps: @@ -28,27 +28,27 @@ jobs: - name: Fetch PR head run: git fetch origin "refs/pull/${{ github.event.pull_request.number }}/head" - - name: Check for allowedsymbols changes + - name: Check for analysis changes id: changes run: | - changed=$(git diff --name-only "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}" -- 'allowedsymbols/') + changed=$(git diff --name-only "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}" -- 'analysis/') if [ -n "$changed" ]; then echo "modified=true" >> "$GITHUB_OUTPUT" - echo "Changed allowedsymbols files:" + echo "Changed analysis files:" echo "$changed" else echo "modified=false" >> "$GITHUB_OUTPUT" - echo "No allowedsymbols files changed." + echo "No analysis files changed." fi - - name: Require verified/allowed_symbols label + - name: Require verified/analysis label if: steps.changes.outputs.modified == 'true' env: PR_LABELS: ${{ toJSON(github.event.pull_request.labels.*.name) }} run: | - if echo "$PR_LABELS" | jq -e 'index("verified/allowed_symbols")' > /dev/null 2>&1; then - echo "Label 'verified/allowed_symbols' is present. Check passed." + if echo "$PR_LABELS" | jq -e 'index("verified/analysis") // index("verified/allowed_symbols")' > /dev/null 2>&1; then + echo "Label present. Check passed." else - echo "::error::Files in allowedsymbols/ have been modified. A human must review the changes and add the 'verified/allowed_symbols' label to this PR before it can be merged." + echo "::error::Files in analysis/ have been modified. A human must review the changes and add the 'verified/analysis' label to this PR before it can be merged." exit 1 fi diff --git a/allowedsymbols/README.md b/analysis/README.md similarity index 51% rename from allowedsymbols/README.md rename to analysis/README.md index ccca2139..baa207ff 100644 --- a/allowedsymbols/README.md +++ b/analysis/README.md @@ -1,4 +1,4 @@ -# allowedsymbols +# analysis Static enforcement of symbol-level import restrictions for the rshell interpreter. @@ -56,8 +56,70 @@ The tests in this package use Go's `go/parser` and `go/ast` to walk source files Verification tests additionally inject banned imports or unlisted symbols into a temporary copy of the repo and assert the checker catches them. +## Structural Rules + +In addition to symbol-level allowlist checking, the package enforces **structural rules** — code patterns that must (or must not) appear together in the same function scope. These are checked by `checkFileScannerBuffer` and `checkFileOpenFileClose` in `structural.go` and are applied automatically to every file that passes through `checkAllowedSymbols`. + +Both rules are also exposed as standalone `go/analysis` analyzers (`ScannerBufferAnalyzer`, `OpenFileCloseAnalyzer`) that can be registered with `go vet` or gopls. + +### Rule 1 — `bufio.NewScanner` must call `.Buffer()` + +**Why:** `bufio.Scanner` has a fixed default buffer of 64 KiB. Any line longer than that causes `Scanner.Scan()` to return `false` and `Scanner.Err()` to return `bufio.ErrTooLong`. In a shell that must handle arbitrary user input this is a reliability and DoS risk — a single long line silently truncates or aborts processing. + +**What is checked:** Every variable assigned from `bufio.NewScanner(...)` must have `.Buffer(...)` called on it within the same function body. Nested function literals (`func() { ... }`) are treated as independent scopes. + +**Compliant:** +```go +sc := bufio.NewScanner(r) +sc.Buffer(make([]byte, 4096), maxLineBytes) +for sc.Scan() { ... } +``` + +**Violation:** +```go +sc := bufio.NewScanner(r) // flagged: no sc.Buffer() call +for sc.Scan() { ... } +``` + +### Rule 2 — `callCtx.OpenFile` results must be closed + +**Why:** Every open file descriptor consumes a kernel resource. Over repeated script executions, unclosed handles exhaust the process file-descriptor limit and cause all subsequent I/O to fail. + +**What is checked:** Every variable assigned from a `.OpenFile(...)` call (any receiver — the check matches the method name, not the receiver type) must have `.Close()` called on it — directly or via `defer` — within the same function body. The checker also tracks **hand-off** assignments: if `f` is reassigned to `rc` and `rc.Close()` is called, `f` is considered closed. + +**Compliant — direct close:** +```go +f, err := callCtx.OpenFile(ctx, path, os.O_RDONLY, 0) +if err != nil { return err } +defer f.Close() +``` + +**Compliant — hand-off pattern:** +```go +f, err := callCtx.OpenFile(ctx, path, os.O_RDONLY, 0) +if err != nil { return err } +rc = f // hand off to rc +defer rc.Close() // closes f transitively +``` + +**Compliant — return ownership transfer:** +```go +func openHelper(callCtx cc) (io.ReadCloser, error) { + f, err := callCtx.OpenFile(ctx, path, os.O_RDONLY, 0) + if err != nil { return nil, err } + return f, nil // caller is responsible for closing +} +``` + +**Violation:** +```go +f, err := callCtx.OpenFile(ctx, path, os.O_RDONLY, 0) +if err != nil { return err } +_ = f // flagged: f is never closed +``` + ## Adding a New Symbol 1. Add a line to the appropriate allowlist (and the per-command sublist if it's a builtin). 2. Prefix the comment with the correct safety emoji. -3. Run `go test ./allowedsymbols/` to verify the entry is valid and used. +3. Run `go test ./analysis/` to verify the entry is valid and used. diff --git a/allowedsymbols/analyzer.go b/analysis/analyzer.go similarity index 92% rename from allowedsymbols/analyzer.go rename to analysis/analyzer.go index 6afb25ea..c4bbbfc8 100644 --- a/allowedsymbols/analyzer.go +++ b/analysis/analyzer.go @@ -3,14 +3,14 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2026-present Datadog, Inc. -// Package allowedsymbols provides a go/analysis.Analyzer that enforces +// Package analysis provides a go/analysis.Analyzer that enforces // symbol-level import restrictions on Go source files. // // The analyzer checks that every imported symbol is in a given allowlist, that // no permanently banned packages are imported, and that every symbol in the // allowlist is actually used. It reports violations with file:line:col // diagnostics. -package allowedsymbols +package analysis import ( "fmt" @@ -39,10 +39,16 @@ type AnalyzerConfig struct { // // NewAnalyzer panics if any entry in cfg.Symbols is malformed (no dot // separator), matching the behaviour of the test-harness variant. +// +// NOTE: This analyzer only enforces symbol-level allowlist restrictions. For +// full static analysis coverage, callers should also register +// ScannerBufferAnalyzer and OpenFileCloseAnalyzer alongside this one. The +// test-harness path (checkAllowedSymbols) already applies all three checks +// automatically. func NewAnalyzer(cfg AnalyzerConfig) *analysis.Analyzer { for _, entry := range cfg.Symbols { if strings.LastIndexByte(entry, '.') <= 0 { - panic(fmt.Sprintf("allowedsymbols.NewAnalyzer: malformed allowlist entry (no dot): %q", entry)) + panic(fmt.Sprintf("analysis.NewAnalyzer: malformed allowlist entry (no dot): %q", entry)) } } @@ -74,7 +80,7 @@ func NewAnalyzer(cfg AnalyzerConfig) *analysis.Analyzer { } return &analysis.Analyzer{ - Name: "allowedsymbols", + Name: "analysis", Doc: "enforces symbol-level import restrictions via an allowlist", Run: run, } diff --git a/allowedsymbols/analyzer_test.go b/analysis/analyzer_test.go similarity index 95% rename from allowedsymbols/analyzer_test.go rename to analysis/analyzer_test.go index e192822c..08110014 100644 --- a/allowedsymbols/analyzer_test.go +++ b/analysis/analyzer_test.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2026-present Datadog, Inc. -package allowedsymbols +package analysis import ( "testing" @@ -18,8 +18,8 @@ func TestNewAnalyzer_Metadata(t *testing.T) { Symbols: []string{"fmt.Println"}, ListName: "test", }) - if a.Name != "allowedsymbols" { - t.Errorf("Name = %q, want \"allowedsymbols\"", a.Name) + if a.Name != "analysis" { + t.Errorf("Name = %q, want \"analysis\"", a.Name) } if len(a.Requires) != 0 { t.Errorf("Requires has %d entries, want 0", len(a.Requires)) diff --git a/analysis/structural.go b/analysis/structural.go new file mode 100644 index 00000000..3444fd81 --- /dev/null +++ b/analysis/structural.go @@ -0,0 +1,257 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package analysis + +import ( + "go/ast" + "go/token" + + "golang.org/x/tools/go/analysis" +) + +// ScannerBufferAnalyzer checks that every bufio.NewScanner call in the +// analyzed package has a corresponding .Buffer() call on the returned value +// within the same function scope. Without Buffer(), the scanner uses a fixed +// 64 KiB internal buffer and fails on lines longer than that — a reliability +// and DoS risk for builtins that must handle arbitrary input. +var ScannerBufferAnalyzer = &analysis.Analyzer{ + Name: "scannerbuffer", + Doc: "checks that bufio.NewScanner results have Buffer() called to set a bounded read buffer", + Run: runScannerBuffer, +} + +// OpenFileCloseAnalyzer checks that every callCtx.OpenFile call result that +// is assigned to a variable has a corresponding .Close() call (direct or via +// defer) within the same function scope. Unclosed file handles exhaust file +// descriptors over repeated script executions. +var OpenFileCloseAnalyzer = &analysis.Analyzer{ + Name: "openfileclose", + Doc: "checks that callCtx.OpenFile results are always closed within the same function", + Run: runOpenFileClose, +} + +func runScannerBuffer(pass *analysis.Pass) (any, error) { + for _, f := range pass.Files { + checkFileScannerBuffer(f, func(pos token.Pos, format string, args ...any) { + pass.Reportf(pos, format, args...) + }) + } + return nil, nil +} + +func runOpenFileClose(pass *analysis.Pass) (any, error) { + for _, f := range pass.Files { + checkFileOpenFileClose(f, func(pos token.Pos, format string, args ...any) { + pass.Reportf(pos, format, args...) + }) + } + return nil, nil +} + +// checkFileScannerBuffer enforces the Scanner.Buffer() rule on a single file. +// It is also called directly by the test harness in symbols_test.go. +func checkFileScannerBuffer(f *ast.File, report func(pos token.Pos, format string, args ...any)) { + forEachFuncBody(f, func(body *ast.BlockStmt) { + type scannerVar struct { + pos token.Pos + name string + } + var scanners []scannerVar + buffered := make(map[string]bool) + + inspectBody(body, func(n ast.Node) { + switch node := n.(type) { + case *ast.AssignStmt: + // Detect: x := bufio.NewScanner(...) + for i, rhs := range node.Rhs { + if !isCall(rhs, "bufio", "NewScanner") { + continue + } + if i < len(node.Lhs) { + if id, ok := node.Lhs[i].(*ast.Ident); ok && id.Name != "_" { + scanners = append(scanners, scannerVar{pos: rhs.Pos(), name: id.Name}) + } + } + } + case *ast.CallExpr: + // Detect: x.Buffer(...) + if sel, ok := node.Fun.(*ast.SelectorExpr); ok && sel.Sel.Name == "Buffer" { + if id, ok := sel.X.(*ast.Ident); ok { + buffered[id.Name] = true + } + } + } + }) + + for _, sc := range scanners { + if !buffered[sc.name] { + report(sc.pos, + "bufio.NewScanner result %q must have .Buffer() called to cap the maximum line size (see analysis/README.md §Structural Rules)", + sc.name) + } + } + }) +} + +// checkFileOpenFileClose enforces the OpenFile-must-be-closed rule on a +// single file. It is also called directly by the test harness. +// +// The rule accounts for the common hand-off pattern: +// +// f, err := callCtx.OpenFile(...) +// rc = f // hand off to rc +// defer rc.Close() // closes f transitively +func checkFileOpenFileClose(f *ast.File, report func(pos token.Pos, format string, args ...any)) { + forEachFuncBody(f, func(body *ast.BlockStmt) { + type openVar struct { + pos token.Pos + name string + } + // NOTE: opens is keyed on variable name as a string. If the same name is + // reused for two successive OpenFile calls without closing the first, a + // single Close() call will satisfy both entries and the second leak will + // not be flagged. Use distinct variable names for successive file opens + // in the same scope to ensure reliable detection. + var opens []openVar + closed := make(map[string]bool) + // handOff maps a "holder" variable name to the original variable it was + // assigned from. Closing the holder counts as closing the original. + handOff := make(map[string]string) // holder → original + // returned tracks variables that appear in any return statement. + // Returning a file handle transfers ownership to the caller, so no + // Close() is required in the current scope. + // + // NOTE: This check is path-insensitive. If a variable appears in a + // return statement on one branch (e.g. the happy path) but is leaked on + // another branch (e.g. a subsequent early-return), this checker will NOT + // flag it. Full path-sensitive analysis requires CFG-based data flow, + // which is beyond the scope of this AST-only checker. + returned := make(map[string]bool) + + inspectBody(body, func(n ast.Node) { + switch node := n.(type) { + case *ast.AssignStmt: + for i, rhs := range node.Rhs { + // Detect: f, err := .OpenFile(...) + if call, ok := rhs.(*ast.CallExpr); ok { + if sel, ok := call.Fun.(*ast.SelectorExpr); ok && sel.Sel.Name == "OpenFile" { + if i < len(node.Lhs) { + if id, ok := node.Lhs[i].(*ast.Ident); ok && id.Name != "_" { + opens = append(opens, openVar{pos: rhs.Pos(), name: id.Name}) + } + } + } + } + // Detect: rc = f (hand-off from OpenFile result to another var). + // Works for both := and = assignments. + if rhsId, ok := rhs.(*ast.Ident); ok && i < len(node.Lhs) { + if lhsId, ok := node.Lhs[i].(*ast.Ident); ok && lhsId.Name != "_" { + handOff[lhsId.Name] = rhsId.Name + } + } + } + case *ast.ReturnStmt: + // Detect: return f, nil — caller takes ownership, no Close needed here. + for _, result := range node.Results { + if id, ok := result.(*ast.Ident); ok { + returned[id.Name] = true + } + } + case *ast.CallExpr: + // Detect: f.Close() or defer f.Close() (the defer wrapper is + // stripped by inspectBody before the CallExpr reaches here). + if sel, ok := node.Fun.(*ast.SelectorExpr); ok && sel.Sel.Name == "Close" { + if id, ok := sel.X.(*ast.Ident); ok { + closed[id.Name] = true + } + } + } + }) + + for _, ov := range opens { + if !isClosedTransitive(ov.name, closed, handOff) && !returned[ov.name] { + report(ov.pos, + "OpenFile result %q must be closed via defer or explicit Close() call (see analysis/README.md §Structural Rules)", + ov.name) + } + } + }) +} + +// isClosedTransitive returns true if name is closed directly or transitively +// through a chain of hand-off assignments. It performs a breadth-first search +// over the handOff graph to handle chains of arbitrary depth, e.g.: +// +// f → a → b → Close() (handOff["a"]="f", handOff["b"]="a", closed["b"]=true) +func isClosedTransitive(name string, closed map[string]bool, handOff map[string]string) bool { + visited := make(map[string]bool) + queue := []string{name} + for len(queue) > 0 { + cur := queue[0] + queue = queue[1:] + if visited[cur] { + continue + } + visited[cur] = true + if closed[cur] { + return true + } + // Enqueue all variables that were assigned from cur. + for holder, orig := range handOff { + if orig == cur && !visited[holder] { + queue = append(queue, holder) + } + } + } + return false +} + +// forEachFuncBody calls fn for every function body in f — both top-level +// FuncDecl bodies and FuncLit bodies — so each scope is checked independently. +func forEachFuncBody(f *ast.File, fn func(*ast.BlockStmt)) { + ast.Inspect(f, func(n ast.Node) bool { + switch node := n.(type) { + case *ast.FuncDecl: + if node.Body != nil { + fn(node.Body) + } + case *ast.FuncLit: + fn(node.Body) + } + return true + }) +} + +// inspectBody walks body without recursing into nested FuncLit nodes (which +// are treated as independent scopes by forEachFuncBody). fn is called for +// every non-FuncLit node encountered. +func inspectBody(body *ast.BlockStmt, fn func(ast.Node)) { + ast.Inspect(body, func(n ast.Node) bool { + if n == nil { + return false + } + if _, ok := n.(*ast.FuncLit); ok { + return false // handled as its own scope by forEachFuncBody + } + fn(n) + return true + }) +} + +// isCall returns true if expr is a call to pkg.Name (using the local package +// alias name, e.g. "bufio" for import "bufio"). +func isCall(expr ast.Expr, localPkg, name string) bool { + call, ok := expr.(*ast.CallExpr) + if !ok { + return false + } + sel, ok := call.Fun.(*ast.SelectorExpr) + if !ok { + return false + } + id, ok := sel.X.(*ast.Ident) + return ok && id.Name == localPkg && sel.Sel.Name == name +} diff --git a/analysis/structural_test.go b/analysis/structural_test.go new file mode 100644 index 00000000..69758e16 --- /dev/null +++ b/analysis/structural_test.go @@ -0,0 +1,296 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package analysis + +import ( + "go/parser" + "go/token" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --- ScannerBuffer rule tests --- + +func TestScannerBufferClean(t *testing.T) { + // Scanner with Buffer() called — must not be flagged. + fset := token.NewFileSet() + src := `package p +import "bufio" +func run() { + sc := bufio.NewScanner(nil) + sc.Buffer(make([]byte, 4096), 1<<20) + _ = sc.Scan() +} +` + f, err := parser.ParseFile(fset, "clean.go", src, 0) + require.NoError(t, err) + + var violations []string + checkFileScannerBuffer(f, func(_ token.Pos, format string, _ ...any) { + violations = append(violations, format) + }) + assert.Empty(t, violations, "Scanner with Buffer() should not be flagged") +} + +func TestScannerBufferMissing(t *testing.T) { + // Scanner without Buffer() — must be flagged. + fset := token.NewFileSet() + src := `package p +import "bufio" +func run() { + sc := bufio.NewScanner(nil) + _ = sc.Scan() +} +` + f, err := parser.ParseFile(fset, "missing.go", src, 0) + require.NoError(t, err) + + var violations []string + checkFileScannerBuffer(f, func(_ token.Pos, format string, _ ...any) { + violations = append(violations, format) + }) + assert.Len(t, violations, 1, "Scanner without Buffer() must be flagged") +} + +func TestScannerBufferInsideFuncLit(t *testing.T) { + // Scanner inside a func literal without Buffer() — must be flagged as its own scope. + fset := token.NewFileSet() + src := `package p +import "bufio" +func run() { + fn := func() { + sc := bufio.NewScanner(nil) + _ = sc.Scan() + } + fn() +} +` + f, err := parser.ParseFile(fset, "funlit.go", src, 0) + require.NoError(t, err) + + var violations []string + checkFileScannerBuffer(f, func(_ token.Pos, format string, _ ...any) { + violations = append(violations, format) + }) + assert.Len(t, violations, 1, "Scanner inside func literal without Buffer() must be flagged") +} + +func TestScannerBufferOuterCleanInnerMissing(t *testing.T) { + // Outer function has Buffer(), inner func literal does not — only inner flagged. + fset := token.NewFileSet() + src := `package p +import "bufio" +func run() { + sc := bufio.NewScanner(nil) + sc.Buffer(make([]byte, 4096), 1<<20) + _ = sc.Scan() + + inner := func() { + sc2 := bufio.NewScanner(nil) + _ = sc2.Scan() + } + inner() +} +` + f, err := parser.ParseFile(fset, "mixed.go", src, 0) + require.NoError(t, err) + + var violations []string + checkFileScannerBuffer(f, func(_ token.Pos, format string, _ ...any) { + violations = append(violations, format) + }) + assert.Len(t, violations, 1, "only the inner scanner without Buffer() must be flagged") +} + +// --- OpenFileClose rule tests --- + +func TestOpenFileCloseClean(t *testing.T) { + // File opened and closed via defer — must not be flagged. + fset := token.NewFileSet() + src := `package p +import ("context"; "os") +type cc struct{} +func (cc) OpenFile(ctx context.Context, path string, flags int, mode os.FileMode) (interface{ Read([]byte)(int,error); Close() error }, error) { return nil, nil } +func run(callCtx cc) { + f, err := callCtx.OpenFile(context.Background(), "x", os.O_RDONLY, 0) + if err != nil { return } + defer f.Close() +} +` + f, err := parser.ParseFile(fset, "clean.go", src, 0) + require.NoError(t, err) + + var violations []string + checkFileOpenFileClose(f, func(_ token.Pos, format string, _ ...any) { + violations = append(violations, format) + }) + assert.Empty(t, violations, "OpenFile with defer f.Close() should not be flagged") +} + +func TestOpenFileCloseHandOff(t *testing.T) { + // File opened, handed off to rc, then rc closed — must not be flagged. + fset := token.NewFileSet() + src := `package p +import ("context"; "io"; "os") +type cc struct{} +func (cc) OpenFile(ctx context.Context, path string, flags int, mode os.FileMode) (interface{ Read([]byte)(int,error); Close() error }, error) { return nil, nil } +func run(callCtx cc) { + var rc io.ReadCloser + f, err := callCtx.OpenFile(context.Background(), "x", os.O_RDONLY, 0) + if err != nil { return } + rc = f + defer rc.Close() +} +` + f, err := parser.ParseFile(fset, "handoff.go", src, 0) + require.NoError(t, err) + + var violations []string + checkFileOpenFileClose(f, func(_ token.Pos, format string, _ ...any) { + violations = append(violations, format) + }) + assert.Empty(t, violations, "OpenFile handed off to rc and rc closed should not be flagged") +} + +func TestOpenFileCloseMissing(t *testing.T) { + // File opened but never closed — must be flagged. + fset := token.NewFileSet() + src := `package p +import ("context"; "os") +type cc struct{} +func (cc) OpenFile(ctx context.Context, path string, flags int, mode os.FileMode) (interface{ Read([]byte)(int,error); Close() error }, error) { return nil, nil } +func run(callCtx cc) { + f, err := callCtx.OpenFile(context.Background(), "x", os.O_RDONLY, 0) + if err != nil { return } + _ = f +} +` + f, err := parser.ParseFile(fset, "missing.go", src, 0) + require.NoError(t, err) + + var violations []string + checkFileOpenFileClose(f, func(_ token.Pos, format string, _ ...any) { + violations = append(violations, format) + }) + assert.Len(t, violations, 1, "OpenFile result not closed must be flagged") +} + +func TestOpenFileCloseReturnTransfer(t *testing.T) { + // File opened then returned to caller — must not be flagged. + fset := token.NewFileSet() + src := `package p +import ("context"; "os"; "io") +type cc struct{} +func (cc) OpenFile(ctx context.Context, path string, flags int, mode os.FileMode) (interface{ Read([]byte)(int,error); Close() error }, error) { return nil, nil } +func open(callCtx cc) (io.ReadCloser, error) { + f, err := callCtx.OpenFile(context.Background(), "x", os.O_RDONLY, 0) + if err != nil { return nil, err } + return f, nil +} +` + f, err := parser.ParseFile(fset, "return.go", src, 0) + require.NoError(t, err) + + var violations []string + checkFileOpenFileClose(f, func(_ token.Pos, format string, _ ...any) { + violations = append(violations, format) + }) + assert.Empty(t, violations, "OpenFile result returned to caller should not be flagged") +} + +func TestOpenFileCloseChainedHandOff(t *testing.T) { + // f → a → b → Close(): two-hop chain must be detected. + fset := token.NewFileSet() + src := `package p +import ("context"; "io"; "os") +type cc struct{} +func (cc) OpenFile(ctx context.Context, path string, flags int, mode os.FileMode) (interface{ Read([]byte)(int,error); Close() error }, error) { return nil, nil } +func run(callCtx cc) { + f, err := callCtx.OpenFile(context.Background(), "x", os.O_RDONLY, 0) + if err != nil { return } + var a io.ReadCloser + a = f + var b io.ReadCloser + b = a + defer b.Close() +} +` + f, err := parser.ParseFile(fset, "chain.go", src, 0) + require.NoError(t, err) + + var violations []string + checkFileOpenFileClose(f, func(_ token.Pos, format string, _ ...any) { + violations = append(violations, format) + }) + assert.Empty(t, violations, "two-hop hand-off chain closed at end must not be flagged") +} + +// TestOpenFileCloseKnownLimitationMultiBranchReturn documents a known +// path-insensitivity limitation: if a variable appears in a return statement +// on any branch, the checker treats the whole variable as "returned" and will +// not flag it even if another branch leaks it. Fixing this requires CFG-based +// data-flow analysis beyond the scope of this AST-only checker. +func TestOpenFileCloseKnownLimitationMultiBranchReturn(t *testing.T) { + fset := token.NewFileSet() + src := `package p +import ("context"; "io"; "os") +type cc struct{} +func (cc) OpenFile(ctx context.Context, path string, flags int, mode os.FileMode) (interface{ Read([]byte)(int,error); Close() error }, error) { return nil, nil } +func run(callCtx cc, cond bool) (io.ReadCloser, error) { + f, err := callCtx.OpenFile(context.Background(), "x", os.O_RDONLY, 0) + if err != nil { return nil, err } + if cond { + return f, nil // f is returned here + } + // f is leaked here (no close, no return) — NOT caught due to path-insensitivity + return nil, nil +} +` + f, err := parser.ParseFile(fset, "multibranch.go", src, 0) + require.NoError(t, err) + + var violations []string + checkFileOpenFileClose(f, func(_ token.Pos, format string, _ ...any) { + violations = append(violations, format) + }) + // Known false negative: the checker sees "f" in a return statement and + // exempts it globally. This test documents the gap rather than asserting + // correct behaviour. + assert.Empty(t, violations, "known limitation: path-insensitive return exemption produces false negative") +} + +// TestOpenFileCloseKnownLimitationNameReuse documents a known limitation: +// if the same variable name is reused for two successive OpenFile results, +// a single Close() satisfies both entries in the checker. +func TestOpenFileCloseKnownLimitationNameReuse(t *testing.T) { + fset := token.NewFileSet() + src := `package p +import ("context"; "os") +type cc struct{} +func (cc) OpenFile(ctx context.Context, path string, flags int, mode os.FileMode) (interface{ Read([]byte)(int,error); Close() error }, error) { return nil, nil } +func run(callCtx cc) { + f, err := callCtx.OpenFile(context.Background(), "x", os.O_RDONLY, 0) + if err != nil { return } + defer f.Close() + // Re-using f for a second OpenFile — the second handle is never closed. + f, err = callCtx.OpenFile(context.Background(), "y", os.O_RDONLY, 0) + if err != nil { return } + _ = f +} +` + f, err := parser.ParseFile(fset, "reuse.go", src, 0) + require.NoError(t, err) + + var violations []string + checkFileOpenFileClose(f, func(_ token.Pos, format string, _ ...any) { + violations = append(violations, format) + }) + // Known false negative: closed["f"]=true from the first defer satisfies + // both opens entries. Use distinct variable names in production code. + assert.Empty(t, violations, "known limitation: name reuse produces false negative") +} diff --git a/allowedsymbols/symbols_allowedpaths.go b/analysis/symbols_allowedpaths.go similarity index 99% rename from allowedsymbols/symbols_allowedpaths.go rename to analysis/symbols_allowedpaths.go index cfd69932..aac1dd5a 100644 --- a/allowedsymbols/symbols_allowedpaths.go +++ b/analysis/symbols_allowedpaths.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2026-present Datadog, Inc. -package allowedsymbols +package analysis // allowedpathsAllowedSymbols lists every "importpath.Symbol" that may be used // by non-test Go files in allowedpaths/. Each entry must be in diff --git a/allowedsymbols/symbols_allowedpaths_test.go b/analysis/symbols_allowedpaths_test.go similarity index 98% rename from allowedsymbols/symbols_allowedpaths_test.go rename to analysis/symbols_allowedpaths_test.go index 68efa415..bf552fd2 100644 --- a/allowedsymbols/symbols_allowedpaths_test.go +++ b/analysis/symbols_allowedpaths_test.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2026-present Datadog, Inc. -package allowedsymbols +package analysis import ( "strings" diff --git a/allowedsymbols/symbols_allowedpaths_verification_test.go b/analysis/symbols_allowedpaths_verification_test.go similarity index 98% rename from allowedsymbols/symbols_allowedpaths_verification_test.go rename to analysis/symbols_allowedpaths_verification_test.go index fa6591c0..e994e0d1 100644 --- a/allowedsymbols/symbols_allowedpaths_verification_test.go +++ b/analysis/symbols_allowedpaths_verification_test.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2026-present Datadog, Inc. -package allowedsymbols +package analysis import ( "path/filepath" diff --git a/allowedsymbols/symbols_builtins.go b/analysis/symbols_builtins.go similarity index 99% rename from allowedsymbols/symbols_builtins.go rename to analysis/symbols_builtins.go index 707125db..bd063a52 100644 --- a/allowedsymbols/symbols_builtins.go +++ b/analysis/symbols_builtins.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2026-present Datadog, Inc. -package allowedsymbols +package analysis // builtinAllowedSymbols lists every "importpath.Symbol" that may be used by // command implementation files in builtins/. Each entry must be in diff --git a/allowedsymbols/symbols_builtins_test.go b/analysis/symbols_builtins_test.go similarity index 99% rename from allowedsymbols/symbols_builtins_test.go rename to analysis/symbols_builtins_test.go index 3ff58e5c..574cf0e1 100644 --- a/allowedsymbols/symbols_builtins_test.go +++ b/analysis/symbols_builtins_test.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2026-present Datadog, Inc. -package allowedsymbols +package analysis import ( "strings" diff --git a/allowedsymbols/symbols_builtins_verification_test.go b/analysis/symbols_builtins_verification_test.go similarity index 99% rename from allowedsymbols/symbols_builtins_verification_test.go rename to analysis/symbols_builtins_verification_test.go index cdf8e090..a56f1a0d 100644 --- a/allowedsymbols/symbols_builtins_verification_test.go +++ b/analysis/symbols_builtins_verification_test.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2026-present Datadog, Inc. -package allowedsymbols +package analysis import ( "path/filepath" diff --git a/allowedsymbols/symbols_common.go b/analysis/symbols_common.go similarity index 98% rename from allowedsymbols/symbols_common.go rename to analysis/symbols_common.go index 2fde4e0b..3fe80204 100644 --- a/allowedsymbols/symbols_common.go +++ b/analysis/symbols_common.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2026-present Datadog, Inc. -package allowedsymbols +package analysis // permanentlyBanned lists packages that may never be imported, // regardless of what symbols they export. diff --git a/allowedsymbols/symbols_internal.go b/analysis/symbols_internal.go similarity index 99% rename from allowedsymbols/symbols_internal.go rename to analysis/symbols_internal.go index 9237cc24..0b73ca0a 100644 --- a/allowedsymbols/symbols_internal.go +++ b/analysis/symbols_internal.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2026-present Datadog, Inc. -package allowedsymbols +package analysis // internalPerPackageSymbols maps each builtins/internal/ name to the // symbols it is allowed to use. Every symbol listed here must also appear in diff --git a/allowedsymbols/symbols_interp.go b/analysis/symbols_interp.go similarity index 99% rename from allowedsymbols/symbols_interp.go rename to analysis/symbols_interp.go index 810e9bdf..ffd2ff8d 100644 --- a/allowedsymbols/symbols_interp.go +++ b/analysis/symbols_interp.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2026-present Datadog, Inc. -package allowedsymbols +package analysis // interpAllowedSymbols lists every "importpath.Symbol" that may be used by // non-test Go files in interp/. Each entry must be in "importpath.Symbol" diff --git a/allowedsymbols/symbols_interp_test.go b/analysis/symbols_interp_test.go similarity index 98% rename from allowedsymbols/symbols_interp_test.go rename to analysis/symbols_interp_test.go index f5190043..49c72f94 100644 --- a/allowedsymbols/symbols_interp_test.go +++ b/analysis/symbols_interp_test.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2026-present Datadog, Inc. -package allowedsymbols +package analysis import ( "strings" diff --git a/allowedsymbols/symbols_interp_verification_test.go b/analysis/symbols_interp_verification_test.go similarity index 99% rename from allowedsymbols/symbols_interp_verification_test.go rename to analysis/symbols_interp_verification_test.go index 8b0152c3..84a5e5cf 100644 --- a/allowedsymbols/symbols_interp_verification_test.go +++ b/analysis/symbols_interp_verification_test.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2026-present Datadog, Inc. -package allowedsymbols +package analysis import ( "path/filepath" diff --git a/allowedsymbols/symbols_test.go b/analysis/symbols_test.go similarity index 97% rename from allowedsymbols/symbols_test.go rename to analysis/symbols_test.go index 3a1c6342..6fb8789a 100644 --- a/allowedsymbols/symbols_test.go +++ b/analysis/symbols_test.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2026-present Datadog, Inc. -package allowedsymbols +package analysis import ( "fmt" @@ -78,7 +78,7 @@ func checkAllowedSymbols(t *testing.T, cfg allowedSymbolsConfig) { if cfg.RepoRootOverride != "" { root = cfg.RepoRootOverride } else { - // This package lives in allowedsymbols/, so the repo root is one level up. + // This package lives in analysis/, so the repo root is one level up. dir, err := os.Getwd() if err != nil { t.Fatal(err) @@ -108,6 +108,10 @@ func checkAllowedSymbols(t *testing.T, cfg allowedSymbolsConfig) { reporter := fileLineReporter(fset, rel, reportErr) localToPath := checkFileImports(f, allowedPkgs, cfg.ExemptImport, reporter) checkFileSelectors(f, localToPath, allowedSyms, usedSymbols, reporter) + + // Structural rules — applied to every checked file. + checkFileScannerBuffer(f, reporter) + checkFileOpenFileClose(f, reporter) } if checked < cfg.MinFiles { diff --git a/allowedsymbols/symbols_verification_test.go b/analysis/symbols_verification_test.go similarity index 98% rename from allowedsymbols/symbols_verification_test.go rename to analysis/symbols_verification_test.go index 72bf9476..8ac0c7a7 100644 --- a/allowedsymbols/symbols_verification_test.go +++ b/analysis/symbols_verification_test.go @@ -11,7 +11,7 @@ // repetitive plumbing (copying trees, rewriting Go source, locating files) so // each per-config test file stays focused on the specific violation it tests. -package allowedsymbols +package analysis import ( "io" @@ -64,7 +64,7 @@ func copyFile(src, dst string) error { } // repoRoot returns the repo root by going one level up from the test's working -// directory (allowedsymbols/). +// directory (analysis/). func repoRoot(t *testing.T) string { t.Helper() dir, err := os.Getwd() diff --git a/allowedsymbols/testdata/src/bannedimport/bannedimport.go b/analysis/testdata/src/bannedimport/bannedimport.go similarity index 100% rename from allowedsymbols/testdata/src/bannedimport/bannedimport.go rename to analysis/testdata/src/bannedimport/bannedimport.go diff --git a/allowedsymbols/testdata/src/disallowedimport/disallowedimport.go b/analysis/testdata/src/disallowedimport/disallowedimport.go similarity index 100% rename from allowedsymbols/testdata/src/disallowedimport/disallowedimport.go rename to analysis/testdata/src/disallowedimport/disallowedimport.go diff --git a/allowedsymbols/testdata/src/good/good.go b/analysis/testdata/src/good/good.go similarity index 100% rename from allowedsymbols/testdata/src/good/good.go rename to analysis/testdata/src/good/good.go diff --git a/allowedsymbols/testdata/src/unusedsymbol/unusedsymbol.go b/analysis/testdata/src/unusedsymbol/unusedsymbol.go similarity index 100% rename from allowedsymbols/testdata/src/unusedsymbol/unusedsymbol.go rename to analysis/testdata/src/unusedsymbol/unusedsymbol.go diff --git a/builtins/internal/procinfo/procinfo_linux.go b/builtins/internal/procinfo/procinfo_linux.go index 76c61e80..78dc3fde 100644 --- a/builtins/internal/procinfo/procinfo_linux.go +++ b/builtins/internal/procinfo/procinfo_linux.go @@ -247,6 +247,7 @@ func readUID(procPath string, pid int) string { return "?" } scanner := bufio.NewScanner(bytes.NewReader(data)) + scanner.Buffer(make([]byte, 4096), 1<<20) for scanner.Scan() { line := scanner.Text() if strings.HasPrefix(line, "Uid:") { @@ -286,6 +287,7 @@ func procBootTime(procPath string) (int64, error) { } defer f.Close() scanner := bufio.NewScanner(f) + scanner.Buffer(make([]byte, 4096), 1<<20) for scanner.Scan() { line := scanner.Text() if strings.HasPrefix(line, "btime ") { diff --git a/builtins/internal/winnet/types.go b/builtins/internal/winnet/types.go index 175159bc..40c9d5ac 100644 --- a/builtins/internal/winnet/types.go +++ b/builtins/internal/winnet/types.go @@ -5,7 +5,7 @@ // Package winnet provides socket enumeration for Windows via iphlpapi.dll. // It is placed in builtins/internal/ to isolate the unsafe.Pointer DLL call -// from the allowedsymbols checker, which cannot evaluate build tags. +// from the analysis checker, which cannot evaluate build tags. package winnet // SocketEntry holds the parsed fields for one Windows socket.