diff --git a/.cursorrules b/.cursorrules new file mode 100644 index 0000000..06ca8bc --- /dev/null +++ b/.cursorrules @@ -0,0 +1,25 @@ +# Scan-io Cursor Rules + +## Development workflow +- When implementing new features or making changes to the codebase, always check the `docs/engineering/` directory first for established patterns and guidelines. +- When in doubt about implementation details, always refer to the engineering documentation first, then examine similar existing implementations in the codebase. +- Try to reuse internal packages if relevant. Extend them if the required functionality does not exist. +- Don't use the `data` folder in tests; it will not be available in other environments. Feel free to read its content to build proper mocks, though. +- Prefer `make build-cli`, `make build-plugins`, or `make build` over `go build ...` + +## Commands +- Build CLI with: `make build-cli` +- Build plugins with: `make build-plugins` +- Build everything with: `make build` +- Test with: `make test` +- Use `go fmt` for formatting + +## Code style +- Use early returns when handling errors or special cases to reduce nesting and improve readability. + +## Planning +- When generating a plan for new features, refactoring, and so on, analyze the codebase first, then write the plan in phases. +- The plan may contain one or more phases. Each phase contains tasks. Write inputs and deliverables for each phase, task, or group of tasks. +- Ensure that new functionality includes tasks for testing it. +- Default plan file is `PLAN.md` in the root. +- Add documentation and help message updates when necessary. diff --git a/.gitignore b/.gitignore index cefedaf..8eff9db 100644 --- a/.gitignore +++ b/.gitignore @@ -67,3 +67,5 @@ __debug_bin* *.json *report.html *results.html + +data/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 93ae4b7..b936f46 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,7 +46,7 @@ RUN set -euxo pipefail && \ echo "Building dependencies for '$TARGETOS/$TARGETARCH'" && \ apk update && \ apk upgrade && \ - apk add --no-cache bash python3 py3-pip openssh && \ + apk add --no-cache bash python3 py3-pip openssh git && \ apk add --no-cache --virtual .build-deps \ jq \ libc6-compat \ diff --git a/cmd/root.go b/cmd/root.go index 422d010..1fe282e 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -9,8 +9,9 @@ import ( "github.com/scan-io-git/scan-io/cmd/analyse" "github.com/scan-io-git/scan-io/cmd/fetch" - "github.com/scan-io-git/scan-io/cmd/integration-vcs" + integrationvcs "github.com/scan-io-git/scan-io/cmd/integration-vcs" "github.com/scan-io-git/scan-io/cmd/list" + sarifissues "github.com/scan-io-git/scan-io/cmd/sarif-issues" "github.com/scan-io-git/scan-io/cmd/upload" "github.com/scan-io-git/scan-io/cmd/version" "github.com/scan-io-git/scan-io/pkg/shared" @@ -87,6 +88,7 @@ func initConfig() { fetch.Init(AppConfig, Logger.Named("fetch")) analyse.Init(AppConfig, Logger.Named("analyse")) integrationvcs.Init(AppConfig, Logger.Named("integration-vcs")) + sarifissues.Init(AppConfig, Logger.Named("sarif-issues")) version.Init(AppConfig, Logger.Named("version")) tohtml.Init(AppConfig, Logger.Named("to-html")) upload.Init(AppConfig, Logger.Named("upload")) @@ -100,6 +102,7 @@ func init() { rootCmd.AddCommand(fetch.FetchCmd) rootCmd.AddCommand(analyse.AnalyseCmd) rootCmd.AddCommand(integrationvcs.IntegrationVCSCmd) + rootCmd.AddCommand(sarifissues.SarifIssuesCmd) rootCmd.AddCommand(version.NewVersionCmd()) rootCmd.AddCommand(tohtml.ToHtmlCmd)
rootCmd.AddCommand(upload.UploadCmd) diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go new file mode 100644 index 0000000..e601fc2 --- /dev/null +++ b/cmd/sarif-issues/issue_processing.go @@ -0,0 +1,675 @@ +package sarifissues + +import ( + "fmt" + "path/filepath" + "regexp" + "strconv" + "strings" + + hclog "github.com/hashicorp/go-hclog" + "github.com/owenrumney/go-sarif/v2/sarif" + "github.com/scan-io-git/scan-io/internal/git" + internalsarif "github.com/scan-io-git/scan-io/internal/sarif" + issuecorrelation "github.com/scan-io-git/scan-io/pkg/issuecorrelation" + "github.com/scan-io-git/scan-io/pkg/shared" + "github.com/scan-io-git/scan-io/pkg/shared/errors" +) + +// Compiled regex patterns for security tag parsing +var ( + cweRegex = regexp.MustCompile(`^CWE-(\d+)\b`) + owaspRegex = regexp.MustCompile(`^OWASP[- ]?A(\d{2}):(\d{4})\s*-\s*(.+)$`) +) + +// OpenIssueReport represents parsed metadata from an open issue body. +type OpenIssueReport struct { + Severity string + Scanner string + RuleID string + FilePath string + StartLine int + EndLine int + Hash string + Permalink string + Description string +} + +// OpenIssueEntry combines parsed metadata from an open issue body with the +// original IssueParams returned by the VCS plugin. The map returned by +// listOpenIssues uses the issue number as key and this struct as value. +type OpenIssueEntry struct { + OpenIssueReport + Params shared.IssueParams +} + +// NewIssueData holds the data needed to create a new issue from SARIF results. +type NewIssueData struct { + Metadata issuecorrelation.IssueMetadata + Body string + Title string +} + +// parseIssueBody attempts to read the body produced by this command and extract +// known metadata from blockquote format lines. Supports the new format: +// "> **Rule ID**: semgrep.rule.id" +// "> **Severity**: Error, **Scanner**: Semgrep OSS" +// "> **File**: app.py, **Lines**: 11-29" +// Returns an OpenIssueReport with zero values when fields are missing. 
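+// A complete body in this format might look like the following (values are
+// illustrative, mirroring the unit tests below, not output from a real run):
+//
+//	## 🐞 semgrep.python.django.security
+//
+//	> **Rule ID**: semgrep.python.django.security
+//	> **Severity**: High, **Scanner**: Semgrep OSS
+//	> **File**: app.py, **Lines**: 11-29
+//
+//	> **Snippet SHA256**: abcdef123456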
+func parseIssueBody(body string) OpenIssueReport { + rep := OpenIssueReport{} + + for _, line := range strings.Split(body, "\n") { + line = strings.TrimSpace(line) + + // Only process blockquote metadata lines + if !strings.HasPrefix(line, "> ") { + // Check for GitHub permalink URLs + if rep.Permalink == "" && strings.HasPrefix(line, "https://github.com/") && strings.Contains(line, "/blob/") { + rep.Permalink = line + } + // Capture the first non-metadata, non-header, non-HTML line as the description + if rep.Description == "" && line != "" && !strings.HasPrefix(line, "##") && !strings.HasPrefix(line, "<") { + rep.Description = line + } + continue + } + + // Remove "> " prefix and normalize bold markers + content := strings.TrimSpace(strings.TrimPrefix(line, "> ")) + content = strings.ReplaceAll(content, "**", "") + + // Parse comma-separated metadata fields + parts := strings.Split(content, ",") + for _, part := range parts { + segment := strings.TrimSpace(part) + + if strings.HasPrefix(segment, "Rule ID:") { + rep.RuleID = strings.TrimSpace(strings.TrimPrefix(segment, "Rule ID:")) + continue + } + if strings.HasPrefix(segment, "Severity:") { + rep.Severity = strings.TrimSpace(strings.TrimPrefix(segment, "Severity:")) + } else if strings.HasPrefix(segment, "Scanner:") { + rep.Scanner = strings.TrimSpace(strings.TrimPrefix(segment, "Scanner:")) + } else if strings.HasPrefix(segment, "File:") { + rep.FilePath = strings.TrimSpace(strings.TrimPrefix(segment, "File:")) + } else if strings.HasPrefix(segment, "Lines:") { + value := strings.TrimSpace(strings.TrimPrefix(segment, "Lines:")) + rep.StartLine, rep.EndLine = parseLineRange(value) + } else if strings.HasPrefix(segment, "Snippet SHA256:") { + rep.Hash = strings.TrimSpace(strings.TrimPrefix(segment, "Snippet SHA256:")) + } + } + } + // Prefer the metadata-provided Rule ID and fall back to the legacy header format. + if strings.TrimSpace(rep.RuleID) == "" { + if rid := extractRuleIDFromBody(body); rid != "" { + rep.RuleID = rid + } + } + return rep +} + +// extractRuleIDFromBody attempts to parse a rule ID from the new body format header line: +// "## <symbol> <rule-id>", where <symbol> can be any single or combined emoji/symbol token. +// Returns an empty string if not found. +func extractRuleIDFromBody(body string) string { + // Compile regex once per call; trivial cost compared to network IO. If needed, lift to package scope. + re := regexp.MustCompile(`^##\s+[^\w\s]+\s+(.+)$`) + for _, line := range strings.Split(body, "\n") { + l := strings.TrimSpace(line) + if !strings.HasPrefix(l, "##") { + continue + } + if m := re.FindStringSubmatch(l); len(m) == 2 { + return strings.TrimSpace(m[1]) + } + } + return "" +} + +// listOpenIssues calls the VCS plugin to list open issues for the configured repo +// and parses their bodies into OpenIssueReport structures.
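+// A caller might consume the returned map like this (sketch; options setup
+// elided):
+//
+//	entries, err := listOpenIssues(opts, lg)
+//	if err != nil {
+//		return err
+//	}
+//	for number, entry := range entries {
+//		lg.Debug("open issue", "number", number, "rule_id", entry.RuleID)
+//	}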
+func listOpenIssues(options RunOptions, lg hclog.Logger) (map[int]OpenIssueEntry, error) { + req := shared.VCSListIssuesRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: options.Namespace, + Repository: options.Repository, + }, + Action: "listIssues", + }, + State: "open", + BodyFilter: scanioManagedAnnotation, // Filter for scanio-managed issues + } + + var issues []shared.IssueParams + err := shared.WithPlugin(AppConfig, lg, shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") + } + list, err := vcs.ListIssues(req) + if err != nil { + return err + } + issues = list + return nil + }) + if err != nil { + return nil, err + } + + reports := make(map[int]OpenIssueEntry, len(issues)) + for _, it := range issues { + rep := parseIssueBody(it.Body) + reports[it.Number] = OpenIssueEntry{ + OpenIssueReport: rep, + Params: it, + } + } + return reports, nil +} + +// buildNewIssuesFromSARIF processes the SARIF report and extracts findings at the allowed severity levels, +// returning structured data for creating new issues. +func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, sourceFolderAbs string, repoMetadata *git.RepositoryMetadata, lg hclog.Logger) []NewIssueData { + var newIssueData []NewIssueData + + for _, run := range report.Runs { + // Build a map of rules keyed by rule ID for quick lookups + rulesByID := map[string]*sarif.ReportingDescriptor{} + if run.Tool.Driver != nil { + for _, r := range run.Tool.Driver.Rules { + if r == nil { + continue + } + id := strings.TrimSpace(r.ID) + if id == "" { + continue + } + rulesByID[id] = r + } + } + + for _, res := range run.Results { + level, _ := res.Properties["Level"].(string) + if !isLevelAllowed(strings.ToLower(level), options.Levels) { + continue + } + + ruleID := "" + if res.RuleID != nil { + ruleID = *res.RuleID + } + + // Warn about missing rule ID + if strings.TrimSpace(ruleID) == "" { + lg.Warn("SARIF result missing rule ID, skipping", "result_index", len(newIssueData)) + continue + } + + fileURI, localPath := internalsarif.ExtractFileURIFromResult(res, sourceFolderAbs, repoMetadata) + fileURI = filepath.ToSlash(strings.TrimSpace(fileURI)) + if fileURI == "" { + fileURI = "<unknown>" + lg.Warn("SARIF result missing file URI, using placeholder", "rule_id", ruleID) + } + line, endLine := internalsarif.ExtractRegionFromResult(res) + + // Warn about missing location information + if line <= 0 { + lg.Warn("SARIF result missing line information", "rule_id", ruleID, "file", fileURI) + } + + snippetHash := issuecorrelation.ComputeSnippetHash(localPath, line, endLine) + if snippetHash == "" && fileURI != "<unknown>" && line > 0 { + lg.Warn("failed to compute snippet hash", "rule_id", ruleID, "file", fileURI, "line", line, "local_path", localPath) + } + + scannerName := getScannerName(run) + if scannerName == "" { + lg.Warn("SARIF run missing scanner/tool name, using fallback", "rule_id", ruleID) + } + + var ruleDescriptor *sarif.ReportingDescriptor + if r, ok := rulesByID[ruleID]; ok { + ruleDescriptor = r + } + + sev := displaySeverity(level) + + // Build the body and title with the scanner name label + ruleTitleComponent := displayRuleTitleComponent(ruleID, ruleDescriptor) + titleText := buildIssueTitle(scannerName, sev, ruleTitleComponent, fileURI, line, endLine) + + // New body header and compact metadata blockquote + header := "" + if h := internalsarif.DisplayRuleHeading(ruleDescriptor); strings.TrimSpace(h) != "" {
+ header = fmt.Sprintf("## 🐞 %s\n\n", h) + } + scannerDisp := scannerName + if scannerDisp == "" { + scannerDisp = "SARIF" + } + fileDisp := fileURI + linesDisp := fmt.Sprintf("%d", line) + if endLine > line { + linesDisp = fmt.Sprintf("%d-%d", line, endLine) + } + var metaBuilder strings.Builder + if trimmedID := strings.TrimSpace(ruleID); trimmedID != "" { + metaBuilder.WriteString(fmt.Sprintf("> **Rule ID**: %s\n", trimmedID)) + } + metaBuilder.WriteString(fmt.Sprintf( + "> **Severity**: %s, **Scanner**: %s\n", sev, scannerDisp, + )) + metaBuilder.WriteString(fmt.Sprintf( + "> **File**: %s, **Lines**: %s\n", fileDisp, linesDisp, + )) + meta := metaBuilder.String() + // Only use the new header and blockquote metadata + body := header + meta + "\n" + var references []string + + // Append permalink if available + if link := buildGitHubPermalink(options, repoMetadata, fileURI, line, endLine); link != "" { + body += fmt.Sprintf("\n%s\n", link) + } + + // Add formatted result message if available + if res.Message.Markdown != nil || res.Message.Text != nil { + formatOpts := internalsarif.MessageFormatOptions{ + Namespace: options.Namespace, + Repository: options.Repository, + Ref: options.Ref, + SourceFolder: sourceFolderAbs, + } + if formatted := internalsarif.FormatResultMessage(res, repoMetadata, formatOpts); formatted != "" { + body += fmt.Sprintf("\n\n### Description\n\n%s\n", formatted) + } + } + + // Append rule detail/help content + if ruleDescriptor != nil { + if detail, helpRefs := extractRuleDetail(ruleDescriptor); detail != "" || len(helpRefs) > 0 { + if detail != "" { + body += "\n\n" + detail + } + if len(helpRefs) > 0 { + references = append(references, helpRefs...) + } + } + } + + // Add code flow section if available + if codeFlowSection := FormatCodeFlows(res, options, repoMetadata, sourceFolderAbs); codeFlowSection != "" { + body += "\n\n---\n\n" + codeFlowSection + "\n\n---\n\n" + } + + // Append security identifier tags (CWE, OWASP) with links if available in rule properties + if r, ok := rulesByID[ruleID]; ok && r != nil && r.Properties != nil { + var tags []string + if v, ok := r.Properties["tags"]; ok && v != nil { + switch tv := v.(type) { + case []string: + tags = tv + case []interface{}: + for _, it := range tv { + if s, ok := it.(string); ok { + tags = append(tags, s) + } + } + } + } + + if len(tags) > 0 { + if tagRefs := processSecurityTags(tags); len(tagRefs) > 0 { + references = append(references, tagRefs...) + } + } + } + + if len(references) > 0 { + body += "\n\nReferences:\n" + strings.Join(references, "\n") + } + + // Add a second snippet hash right before the scanio-managed note, as a blockquote + if snippetHash != "" { + body += fmt.Sprintf("\n\n> **Snippet SHA256**: %s\n", snippetHash) + } + body += "\n" + scanioManagedAnnotation + + newIssueData = append(newIssueData, NewIssueData{ + Metadata: issuecorrelation.IssueMetadata{ + IssueID: ruleID, + Scanner: scannerName, + RuleID: ruleID, + Severity: level, + Filename: fileURI, + StartLine: line, + EndLine: endLine, + SnippetHash: snippetHash, + }, + Body: body, + Title: titleText, + }) + } + } + + return newIssueData +} + +// displayRuleTitleComponent returns the identifier segment to embed in the GitHub issue title. +// Prefers rule.Name when available; falls back to ruleID. 
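+// For example (mirroring the unit tests below):
+//
+//	name := "Descriptive Rule"
+//	displayRuleTitleComponent("rule.id", &sarif.ReportingDescriptor{Name: &name}) // "Descriptive Rule"
+//	displayRuleTitleComponent("rule.id", nil)                                     // "rule.id"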
+func displayRuleTitleComponent(ruleID string, rule *sarif.ReportingDescriptor) string { + if rule != nil && rule.Name != nil { + if name := strings.TrimSpace(*rule.Name); name != "" { + return name + } + } + return strings.TrimSpace(ruleID) +} + +// extractRuleDetail returns a detail string (markdown/plain) and optional reference links. +// Prefers rule.Help.Markdown when available; falls back to rule.FullDescription.Text. +func extractRuleDetail(rule *sarif.ReportingDescriptor) (string, []string) { + if rule == nil { + return "", nil + } + + if rule.Help != nil && rule.Help.Markdown != nil { + if hm := strings.TrimSpace(*rule.Help.Markdown); hm != "" { + if detail, refs := parseRuleHelpMarkdown(hm); strings.TrimSpace(detail) != "" || len(refs) > 0 { + return detail, refs + } + } + } + + if rule.FullDescription != nil && rule.FullDescription.Text != nil { + if fd := strings.TrimSpace(*rule.FullDescription.Text); fd != "" { + return fd, nil + } + } + return "", nil +} + +// buildKnownIssuesFromOpen converts open GitHub issues into correlation metadata, +// filtering for well-structured scanio-managed issues only. +func buildKnownIssuesFromOpen(openIssues map[int]OpenIssueEntry, lg hclog.Logger) []issuecorrelation.IssueMetadata { + knownIssues := make([]issuecorrelation.IssueMetadata, 0, len(openIssues)) + for num, entry := range openIssues { + rep := entry.OpenIssueReport + // Only include well-structured issues for automatic closure. + // If an open issue doesn't include basic metadata we skip it so + // we don't accidentally close unrelated or free-form issues. + if rep.Scanner == "" || rep.RuleID == "" || rep.FilePath == "" { + lg.Debug("skipping malformed open issue (won't be auto-closed)", "number", num) + continue + } + + // Only consider issues that contain the scanio-managed annotation. + // If the annotation is absent, treat the issue as manually managed and + // exclude it from correlation/auto-closure logic. + if !strings.Contains(entry.Params.Body, scanioManagedAnnotation) { + lg.Debug("skipping non-scanio-managed issue (won't be auto-closed)", "number", num) + continue + } + knownIssues = append(knownIssues, issuecorrelation.IssueMetadata{ + IssueID: fmt.Sprintf("%d", num), + Scanner: rep.Scanner, + RuleID: rep.RuleID, + Severity: rep.Severity, + Filename: rep.FilePath, + StartLine: rep.StartLine, + EndLine: rep.EndLine, + SnippetHash: rep.Hash, + }) + } + return knownIssues +} + +// filterIssuesBySourceFolder filters open issues to only those within the current source folder scope. +// This enables independent issue management for different subfolders in monorepo CI workflows. +// Issues are filtered based on their FilePath metadata matching the normalized subfolder path. 
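+// For example, with repoMetadata.Subfolder set to "apps/demo" (paths are
+// illustrative, taken from the tests below):
+//
+//	"apps/demo/main.py"    -> kept
+//	"apps/demo"            -> kept (exact match)
+//	"apps/another/main.py" -> excluded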
+func filterIssuesBySourceFolder(openIssues map[int]OpenIssueEntry, repoMetadata *git.RepositoryMetadata, lg hclog.Logger) map[int]OpenIssueEntry { + // Determine the subfolder scope from repo metadata + subfolder := internalsarif.NormalisedSubfolder(repoMetadata) + + // If no subfolder (scanning from root), include all issues + if subfolder == "" { + lg.Debug("no subfolder scope, including all issues") + return openIssues + } + + // Filter issues: keep only those whose FilePath starts with subfolder + filtered := make(map[int]OpenIssueEntry) + for num, entry := range openIssues { + filePath := filepath.ToSlash(entry.OpenIssueReport.FilePath) + if strings.HasPrefix(filePath, subfolder+"/") || filePath == subfolder { + filtered[num] = entry + } else { + lg.Debug("excluding issue outside source folder scope", + "number", num, "file", filePath, "scope", subfolder) + } + } + + lg.Info("filtered issues by source folder scope", + "total", len(openIssues), "scoped", len(filtered), "subfolder", subfolder) + return filtered +} + +// createUnmatchedIssues creates GitHub issues for new findings that don't correlate with existing issues. +// Returns the number of successfully created issues. +func createUnmatchedIssues(unmatchedNew []issuecorrelation.IssueMetadata, newIssues []issuecorrelation.IssueMetadata, newBodies, newTitles []string, options RunOptions, lg hclog.Logger) (int, error) { + created := 0 + for _, u := range unmatchedNew { + // find the corresponding index in newIssues to retrieve body/title + idx := -1 + for ni, n := range newIssues { + if n == u { + idx = ni + break + } + } + if idx == -1 { + // shouldn't happen: every unmatched entry originates from newIssues + continue + } + + if options.DryRun { + // Print dry-run information instead of making API call + fmt.Printf("[DRY RUN] Would create issue:\n") + fmt.Printf(" Title: %s\n", newTitles[idx]) + fmt.Printf(" File: %s\n", u.Filename) + fmt.Printf(" Lines: %d", u.StartLine) + if u.EndLine > u.StartLine { + fmt.Printf("-%d", u.EndLine) + } + fmt.Printf("\n") + severityDisplay := displaySeverity(u.Severity) + if strings.TrimSpace(severityDisplay) == "" { + severityDisplay = u.Severity + } + fmt.Printf(" Severity: %s\n", severityDisplay) + fmt.Printf(" Scanner: %s\n", u.Scanner) + fmt.Printf(" Rule ID: %s\n", u.RuleID) + fmt.Printf("\n") + created++ + continue + } + + req := shared.VCSIssueCreationRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: options.Namespace, + Repository: options.Repository, + }, + Action: "createIssue", + }, + Title: newTitles[idx], + Body: newBodies[idx], + Labels: options.Labels, + Assignees: options.Assignees, + } + + err := shared.WithPlugin(AppConfig, lg, shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") + } + _, err := vcs.CreateIssue(req) + return err + }) + if err != nil { + lg.Error("failed to create issue via plugin", "error", err, "file", u.Filename, "line", u.StartLine) + return created, errors.NewCommandError(options, nil, fmt.Errorf("create issue failed: %w", err), 2) + } + created++ + } + return created, nil +} + +// closeUnmatchedIssues closes GitHub issues for known findings that don't correlate with current scan results. +// Returns the number of issues closed, and an error if a closure fails.
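+// The entries passed in are expected to come from buildKnownIssuesFromOpen, so
+// IssueID carries the GitHub issue number as a string, e.g. (hypothetical):
+//
+//	issuecorrelation.IssueMetadata{IssueID: "42", RuleID: "deprecated-rule", Filename: "old-file.py", StartLine: 5, EndLine: 10}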
+func closeUnmatchedIssues(unmatchedKnown []issuecorrelation.IssueMetadata, options RunOptions, lg hclog.Logger) (int, error) { + closed := 0 + for _, k := range unmatchedKnown { + // known IssueID contains the number as string + num, err := strconv.Atoi(k.IssueID) + if err != nil { + // skip if we can't parse the number + continue + } + + if options.DryRun { + // Print dry-run information instead of making API calls + fmt.Printf("[DRY RUN] Would close issue #%d:\n", num) + fmt.Printf(" File: %s\n", k.Filename) + fmt.Printf(" Lines: %d", k.StartLine) + if k.EndLine > k.StartLine { + fmt.Printf("-%d", k.EndLine) + } + fmt.Printf("\n") + fmt.Printf(" Rule ID: %s\n", k.RuleID) + fmt.Printf(" Reason: Not found in current scan\n") + fmt.Printf("\n") + closed++ + continue + } + + // Leave a comment before closing the issue to explain why it is being closed + commentReq := shared.VCSCreateIssueCommentRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: options.Namespace, + Repository: options.Repository, + }, + Action: "createIssueComment", + }, + Number: num, + Body: "The most recent scan no longer reports this finding; closing as resolved.", + } + + err = shared.WithPlugin(AppConfig, lg, shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") + } + _, err := vcs.CreateIssueComment(commentReq) + return err + }) + if err != nil { + lg.Error("failed to add comment before closing issue", "error", err, "number", num) + // continue to attempt closing even if commenting failed + } + + upd := shared.VCSIssueUpdateRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: options.Namespace, + Repository: options.Repository, + }, + Action: "updateIssue", + }, + Number: num, + State: "closed", + } + + err = shared.WithPlugin(AppConfig, lg, shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") + } + _, err := vcs.UpdateIssue(upd) + return err + }) + if err != nil { + lg.Error("failed to close issue via plugin", "error", err, "number", num) + // abort on the first failed closure and surface the error; `closed` reflects successes so far + return closed, errors.NewCommandError(options, nil, fmt.Errorf("close issue failed: %w", err), 2) + } + closed++ + } + return closed, nil +} + +// processSARIFReport iterates runs/results in the SARIF report and creates VCS issues for +// findings at the allowed severity levels. Returns the numbers of created and closed issues, or an error.
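+// The overall flow, in terms of the helpers in this file (sketch; arguments
+// elided):
+//
+//	newData := buildNewIssuesFromSARIF(...)               // findings in the current scan
+//	scoped := filterIssuesBySourceFolder(openIssues, ...) // limit to this source-folder scope
+//	known := buildKnownIssuesFromOpen(scoped, lg)         // open scanio-managed issues
+//	corr := issuecorrelation.NewCorrelator(newMetadata, known)
+//	corr.Process()
+//	createUnmatchedIssues(corr.UnmatchedNew(), ...)       // create: found now, no open issue
+//	closeUnmatchedIssues(corr.UnmatchedKnown(), ...)      // close: open issue, not found now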
+func processSARIFReport(report *internalsarif.Report, options RunOptions, sourceFolderAbs string, repoMetadata *git.RepositoryMetadata, lg hclog.Logger, openIssues map[int]OpenIssueEntry) (int, int, error) { + // Build list of new issues from SARIF using extracted function + newIssueData := buildNewIssuesFromSARIF(report, options, sourceFolderAbs, repoMetadata, lg) + + // Extract metadata, bodies, and titles for correlation and issue creation + newIssues := make([]issuecorrelation.IssueMetadata, len(newIssueData)) + newBodies := make([]string, len(newIssueData)) + newTitles := make([]string, len(newIssueData)) + + for i, data := range newIssueData { + newIssues[i] = data.Metadata + newBodies[i] = data.Body + newTitles[i] = data.Title + } + + // Filter open issues to only those within the current source folder scope + scopedOpenIssues := filterIssuesBySourceFolder(openIssues, repoMetadata, lg) + + // Build list of known issues from the filtered open issues data + knownIssues := buildKnownIssuesFromOpen(scopedOpenIssues, lg) + + // Correlate new findings with known open issues + corr := issuecorrelation.NewCorrelator(newIssues, knownIssues) + corr.Process() + + // Create only unmatched new issues + unmatchedNew := corr.UnmatchedNew() + created, err := createUnmatchedIssues(unmatchedNew, newIssues, newBodies, newTitles, options, lg) + if err != nil { + return created, 0, err + } + + // Close unmatched known issues (open issues that did not correlate) + unmatchedKnown := corr.UnmatchedKnown() + closed, err := closeUnmatchedIssues(unmatchedKnown, options, lg) + if err != nil { + return created, closed, err + } + + return created, closed, nil +} + +// isLevelAllowed checks if a SARIF level is in the allowed levels list +func isLevelAllowed(level string, allowedLevels []string) bool { + // If no levels specified, default to "error" for backward compatibility + if len(allowedLevels) == 0 { + return strings.ToLower(level) == "error" + } + + for _, allowed := range allowedLevels { + if strings.EqualFold(level, allowed) { + return true + } + } + return false +} diff --git a/cmd/sarif-issues/issue_processing_test.go b/cmd/sarif-issues/issue_processing_test.go new file mode 100644 index 0000000..fb00a21 --- /dev/null +++ b/cmd/sarif-issues/issue_processing_test.go @@ -0,0 +1,406 @@ +package sarifissues + +import ( + "bytes" + "os" + "testing" + + "github.com/hashicorp/go-hclog" + "github.com/owenrumney/go-sarif/v2/sarif" + "github.com/scan-io-git/scan-io/internal/git" + issuecorrelation "github.com/scan-io-git/scan-io/pkg/issuecorrelation" +) + +func TestParseIssueBodyUsesMetadataRuleID(t *testing.T) { + body := ` +## 🐞 legacy-header + +> **Rule ID**: semgrep.python.django.security +> **Severity**: High, **Scanner**: Semgrep OSS +> **File**: app.py, **Lines**: 11-29 + +> **Snippet SHA256**: abcdef123456 +` + + rep := parseIssueBody(body) + if rep.RuleID != "semgrep.python.django.security" { + t.Fatalf("expected RuleID from metadata, got %q", rep.RuleID) + } + if rep.Scanner != "Semgrep OSS" { + t.Fatalf("expected scanner parsed from metadata, got %q", rep.Scanner) + } + if rep.Severity != "High" { + t.Fatalf("expected severity parsed from metadata, got %q", rep.Severity) + } + if rep.FilePath != "app.py" { + t.Fatalf("expected filepath parsed from metadata, got %q", rep.FilePath) + } + if rep.StartLine != 11 || rep.EndLine != 29 { + t.Fatalf("expected start/end lines 11/29, got %d/%d", rep.StartLine, rep.EndLine) + } + if rep.Hash != "abcdef123456" { + t.Fatalf("expected hash parsed from
metadata, got %q", rep.Hash) + } +} + +func TestParseIssueBodyFallsBackToHeaderRuleID(t *testing.T) { + body := ` +## 🐞 fallback.rule.id + +> **Severity**: High, **Scanner**: Semgrep OSS +> **File**: main.py, **Lines**: 5-5 +` + + rep := parseIssueBody(body) + if rep.RuleID != "fallback.rule.id" { + t.Fatalf("expected RuleID parsed from header fallback, got %q", rep.RuleID) + } +} + +func TestDisplayRuleTitleComponentPrefersName(t *testing.T) { + name := "Descriptive Rule" + rule := &sarif.ReportingDescriptor{ + Name: &name, + } + got := displayRuleTitleComponent("rule.id", rule) + if got != "Descriptive Rule" { + t.Fatalf("expected rule name for title component, got %q", got) + } +} + +func TestDisplayRuleTitleComponentFallsBackToID(t *testing.T) { + got := displayRuleTitleComponent("rule.id", nil) + if got != "rule.id" { + t.Fatalf("expected rule id fallback for title component, got %q", got) + } +} + +func TestExtractRuleDetailPrefersHelpMarkdown(t *testing.T) { + markdown := "Detailed explanation" + rule := &sarif.ReportingDescriptor{ + Help: &sarif.MultiformatMessageString{ + Markdown: &markdown, + }, + } + + detail, refs := extractRuleDetail(rule) + if detail != "Detailed explanation" { + t.Fatalf("expected help markdown detail, got %q", detail) + } + if len(refs) != 0 { + t.Fatalf("expected no references for plain markdown, got %d", len(refs)) + } +} + +func TestExtractRuleDetailFallsBackToFullDescription(t *testing.T) { + full := "Full description text" + rule := &sarif.ReportingDescriptor{ + FullDescription: &sarif.MultiformatMessageString{ + Text: &full, + }, + } + + detail, refs := extractRuleDetail(rule) + if detail != "Full description text" { + t.Fatalf("expected full description fallback, got %q", detail) + } + if refs != nil { + t.Fatalf("expected nil references for full description fallback, got %#v", refs) + } +} + +func TestExtractRuleDetailEmptyWhenNoContent(t *testing.T) { + detail, refs := extractRuleDetail(nil) + if detail != "" || refs != nil { + t.Fatalf("expected empty detail and nil refs, got %q %#v", detail, refs) + } +} + +func TestFilterIssuesBySourceFolder(t *testing.T) { + // Create test logger + logger := hclog.NewNullLogger() + + // Test cases + tests := []struct { + name string + repoMetadata *git.RepositoryMetadata + openIssues map[int]OpenIssueEntry + expectedCount int + expectedIssues []int // issue numbers that should be included + }{ + { + name: "no subfolder - include all issues", + repoMetadata: &git.RepositoryMetadata{ + Subfolder: "", + }, + openIssues: map[int]OpenIssueEntry{ + 1: {OpenIssueReport: OpenIssueReport{FilePath: "apps/demo/main.py"}}, + 2: {OpenIssueReport: OpenIssueReport{FilePath: "apps/another/main.py"}}, + 3: {OpenIssueReport: OpenIssueReport{FilePath: "root/file.py"}}, + }, + expectedCount: 3, + expectedIssues: []int{1, 2, 3}, + }, + { + name: "subfolder scope - filter correctly", + repoMetadata: &git.RepositoryMetadata{ + Subfolder: "apps/demo", + }, + openIssues: map[int]OpenIssueEntry{ + 1: {OpenIssueReport: OpenIssueReport{FilePath: "apps/demo/main.py"}}, + 2: {OpenIssueReport: OpenIssueReport{FilePath: "apps/another/main.py"}}, + 3: {OpenIssueReport: OpenIssueReport{FilePath: "apps/demo/utils.py"}}, + 4: {OpenIssueReport: OpenIssueReport{FilePath: "root/file.py"}}, + }, + expectedCount: 2, + expectedIssues: []int{1, 3}, + }, + { + name: "exact subfolder match", + repoMetadata: &git.RepositoryMetadata{ + Subfolder: "apps/demo", + }, + openIssues: map[int]OpenIssueEntry{ + 1: {OpenIssueReport: OpenIssueReport{FilePath: 
"apps/demo"}}, + 2: {OpenIssueReport: OpenIssueReport{FilePath: "apps/demo/main.py"}}, + 3: {OpenIssueReport: OpenIssueReport{FilePath: "apps/demo/subdir/file.py"}}, + }, + expectedCount: 3, + expectedIssues: []int{1, 2, 3}, + }, + { + name: "nil repo metadata - include all", + repoMetadata: nil, + openIssues: map[int]OpenIssueEntry{ + 1: {OpenIssueReport: OpenIssueReport{FilePath: "any/path/file.py"}}, + 2: {OpenIssueReport: OpenIssueReport{FilePath: "another/path/file.py"}}, + }, + expectedCount: 2, + expectedIssues: []int{1, 2}, + }, + { + name: "empty open issues", + repoMetadata: &git.RepositoryMetadata{ + Subfolder: "apps/demo", + }, + openIssues: map[int]OpenIssueEntry{}, + expectedCount: 0, + expectedIssues: []int{}, + }, + { + name: "subfolder with trailing slashes", + repoMetadata: &git.RepositoryMetadata{ + Subfolder: "/apps/demo/", + }, + openIssues: map[int]OpenIssueEntry{ + 1: {OpenIssueReport: OpenIssueReport{FilePath: "apps/demo/main.py"}}, + 2: {OpenIssueReport: OpenIssueReport{FilePath: "apps/another/main.py"}}, + }, + expectedCount: 1, + expectedIssues: []int{1}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + filtered := filterIssuesBySourceFolder(tt.openIssues, tt.repoMetadata, logger) + + if len(filtered) != tt.expectedCount { + t.Fatalf("expected %d filtered issues, got %d", tt.expectedCount, len(filtered)) + } + + // Check that only expected issues are present + for _, expectedNum := range tt.expectedIssues { + if _, exists := filtered[expectedNum]; !exists { + t.Fatalf("expected issue %d to be included in filtered results", expectedNum) + } + } + + // Check that no unexpected issues are present + for num := range filtered { + found := false + for _, expectedNum := range tt.expectedIssues { + if num == expectedNum { + found = true + break + } + } + if !found { + t.Fatalf("unexpected issue %d found in filtered results", num) + } + } + }) + } +} + +func TestCreateUnmatchedIssuesDryRun(t *testing.T) { + // Capture stdout + oldStdout := os.Stdout + r, w, _ := os.Pipe() + os.Stdout = w + + // Test data + unmatchedNew := []issuecorrelation.IssueMetadata{ + { + IssueID: "test-1", + Scanner: "Semgrep", + RuleID: "sql-injection", + Severity: "error", + Filename: "app.py", + StartLine: 11, + EndLine: 29, + SnippetHash: "abc123", + }, + { + IssueID: "test-2", + Scanner: "Snyk", + RuleID: "xss-vulnerability", + Severity: "warning", + Filename: "main.js", + StartLine: 5, + EndLine: 5, + SnippetHash: "def456", + }, + } + + newIssues := []issuecorrelation.IssueMetadata{ + unmatchedNew[0], unmatchedNew[1], + } + + newBodies := []string{ + "Test body for issue 1", + "Test body for issue 2", + } + + newTitles := []string{ + "[Semgrep][High][sql-injection] at app.py:11-29", + "[Snyk][Medium][xss-vulnerability] at main.js:5", + } + + options := RunOptions{ + DryRun: true, + } + + logger := hclog.NewNullLogger() + + // Test dry-run mode + created, err := createUnmatchedIssues(unmatchedNew, newIssues, newBodies, newTitles, options, logger) + + // Restore stdout + w.Close() + os.Stdout = oldStdout + + // Read captured output + var buf bytes.Buffer + buf.ReadFrom(r) + output := buf.String() + + // Verify results + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if created != 2 { + t.Fatalf("expected 2 created issues, got %d", created) + } + + // Check output contains expected dry-run information + expectedOutputs := []string{ + "[DRY RUN] Would create issue:", + "Title: [Semgrep][High][sql-injection] at app.py:11-29", + "File: app.py", + 
"Lines: 11-29", + "Severity: High", + "Scanner: Semgrep", + "Rule ID: sql-injection", + "Title: [Snyk][Medium][xss-vulnerability] at main.js:5", + "File: main.js", + "Lines: 5", + "Severity: Medium", + "Scanner: Snyk", + "Rule ID: xss-vulnerability", + } + + for _, expected := range expectedOutputs { + if !bytes.Contains(buf.Bytes(), []byte(expected)) { + t.Fatalf("expected output to contain %q, got:\n%s", expected, output) + } + } +} + +func TestCloseUnmatchedIssuesDryRun(t *testing.T) { + // Capture stdout + oldStdout := os.Stdout + r, w, _ := os.Pipe() + os.Stdout = w + + // Test data + unmatchedKnown := []issuecorrelation.IssueMetadata{ + { + IssueID: "42", + Scanner: "Semgrep", + RuleID: "deprecated-rule", + Severity: "error", + Filename: "old-file.py", + StartLine: 5, + EndLine: 10, + SnippetHash: "xyz789", + }, + { + IssueID: "123", + Scanner: "Snyk", + RuleID: "old-vulnerability", + Severity: "warning", + Filename: "legacy.js", + StartLine: 15, + EndLine: 15, + SnippetHash: "abc123", + }, + } + + options := RunOptions{ + DryRun: true, + } + + logger := hclog.NewNullLogger() + + // Test dry-run mode + closed, err := closeUnmatchedIssues(unmatchedKnown, options, logger) + + // Restore stdout + w.Close() + os.Stdout = oldStdout + + // Read captured output + var buf bytes.Buffer + buf.ReadFrom(r) + output := buf.String() + + // Verify results + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if closed != 2 { + t.Fatalf("expected 2 closed issues, got %d", closed) + } + + // Check output contains expected dry-run information + expectedOutputs := []string{ + "[DRY RUN] Would close issue #42:", + "File: old-file.py", + "Lines: 5-10", + "Rule ID: deprecated-rule", + "Reason: Not found in current scan", + "[DRY RUN] Would close issue #123:", + "File: legacy.js", + "Lines: 15", + "Rule ID: old-vulnerability", + } + + for _, expected := range expectedOutputs { + if !bytes.Contains(buf.Bytes(), []byte(expected)) { + t.Fatalf("expected output to contain %q, got:\n%s", expected, output) + } + } +} diff --git a/cmd/sarif-issues/sarif-issues.go b/cmd/sarif-issues/sarif-issues.go new file mode 100644 index 0000000..4beb4c3 --- /dev/null +++ b/cmd/sarif-issues/sarif-issues.go @@ -0,0 +1,206 @@ +package sarifissues + +import ( + "fmt" + "strings" + + "github.com/hashicorp/go-hclog" + "github.com/spf13/cobra" + + "github.com/scan-io-git/scan-io/internal/git" + internalsarif "github.com/scan-io-git/scan-io/internal/sarif" + "github.com/scan-io-git/scan-io/pkg/shared" + "github.com/scan-io-git/scan-io/pkg/shared/config" + "github.com/scan-io-git/scan-io/pkg/shared/errors" +) + +// scanioManagedAnnotation is appended to issue bodies created by this command +// and is required for correlation/auto-closure to consider an issue +// managed by automation. +const ( + scanioManagedAnnotation = "> [!NOTE]\n> This issue was created and will be managed by scanio automation. Don't change body manually for proper processing, unless you know what you do" + semgrepPromoFooter = "#### 💎 Enable cross-file analysis and Pro rules for free at sg.run/pro\n\n" +) + +// RunOptions holds flags for the sarif-issues command. 
+type RunOptions struct { + Namespace string `json:"namespace,omitempty"` + Repository string `json:"repository,omitempty"` + SarifPath string `json:"sarif_path,omitempty"` + SourceFolder string `json:"source_folder,omitempty"` + Ref string `json:"ref,omitempty"` + Labels []string `json:"labels,omitempty"` + Assignees []string `json:"assignees,omitempty"` + Levels []string `json:"levels,omitempty"` + DryRun bool `json:"dry_run,omitempty"` +} + +var ( + AppConfig *config.Config + cmdLogger hclog.Logger + opts RunOptions + + // Example usage for the sarif-issues command + exampleSarifIssuesUsage = ` # Recommended: run from repository root and use relative paths + scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --source-folder apps/demo + + # Run inside git repository (auto-detects namespace, repository, ref) + scanio sarif-issues --sarif semgrep-demo.sarif --source-folder apps/demo + + # Create issues from SARIF report with basic configuration (default: error level only) + scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif + + # Create issues for multiple severity levels using SARIF levels + scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --levels error,warning + + # Create issues for multiple severity levels using display levels + scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --levels High,Medium + + # Create issues with labels and assignees + scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --labels bug,security --assignees alice,bob + + # Create issues with source folder for better file path resolution + scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --source-folder /path/to/source + + # Create issues with specific git reference for permalinks + scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --ref feature-branch + + # Using environment variables (GitHub Actions) + GITHUB_REPOSITORY_OWNER=scan-io-git GITHUB_REPOSITORY=scan-io-git/scan-io GITHUB_SHA=abc123 scanio sarif-issues --sarif /path/to/report.sarif + + # Preview what issues would be created/closed without making actual GitHub calls + scanio sarif-issues --sarif /path/to/report.sarif --dry-run` + + // SarifIssuesCmd represents the command to create GitHub issues from a SARIF file. + SarifIssuesCmd = &cobra.Command{ + Use: "sarif-issues --sarif PATH [--namespace NAMESPACE] [--repository REPO] [--source-folder PATH] [--ref REF] [--labels label[,label...]] [--assignees user[,user...]] [--levels level[,level...]]", + Short: "Create GitHub issues for SARIF findings with configurable severity levels", + Example: exampleSarifIssuesUsage, + SilenceUsage: false, + Hidden: false, + DisableFlagsInUseLine: true, + RunE: runSarifIssues, + } +) + +// Init wires config into this command. +func Init(cfg *config.Config, l hclog.Logger) { + AppConfig = cfg + cmdLogger = l +} + +// runSarifIssues is the main execution function for the sarif-issues command. +func runSarifIssues(cmd *cobra.Command, args []string) error { + // 1. Check for help request + if len(args) == 0 && !shared.HasFlags(cmd.Flags()) { + return cmd.Help() + } + + // 2. Use global logger + lg := cmdLogger + + // 3. Handle environment variable fallbacks + ApplyEnvironmentFallbacks(&opts) + + // 4. 
Handle git metadata fallbacks + ApplyGitMetadataFallbacks(&opts, lg) + + // 4.5. Default source-folder to current directory when empty + if strings.TrimSpace(opts.SourceFolder) == "" { + opts.SourceFolder = "." + lg.Info("no --source-folder provided; defaulting to current directory", "source_folder", opts.SourceFolder) + } + + // 4.6. Validate and normalize severity levels + normalizedLevels, err := normalizeAndValidateLevels(opts.Levels) + if err != nil { + lg.Error("invalid severity levels", "error", err) + return errors.NewCommandError(opts, nil, fmt.Errorf("invalid severity levels: %w", err), 1) + } + opts.Levels = normalizedLevels + lg.Debug("normalized severity levels", "levels", opts.Levels) + + // 5. Validate arguments + if err := validate(&opts); err != nil { + lg.Error("invalid arguments", "error", err) + return errors.NewCommandError(opts, nil, fmt.Errorf("invalid arguments: %w", err), 1) + } + + // 6. Read and process SARIF report + report, err := internalsarif.ReadReport(opts.SarifPath, lg, opts.SourceFolder, true) + if err != nil { + lg.Error("failed to read SARIF report", "error", err) + return errors.NewCommandError(opts, nil, fmt.Errorf("failed to read SARIF report: %w", err), 2) + } + + // Resolve source folder to absolute form for path calculations + sourceFolderAbs := ResolveSourceFolder(opts.SourceFolder, lg) + + // Collect repository metadata to understand repo root vs. subfolder layout + repoMetadata := resolveRepositoryMetadata(sourceFolderAbs, lg) + if repoMetadata == nil { + lg.Warn("git metadata unavailable; permalinks and snippet hashing may be degraded", "source_folder", sourceFolderAbs) + } + + // Enrich to ensure Levels and Titles are present + report.EnrichResultsLevelProperty() + report.EnrichResultsTitleProperty() + + // 7. Get all open GitHub issues + openIssues, err := listOpenIssues(opts, lg) + if err != nil { + lg.Error("failed to list open issues", "error", err) + return errors.NewCommandError(opts, nil, fmt.Errorf("failed to list open issues: %w", err), 2) + } + lg.Info("fetched open issues from repository", "count", len(openIssues)) + + // 8. Process SARIF report and create/close issues + created, closed, err := processSARIFReport(report, opts, sourceFolderAbs, repoMetadata, lg, openIssues) + if err != nil { + lg.Error("failed to process SARIF report", "error", err) + return err + } + + // 9. 
Log success and handle output + lg.Info("sarif-issues run completed", "created", created, "closed", closed) + if opts.DryRun { + fmt.Printf("[DRY RUN] Would create %d issue(s); would close %d resolved issue(s)\n", created, closed) + } else { + fmt.Printf("Created %d issue(s); closed %d resolved issue(s)\n", created, closed) + } + + return nil +} + +func init() { + SarifIssuesCmd.Flags().StringVar(&opts.Namespace, "namespace", "", "GitHub org/user (defaults to $GITHUB_REPOSITORY_OWNER when unset)") + SarifIssuesCmd.Flags().StringVar(&opts.Repository, "repository", "", "Repository name (defaults to ${GITHUB_REPOSITORY#*/} when unset)") + SarifIssuesCmd.Flags().StringVar(&opts.SarifPath, "sarif", "", "Path to SARIF file") + SarifIssuesCmd.Flags().StringVar(&opts.SourceFolder, "source-folder", "", "Optional: source folder to improve file path resolution in SARIF (used for absolute paths)") + SarifIssuesCmd.Flags().StringVar(&opts.Ref, "ref", "", "Git ref (branch or commit SHA) to build a permalink to the vulnerable code (defaults to $GITHUB_SHA when unset)") + // --labels supports multiple usages (e.g., --labels bug --labels security) or comma-separated values + SarifIssuesCmd.Flags().StringSliceVar(&opts.Labels, "labels", nil, "Optional: labels to assign to created GitHub issues (repeat flag or use comma-separated values)") + // --assignees supports multiple usages or comma-separated values + SarifIssuesCmd.Flags().StringSliceVar(&opts.Assignees, "assignees", nil, "Optional: assignees (GitHub logins) to assign to created issues (repeat flag or use comma-separated values)") + SarifIssuesCmd.Flags().StringSliceVar(&opts.Levels, "levels", []string{"error"}, "SARIF severity levels to process: SARIF levels (error, warning, note, none) or display levels (High, Medium, Low, Info). Cannot mix formats. (repeat flag or use comma-separated values)") + SarifIssuesCmd.Flags().BoolVar(&opts.DryRun, "dry-run", false, "Show what issues would be created/closed without making actual GitHub API calls") + SarifIssuesCmd.Flags().BoolP("help", "h", false, "Show help for sarif-issues command.") +} + +func resolveRepositoryMetadata(sourceFolderAbs string, lg hclog.Logger) *git.RepositoryMetadata { + if strings.TrimSpace(sourceFolderAbs) == "" { + return nil + } + + md, err := git.CollectRepositoryMetadata(sourceFolderAbs) + if err != nil { + // If we defaulted to current directory and git metadata collection fails, + // log a concise warning but don't fail hard (preserve existing error guidance) + if sourceFolderAbs == "." { + lg.Warn("unable to collect git metadata from current directory - snippet hashes may not be computed") + } else { + lg.Debug("unable to collect repository metadata", "error", err) + } + } + return md +} diff --git a/cmd/sarif-issues/utils.go b/cmd/sarif-issues/utils.go new file mode 100644 index 0000000..77ad9bf --- /dev/null +++ b/cmd/sarif-issues/utils.go @@ -0,0 +1,495 @@ +package sarifissues + +import ( + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + + "golang.org/x/text/cases" + "golang.org/x/text/language" + + "github.com/hashicorp/go-hclog" + "github.com/owenrumney/go-sarif/v2/sarif" + "github.com/scan-io-git/scan-io/internal/git" + internalsarif "github.com/scan-io-git/scan-io/internal/sarif" + "github.com/scan-io-git/scan-io/pkg/shared/files" + "github.com/scan-io-git/scan-io/pkg/shared/vcsurl" +) + +// parseLineRange parses line range from strings like "123" or "123-456". +// Returns (start, end) where end equals start for single line numbers. 
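+// For example:
+//
+//	parseLineRange("123")     // (123, 123)
+//	parseLineRange("123-456") // (123, 456)
+//	parseLineRange("oops")    // (0, 0)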
+func parseLineRange(value string) (int, int) { + value = strings.TrimSpace(value) + if strings.Contains(value, "-") { + parts := strings.SplitN(value, "-", 2) + if len(parts) == 2 { + start, err1 := strconv.Atoi(strings.TrimSpace(parts[0])) + end, err2 := strconv.Atoi(strings.TrimSpace(parts[1])) + if err1 == nil && err2 == nil { + return start, end + } + } + } else { + if line, err := strconv.Atoi(value); err == nil { + return line, line + } + } + return 0, 0 +} + +// displaySeverity normalizes SARIF severity levels to more descriptive labels. +func displaySeverity(level string) string { + normalized := strings.ToLower(strings.TrimSpace(level)) + switch normalized { + case "error": + return "High" + case "warning": + return "Medium" + case "note": + return "Low" + case "none": + return "Info" + default: + if normalized == "" { + return "" + } + return cases.Title(language.Und).String(normalized) + } +} + +// normalizeAndValidateLevels validates and normalizes severity levels input. +// Accepts both SARIF levels (error, warning, note, none) and display levels (High, Medium, Low, Info). +// Returns normalized SARIF levels and an error if mixing formats is detected. +func normalizeAndValidateLevels(levels []string) ([]string, error) { + if len(levels) == 0 { + return []string{"error"}, nil + } + + var sarifLevels []string + var displayLevels []string + var normalized []string + + // Check each level and categorize + for _, level := range levels { + normalizedLevel := strings.ToLower(strings.TrimSpace(level)) + + // Check if it's a SARIF level + if isSARIFLevel(normalizedLevel) { + sarifLevels = append(sarifLevels, normalizedLevel) + normalized = append(normalized, normalizedLevel) + } else if isDisplayLevel(normalizedLevel) { + displayLevels = append(displayLevels, normalizedLevel) + // Convert display level to SARIF level + sarifLevel := displayToSARIFLevel(normalizedLevel) + normalized = append(normalized, sarifLevel) + } else { + return nil, fmt.Errorf("invalid severity level '%s'. Valid SARIF levels: error, warning, note, none. Valid display levels: high, medium, low, info", level) + } + } + + // Check for mixing formats + if len(sarifLevels) > 0 && len(displayLevels) > 0 { + return nil, fmt.Errorf("cannot mix SARIF levels (error, warning, note, none) with display levels (High, Medium, Low, Info)") + } + + return normalized, nil +} + +// isSARIFLevel checks if the normalized level is a valid SARIF level +func isSARIFLevel(level string) bool { + switch level { + case "error", "warning", "note", "none": + return true + default: + return false + } +} + +// isDisplayLevel checks if the normalized level is a valid display level +func isDisplayLevel(level string) bool { + switch level { + case "high", "medium", "low", "info": + return true + default: + return false + } +} + +// displayToSARIFLevel converts a display level to its corresponding SARIF level +func displayToSARIFLevel(displayLevel string) string { + switch displayLevel { + case "high": + return "error" + case "medium": + return "warning" + case "low": + return "note" + case "info": + return "none" + default: + return displayLevel + } +} + +// generateOWASPSlug creates a URL-safe slug from OWASP title text. +// Converts spaces to underscores and removes non-alphanumeric characters except hyphens and underscores. 
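+// For example (using a real OWASP Top 10 title):
+//
+//	generateOWASPSlug("Broken Access Control") // "Broken_Access_Control"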
+func generateOWASPSlug(title string) string { + slug := strings.ReplaceAll(strings.TrimSpace(title), " ", "_") + clean := make([]rune, 0, len(slug)) + for _, r := range slug { + if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '_' || r == '-' { + clean = append(clean, r) + } + } + return string(clean) +} + +// processSecurityTags converts security tags (CWE, OWASP) into reference links. +// Returns a slice of markdown reference links for recognized security identifiers. +func processSecurityTags(tags []string) []string { + var tagRefs []string + for _, tag := range tags { + t := strings.TrimSpace(tag) + if t == "" { + continue + } + + // Process CWE tags + if m := cweRegex.FindStringSubmatch(t); len(m) == 2 { + num := m[1] + url := fmt.Sprintf("https://cwe.mitre.org/data/definitions/%s.html", num) + tagRefs = append(tagRefs, fmt.Sprintf("- [%s](%s)", t, url)) + continue + } + + // Process OWASP tags + if m := owaspRegex.FindStringSubmatch(t); len(m) == 4 { + rank := m[1] + year := m[2] + title := m[3] + slug := generateOWASPSlug(title) + url := fmt.Sprintf("https://owasp.org/Top10/A%s_%s-%s/", rank, year, slug) + tagRefs = append(tagRefs, fmt.Sprintf("- [%s](%s)", t, url)) + continue + } + } + return tagRefs +} + +// buildIssueTitle creates a concise issue title using the scanner name (falling back to "SARIF"), +// severity, ruleID, and location info. It formats as "[<scanner>][<severity>][<ruleID>] at <file>:<line>" +// and includes a line range when endLine > line. +func buildIssueTitle(scannerName, severity, ruleID, fileURI string, line, endLine int) string { + label := strings.TrimSpace(scannerName) + if label == "" { + label = "SARIF" + } + sev := strings.TrimSpace(severity) + parts := []string{label} + if sev != "" { + parts = append(parts, sev) + } + parts = append(parts, ruleID) + title := fmt.Sprintf("[%s]", strings.Join(parts, "][")) + if line > 0 { + if endLine > line { + return fmt.Sprintf("%s at %s:%d-%d", title, fileURI, line, endLine) + } + return fmt.Sprintf("%s at %s:%d", title, fileURI, line) + } + return fmt.Sprintf("%s at %s", title, fileURI) +} + +// parseRuleHelpMarkdown removes promotional content from help markdown and splits +// it into the descriptive details and a list of reference bullet points. +func parseRuleHelpMarkdown(markdown string) (string, []string) { + cleaned := strings.ReplaceAll(markdown, semgrepPromoFooter, "") + cleaned = strings.TrimSpace(cleaned) + if cleaned == "" { + return "", nil + } + + lines := strings.Split(cleaned, "\n") + referencesStart := -1 + for idx, raw := range lines { + if strings.TrimSpace(raw) == "References:" { + referencesStart = idx + break + } + } + + if referencesStart == -1 { + return cleaned, nil + } + + detail := strings.TrimSpace(strings.Join(lines[:referencesStart], "\n")) + var references []string + for _, raw := range lines[referencesStart+1:] { + trimmed := strings.TrimSpace(raw) + if trimmed == "" { + continue + } + // Normalise to Markdown bullet points regardless of the original marker. + trimmed = strings.TrimLeft(trimmed, "-* \t") + if trimmed == "" { + continue + } + references = append(references, "- "+trimmed) + } + + return detail, references +} + +// getScannerName returns the tool/driver name for a SARIF run when available. +func getScannerName(run *sarif.Run) string { + if run == nil { + return "" + } + if run.Tool.Driver == nil { + return "" + } + if run.Tool.Driver.Name != "" { + return run.Tool.Driver.Name + } + return "" +} + +// buildGitHubPermalink builds a permalink to a file and region in GitHub.
+// It prefers the CLI --ref when provided; otherwise attempts to read the +// current commit hash from repo metadata (falling back to collecting it +// directly when metadata is not provided). Returns an empty string when any +// critical component is missing. +func buildGitHubPermalink(options RunOptions, repoMetadata *git.RepositoryMetadata, fileURI string, start, end int) string { + ref := strings.TrimSpace(options.Ref) + + if ref == "" { + if repoMetadata != nil && repoMetadata.CommitHash != nil && *repoMetadata.CommitHash != "" { + ref = *repoMetadata.CommitHash + } else if options.SourceFolder != "" { + if md, err := git.CollectRepositoryMetadata(options.SourceFolder); err == nil && md.CommitHash != nil && *md.CommitHash != "" { + ref = *md.CommitHash + } + } + } + + if ref == "" || fileURI == "" || fileURI == "<unknown>" { + return "" + } + + path := filepath.ToSlash(fileURI) + return internalsarif.BuildGitHubPermalink(options.Namespace, options.Repository, ref, path, start, end) +} + +// ResolveSourceFolder resolves a source folder path to its absolute form for path calculations. +// It handles path expansion (e.g., ~) and absolute path resolution with graceful fallbacks. +// Returns an empty string if the input folder is empty or whitespace-only. +func ResolveSourceFolder(folder string, logger hclog.Logger) string { + if folder := strings.TrimSpace(folder); folder != "" { + expandedFolder, expandErr := files.ExpandPath(folder) + if expandErr != nil { + logger.Debug("failed to expand source folder; using raw value", "error", expandErr) + expandedFolder = folder + } + absFolder, absErr := filepath.Abs(expandedFolder) + if absErr != nil { + logger.Debug("failed to resolve absolute source folder; using expanded value", "error", absErr) + return expandedFolder + } + return filepath.Clean(absFolder) + } + return "" +} + +// ApplyEnvironmentFallbacks applies environment variable fallbacks to the run options. +// It sets namespace, repository, and ref from GitHub environment variables if not already provided. +func ApplyEnvironmentFallbacks(opts *RunOptions) { + // Fallback: if --namespace not provided, try $GITHUB_REPOSITORY_OWNER + if strings.TrimSpace(opts.Namespace) == "" { + if ns := strings.TrimSpace(os.Getenv("GITHUB_REPOSITORY_OWNER")); ns != "" { + opts.Namespace = ns + } + } + + // Fallback: if --repository not provided, try ${GITHUB_REPOSITORY#*/} + if strings.TrimSpace(opts.Repository) == "" { + if gr := strings.TrimSpace(os.Getenv("GITHUB_REPOSITORY")); gr != "" { + if idx := strings.Index(gr, "/"); idx >= 0 && idx < len(gr)-1 { + opts.Repository = gr[idx+1:] + } else { + // No slash present; fall back to the whole value + opts.Repository = gr + } + } + } + + // Fallback: if --ref not provided, try $GITHUB_SHA + if strings.TrimSpace(opts.Ref) == "" { + if sha := strings.TrimSpace(os.Getenv("GITHUB_SHA")); sha != "" { + opts.Ref = sha + } + } +} + +// FormatCodeFlows formats code flows from SARIF results into collapsible markdown sections. +// Each thread flow is displayed in a separate <details>
+
+// FormatCodeFlows formats code flows from SARIF results into collapsible markdown sections.
+// Each thread flow is displayed in a separate <details> block with numbered steps and GitHub permalinks.
+func FormatCodeFlows(result *sarif.Result, options RunOptions, repoMetadata *git.RepositoryMetadata, sourceFolderAbs string) string {
+    if result == nil || len(result.CodeFlows) == 0 {
+        return ""
+    }
+
+    var sections []string
+    threadFlowCounter := 0
+
+    for _, codeFlow := range result.CodeFlows {
+        if codeFlow == nil || len(codeFlow.ThreadFlows) == 0 {
+            continue
+        }
+
+        for _, threadFlow := range codeFlow.ThreadFlows {
+            if threadFlow == nil || len(threadFlow.Locations) == 0 {
+                continue
+            }
+
+            threadFlowCounter++
+            var steps []string
+            seenSteps := make(map[string]bool) // Track seen permalink+message combinations
+            actualStepNum := 0                 // Track actual step number for sequential numbering
+
+            for _, threadFlowLocation := range threadFlow.Locations {
+                if threadFlowLocation == nil || threadFlowLocation.Location == nil {
+                    continue
+                }
+
+                location := threadFlowLocation.Location
+                if location.PhysicalLocation == nil || location.PhysicalLocation.ArtifactLocation == nil {
+                    continue
+                }
+
+                // Extract file path and line information
+                fileURI, _ := internalsarif.ExtractFileURIFromResult(&sarif.Result{
+                    Locations: []*sarif.Location{location},
+                }, sourceFolderAbs, repoMetadata)
+
+                if fileURI == "" {
+                    continue
+                }
+
+                // Extract line numbers
+                startLine := 0
+                endLine := 0
+                if location.PhysicalLocation.Region != nil {
+                    if location.PhysicalLocation.Region.StartLine != nil {
+                        startLine = *location.PhysicalLocation.Region.StartLine
+                    }
+                    if location.PhysicalLocation.Region.EndLine != nil {
+                        endLine = *location.PhysicalLocation.Region.EndLine
+                    }
+                }
+
+                // Create GitHub permalink
+                permalink := buildGitHubPermalink(options, repoMetadata, fileURI, startLine, endLine)
+
+                // Extract the optional message text for this step
+                messageText := ""
+                if location.Message != nil && location.Message.Text != nil && strings.TrimSpace(*location.Message.Text) != "" {
+                    messageText = strings.TrimSpace(*location.Message.Text)
+                }
+
+                // Create unique key for deduplication (permalink + message text)
+                dedupKey := fmt.Sprintf("%s|%s", permalink, messageText)
+
+                // Skip if we've already seen this exact combination
+                if seenSteps[dedupKey] {
+                    continue
+                }
+                seenSteps[dedupKey] = true
+
+                // Increment actual step number only when we add a step
+                actualStepNum++
+
+                // Format step with optional message text
+                stepText := fmt.Sprintf("Step %d:", actualStepNum)
+                if messageText != "" {
+                    stepText = fmt.Sprintf("Step %d: %s", actualStepNum, messageText)
+                }
+
+                // Add step text and permalink on separate lines
+                if permalink != "" {
+                    steps = append(steps, stepText+"\n"+permalink)
+                } else {
+                    steps = append(steps, stepText)
+                }
+            }
+
+            if len(steps) > 0 {
+                summary := fmt.Sprintf("Code Flow %d", threadFlowCounter)
+                section := fmt.Sprintf("
<details>\n<summary>%s</summary>\n\n%s\n</details>
", + summary, strings.Join(steps, "\n\n")) + sections = append(sections, section) + } + } + } + + if len(sections) == 0 { + return "" + } + + return strings.Join(sections, "\n\n") +} + +// ApplyGitMetadataFallbacks applies git metadata fallbacks to the run options. +// It extracts namespace, repository, and ref from local git repository metadata +// when the corresponding flags are not already provided. +func ApplyGitMetadataFallbacks(opts *RunOptions, logger hclog.Logger) { + // Determine the base folder for git metadata extraction + baseFolder := strings.TrimSpace(opts.SourceFolder) + if baseFolder == "" { + // Use current working directory if source-folder is not provided + if cwd, err := os.Getwd(); err == nil { + baseFolder = cwd + } else { + logger.Debug("failed to get current working directory for git metadata extraction", "error", err) + return + } + } + + // Collect git repository metadata + repoMetadata, err := git.CollectRepositoryMetadata(baseFolder) + if err != nil { + logger.Debug("unable to collect git repository metadata", "error", err, "baseFolder", baseFolder) + return + } + + // Extract namespace and repository from git remote URL if not already set + if strings.TrimSpace(opts.Namespace) == "" || strings.TrimSpace(opts.Repository) == "" { + if repoMetadata.RepositoryFullName != nil && *repoMetadata.RepositoryFullName != "" { + // Parse the repository URL to extract namespace and repository + vcsURL, err := vcsurl.ParseForVCSType(*repoMetadata.RepositoryFullName, vcsurl.UnknownVCS) + if err != nil { + logger.Debug("failed to parse git repository URL", "error", err, "url", *repoMetadata.RepositoryFullName) + } else { + // Apply namespace if not already set + if strings.TrimSpace(opts.Namespace) == "" && vcsURL.Namespace != "" { + opts.Namespace = vcsURL.Namespace + logger.Debug("auto-detected namespace from git metadata", "namespace", vcsURL.Namespace) + } + + // Apply repository if not already set + if strings.TrimSpace(opts.Repository) == "" && vcsURL.Repository != "" { + opts.Repository = vcsURL.Repository + logger.Debug("auto-detected repository from git metadata", "repository", vcsURL.Repository) + } + } + } + } + + // Extract commit hash for ref if not already set + if strings.TrimSpace(opts.Ref) == "" { + if repoMetadata.CommitHash != nil && *repoMetadata.CommitHash != "" { + opts.Ref = *repoMetadata.CommitHash + logger.Debug("auto-detected ref from git metadata", "ref", *repoMetadata.CommitHash) + } + } +} diff --git a/cmd/sarif-issues/utils_test.go b/cmd/sarif-issues/utils_test.go new file mode 100644 index 0000000..db13f6d --- /dev/null +++ b/cmd/sarif-issues/utils_test.go @@ -0,0 +1,1700 @@ +package sarifissues + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/hashicorp/go-hclog" + "github.com/owenrumney/go-sarif/v2/sarif" + "github.com/scan-io-git/scan-io/internal/git" + internalsarif "github.com/scan-io-git/scan-io/internal/sarif" +) + +func TestDisplaySeverity(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + // Standard SARIF severity levels + { + name: "error level", + input: "error", + expected: "High", + }, + { + name: "warning level", + input: "warning", + expected: "Medium", + }, + { + name: "note level", + input: "note", + expected: "Low", + }, + { + name: "none level", + input: "none", + expected: "Info", + }, + // Case insensitive tests + { + name: "ERROR uppercase", + input: "ERROR", + expected: "High", + }, + { + name: "Warning mixed case", + input: "Warning", + expected: 
"Medium", + }, + { + name: "NOTE uppercase", + input: "NOTE", + expected: "Low", + }, + { + name: "NONE uppercase", + input: "NONE", + expected: "Info", + }, + // Whitespace handling + { + name: "error with leading space", + input: " error", + expected: "High", + }, + { + name: "warning with trailing space", + input: "warning ", + expected: "Medium", + }, + { + name: "note with surrounding spaces", + input: " note ", + expected: "Low", + }, + { + name: "none with tabs", + input: "\tnone\t", + expected: "Info", + }, + // Edge cases + { + name: "empty string", + input: "", + expected: "", + }, + { + name: "whitespace only", + input: " ", + expected: "", + }, + { + name: "tab only", + input: "\t", + expected: "", + }, + { + name: "newline only", + input: "\n", + expected: "", + }, + // Custom/unknown severity levels (should be title-cased) + { + name: "custom severity lowercase", + input: "critical", + expected: "Critical", + }, + { + name: "custom severity uppercase", + input: "FATAL", + expected: "Fatal", + }, + { + name: "custom severity mixed case", + input: "sEvErE", + expected: "Severe", + }, + { + name: "custom multi-word severity", + input: "very high", + expected: "Very High", + }, + { + name: "custom with numbers", + input: "level1", + expected: "Level1", + }, + { + name: "custom with special chars", + input: "high-priority", + expected: "High-Priority", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := displaySeverity(tt.input) + if result != tt.expected { + t.Errorf("displaySeverity(%q) = %q, want %q", tt.input, result, tt.expected) + } + }) + } +} + +func TestBuildGitHubPermalink(t *testing.T) { + fileURI := filepath.ToSlash(filepath.Join("apps", "demo", "main.py")) + options := RunOptions{ + Namespace: "scan-io-git", + Repository: "scanio-test", + Ref: "aec0b795c350ff53fe9ab01adf862408aa34c3fd", + } + + link := buildGitHubPermalink(options, nil, fileURI, 11, 29) + expected := "https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py#L11-L29" + if link != expected { + t.Fatalf("expected permalink %q, got %q", expected, link) + } + + // Ref fallback to repository metadata + options.Ref = "" + commit := "1234567890abcdef" + metadata := &git.RepositoryMetadata{ + RepoRootFolder: "/tmp/repo", + CommitHash: &commit, + } + link = buildGitHubPermalink(options, metadata, fileURI, 5, 5) + expected = "https://github.com/scan-io-git/scanio-test/blob/1234567890abcdef/apps/demo/main.py#L5" + if link != expected { + t.Fatalf("expected metadata permalink %q, got %q", expected, link) + } + + // Missing ref and metadata commit should return empty string + options.Ref = "" + metadata.CommitHash = nil + link = buildGitHubPermalink(options, metadata, fileURI, 1, 1) + if link != "" { + t.Fatalf("expected empty permalink when ref and metadata are missing, got %q", link) + } +} + +func TestBuildNewIssuesFromSARIFManualScenarios(t *testing.T) { + tempDir, err := os.MkdirTemp("", "sarif_scenarios") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + repoRoot := filepath.Join(tempDir, "scanio-test") + subfolder := filepath.Join(repoRoot, "apps", "demo") + if err := os.MkdirAll(subfolder, 0o755); err != nil { + t.Fatalf("Failed to create repo subfolder: %v", err) + } + + mainFile := filepath.Join(subfolder, "main.py") + var builder strings.Builder + for i := 1; i <= 60; i++ { + builder.WriteString(fmt.Sprintf("line %d\n", i)) + } + if err := 
os.WriteFile(mainFile, []byte(builder.String()), 0o644); err != nil { + t.Fatalf("Failed to write main.py: %v", err) + } + + logger := hclog.NewNullLogger() + commit := "aec0b795c350ff53fe9ab01adf862408aa34c3fd" + + metadata := &git.RepositoryMetadata{ + RepoRootFolder: repoRoot, + Subfolder: filepath.ToSlash(filepath.Join("apps", "demo")), + CommitHash: &commit, + } + + options := RunOptions{ + Namespace: "scan-io-git", + Repository: "scanio-test", + Ref: commit, + SourceFolder: subfolder, + } + + expectedRepoPath := filepath.ToSlash(filepath.Join("apps", "demo", "main.py")) + permalink := fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s#L%d-L%d", + options.Namespace, + options.Repository, + commit, + expectedRepoPath, + 11, + 29, + ) + + scenarios := []struct { + name string + uri string + sourceFolderCLI string + sourceFolderAbs string + }{ + { + name: "outside project absolute", + uri: mainFile, + sourceFolderCLI: subfolder, + sourceFolderAbs: subfolder, + }, + { + name: "outside project relative", + uri: filepath.ToSlash(filepath.Join("..", "scanio-test", "apps", "demo", "main.py")), + sourceFolderCLI: filepath.Join("..", "scanio-test", "apps", "demo"), + sourceFolderAbs: subfolder, + }, + { + name: "from root absolute", + uri: mainFile, + sourceFolderCLI: subfolder, + sourceFolderAbs: subfolder, + }, + { + name: "from root relative", + uri: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + sourceFolderCLI: filepath.Join("apps", "demo"), + sourceFolderAbs: subfolder, + }, + { + name: "from subfolder absolute", + uri: mainFile, + sourceFolderCLI: subfolder, + sourceFolderAbs: subfolder, + }, + { + name: "from subfolder relative", + uri: "main.py", + sourceFolderCLI: ".", + sourceFolderAbs: subfolder, + }, + } + + for _, scenario := range scenarios { + scenario := scenario + t.Run(scenario.name, func(t *testing.T) { + ruleID := "test.rule" + uriValue := scenario.uri + startLine := 11 + endLine := 29 + message := "Test finding" + baseID := "%SRCROOT%" + + result := &sarif.Result{ + RuleID: &ruleID, + Message: sarif.Message{ + Text: &message, + }, + Locations: []*sarif.Location{ + { + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: &uriValue, + URIBaseId: &baseID, + }, + Region: &sarif.Region{ + StartLine: &startLine, + EndLine: &endLine, + }, + }, + }, + }, + } + result.PropertyBag = *sarif.NewPropertyBag() + result.Add("Level", "error") + + report := &internalsarif.Report{ + Report: &sarif.Report{ + Runs: []*sarif.Run{ + { + Tool: sarif.Tool{ + Driver: &sarif.ToolComponent{ + Name: "Semgrep", + Rules: []*sarif.ReportingDescriptor{ + {ID: ruleID}, + }, + }, + }, + Results: []*sarif.Result{result}, + }, + }, + }, + } + + scenarioOptions := options + scenarioOptions.SourceFolder = scenario.sourceFolderCLI + + issues := buildNewIssuesFromSARIF(report, scenarioOptions, scenario.sourceFolderAbs, metadata, logger) + if len(issues) == 0 { + t.Fatalf("expected issues for scenario %q", scenario.name) + } + + issue := issues[0] + if issue.Metadata.Filename != expectedRepoPath { + t.Fatalf("scenario %q expected repo path %q, got %q", scenario.name, expectedRepoPath, issue.Metadata.Filename) + } + if issue.Metadata.SnippetHash == "" { + t.Fatalf("scenario %q expected snippet hash to be populated", scenario.name) + } + if !strings.Contains(issue.Body, permalink) { + t.Fatalf("scenario %q issue body missing permalink %q", scenario.name, permalink) + } + }) + } +} + +func TestResolveSourceFolder(t *testing.T) { + // Create a test logger + 
logger := hclog.NewNullLogger() + + tests := []struct { + name string + input string + expected string + setup func() (string, func()) // setup function that returns a test path and cleanup function + }{ + { + name: "empty string", + input: "", + expected: "", + }, + { + name: "whitespace only", + input: " \t\n ", + expected: "", + }, + { + name: "relative path", + input: "", + expected: "", // Will be resolved to absolute path + setup: func() (string, func()) { + // Create a temporary directory and change to it + tempDir, err := os.MkdirTemp("", "sarif-test-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + testDir := filepath.Join(tempDir, "testdir") + err = os.Mkdir(testDir, 0755) + if err != nil { + os.RemoveAll(tempDir) + t.Fatalf("failed to create test dir: %v", err) + } + return testDir, func() { os.RemoveAll(tempDir) } + }, + }, + { + name: "absolute path", + input: "", + expected: "", // Will be set by setup + setup: func() (string, func()) { + tempDir, err := os.MkdirTemp("", "sarif-test-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + return tempDir, func() { os.RemoveAll(tempDir) } + }, + }, + { + name: "path with tilde expansion", + input: "~/testdir", + expected: "", // Will be resolved to actual home directory path + setup: func() (string, func()) { + tempHome, err := os.MkdirTemp("", "sarif-home-*") + if err != nil { + t.Fatalf("failed to create temp home dir: %v", err) + } + t.Setenv("HOME", tempHome) + testDir := filepath.Join(tempHome, "testdir") + if err := os.MkdirAll(testDir, 0o755); err != nil { + os.RemoveAll(tempHome) + t.Fatalf("failed to create test dir: %v", err) + } + return testDir, func() { os.RemoveAll(tempHome) } + }, + }, + { + name: "path with dots and slashes", + input: "", + expected: "", // Will be cleaned and resolved + setup: func() (string, func()) { + tempDir, err := os.MkdirTemp("", "sarif-test-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + // Create a parent directory structure + parentDir := filepath.Join(tempDir, "parent") + err = os.Mkdir(parentDir, 0755) + if err != nil { + os.RemoveAll(tempDir) + t.Fatalf("failed to create parent dir: %v", err) + } + testDir := filepath.Join(parentDir, "testdir") + err = os.Mkdir(testDir, 0755) + if err != nil { + os.RemoveAll(tempDir) + t.Fatalf("failed to create test dir: %v", err) + } + return testDir, func() { os.RemoveAll(tempDir) } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var cleanup func() + var expectedPath string + + if tt.setup != nil { + testPath, cleanupFunc := tt.setup() + cleanup = cleanupFunc + expectedPath = filepath.Clean(testPath) + + // Set the input based on test type + if tt.name == "relative path" { + // Use absolute path for relative path test since we can't control working directory + tt.input = testPath + } else if tt.name == "absolute path" { + tt.input = testPath + } else if tt.name == "path with tilde expansion" { + tt.input = "~/testdir" + } else if tt.name == "path with dots and slashes" { + // Use absolute path for this test too + tt.input = testPath + } + } else { + expectedPath = tt.expected + } + + result := ResolveSourceFolder(tt.input, logger) + + if tt.setup != nil { + // For tests with setup, verify the result is an absolute path + if !filepath.IsAbs(result) { + t.Errorf("expected absolute path, got relative path: %s", result) + } + // Verify the resolved path points to the same directory + if result != expectedPath { + t.Errorf("expected %s, 
got %s", expectedPath, result) + } + } else { + // For tests without setup, verify exact match + if result != expectedPath { + t.Errorf("expected %s, got %s", expectedPath, result) + } + } + + if cleanup != nil { + cleanup() + } + }) + } +} + +func TestResolveSourceFolderErrorHandling(t *testing.T) { + logger := hclog.NewNullLogger() + + t.Run("non-existent path", func(t *testing.T) { + // Test with a path that doesn't exist - should still resolve to absolute path + result := ResolveSourceFolder("/non/existent/path", logger) + + // Should still return an absolute path even if it doesn't exist + if !filepath.IsAbs(result) { + t.Errorf("expected absolute path even for non-existent path, got: %s", result) + } + + expected := "/non/existent/path" + if result != expected { + t.Errorf("expected %s, got %s", expected, result) + } + }) + + t.Run("invalid characters in path", func(t *testing.T) { + // Test with path containing invalid characters + result := ResolveSourceFolder("/tmp/test\x00invalid", logger) + + // Should handle gracefully and return the path as-is + if result == "" { + t.Error("expected non-empty result for invalid path") + } + }) +} + +func TestResolveSourceFolderRelativePaths(t *testing.T) { + logger := hclog.NewNullLogger() + + t.Run("relative path with working directory change", func(t *testing.T) { + // Create a temporary directory structure + tempDir, err := os.MkdirTemp("", "sarif-test-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create a test directory + testDir := filepath.Join(tempDir, "testdir") + err = os.Mkdir(testDir, 0755) + if err != nil { + t.Fatalf("failed to create test dir: %v", err) + } + + // Change to the temp directory + originalDir, err := os.Getwd() + if err != nil { + t.Fatalf("failed to get current directory: %v", err) + } + defer os.Chdir(originalDir) + + err = os.Chdir(tempDir) + if err != nil { + t.Fatalf("failed to change directory: %v", err) + } + + // Test relative path + result := ResolveSourceFolder("./testdir", logger) + expected := filepath.Clean(testDir) + + if result != expected { + t.Errorf("expected %s, got %s", expected, result) + } + + if !filepath.IsAbs(result) { + t.Errorf("expected absolute path, got relative path: %s", result) + } + }) +} + +func TestApplyEnvironmentFallbacks(t *testing.T) { + tests := []struct { + name string + initialOpts RunOptions + envVars map[string]string + expectedOpts RunOptions + }{ + { + name: "no environment variables set", + initialOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "test-repo", + Ref: "test-ref", + }, + envVars: map[string]string{}, + expectedOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "test-repo", + Ref: "test-ref", + }, + }, + { + name: "all options already set - no fallbacks applied", + initialOpts: RunOptions{ + Namespace: "existing-namespace", + Repository: "existing-repo", + Ref: "existing-ref", + }, + envVars: map[string]string{ + "GITHUB_REPOSITORY_OWNER": "env-namespace", + "GITHUB_REPOSITORY": "env-owner/env-repo", + "GITHUB_SHA": "env-sha", + }, + expectedOpts: RunOptions{ + Namespace: "existing-namespace", + Repository: "existing-repo", + Ref: "existing-ref", + }, + }, + { + name: "namespace fallback applied", + initialOpts: RunOptions{ + Namespace: "", + Repository: "test-repo", + Ref: "test-ref", + }, + envVars: map[string]string{ + "GITHUB_REPOSITORY_OWNER": "env-namespace", + }, + expectedOpts: RunOptions{ + Namespace: "env-namespace", + Repository: "test-repo", + Ref: 
"test-ref", + }, + }, + { + name: "repository fallback applied with slash", + initialOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "", + Ref: "test-ref", + }, + envVars: map[string]string{ + "GITHUB_REPOSITORY": "env-owner/env-repo", + }, + expectedOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "env-repo", + Ref: "test-ref", + }, + }, + { + name: "repository fallback applied without slash", + initialOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "", + Ref: "test-ref", + }, + envVars: map[string]string{ + "GITHUB_REPOSITORY": "env-repo-only", + }, + expectedOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "env-repo-only", + Ref: "test-ref", + }, + }, + { + name: "ref fallback applied", + initialOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "test-repo", + Ref: "", + }, + envVars: map[string]string{ + "GITHUB_SHA": "env-sha-123", + }, + expectedOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "test-repo", + Ref: "env-sha-123", + }, + }, + { + name: "all fallbacks applied", + initialOpts: RunOptions{ + Namespace: "", + Repository: "", + Ref: "", + }, + envVars: map[string]string{ + "GITHUB_REPOSITORY_OWNER": "env-namespace", + "GITHUB_REPOSITORY": "env-owner/env-repo", + "GITHUB_SHA": "env-sha-123", + }, + expectedOpts: RunOptions{ + Namespace: "env-namespace", + Repository: "env-repo", + Ref: "env-sha-123", + }, + }, + { + name: "whitespace handling", + initialOpts: RunOptions{ + Namespace: " ", + Repository: "\t", + Ref: "\n", + }, + envVars: map[string]string{ + "GITHUB_REPOSITORY_OWNER": " env-namespace ", + "GITHUB_REPOSITORY": "\tenv-owner/env-repo\t", + "GITHUB_SHA": "\nenv-sha-123\n", + }, + expectedOpts: RunOptions{ + Namespace: "env-namespace", + Repository: "env-repo", + Ref: "env-sha-123", + }, + }, + { + name: "repository with multiple slashes", + initialOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "", + Ref: "test-ref", + }, + envVars: map[string]string{ + "GITHUB_REPOSITORY": "env-owner/subdir/env-repo", + }, + expectedOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "subdir/env-repo", + Ref: "test-ref", + }, + }, + { + name: "repository with slash at end", + initialOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "", + Ref: "test-ref", + }, + envVars: map[string]string{ + "GITHUB_REPOSITORY": "env-owner/env-repo/", + }, + expectedOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "env-repo/", + Ref: "test-ref", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Set up environment variables + for key, value := range tt.envVars { + t.Setenv(key, value) + } + + // Create a copy of initial options + opts := tt.initialOpts + + // Apply environment fallbacks + ApplyEnvironmentFallbacks(&opts) + + // Verify results + if opts.Namespace != tt.expectedOpts.Namespace { + t.Errorf("Namespace: expected %q, got %q", tt.expectedOpts.Namespace, opts.Namespace) + } + if opts.Repository != tt.expectedOpts.Repository { + t.Errorf("Repository: expected %q, got %q", tt.expectedOpts.Repository, opts.Repository) + } + if opts.Ref != tt.expectedOpts.Ref { + t.Errorf("Ref: expected %q, got %q", tt.expectedOpts.Ref, opts.Ref) + } + }) + } +} + +func TestApplyEnvironmentFallbacksEdgeCases(t *testing.T) { + t.Run("empty environment variables", func(t *testing.T) { + opts := RunOptions{ + Namespace: "", + Repository: "", + Ref: "", + } + + // Set empty environment variables + t.Setenv("GITHUB_REPOSITORY_OWNER", "") + 
t.Setenv("GITHUB_REPOSITORY", "") + t.Setenv("GITHUB_SHA", "") + + ApplyEnvironmentFallbacks(&opts) + + // Should remain empty + if opts.Namespace != "" { + t.Errorf("Expected empty namespace, got %q", opts.Namespace) + } + if opts.Repository != "" { + t.Errorf("Expected empty repository, got %q", opts.Repository) + } + if opts.Ref != "" { + t.Errorf("Expected empty ref, got %q", opts.Ref) + } + }) + + t.Run("repository with only slash", func(t *testing.T) { + opts := RunOptions{ + Repository: "", + } + + t.Setenv("GITHUB_REPOSITORY", "/") + + ApplyEnvironmentFallbacks(&opts) + + // Should fall back to the whole value + if opts.Repository != "/" { + t.Errorf("Expected repository to be '/', got %q", opts.Repository) + } + }) + + t.Run("repository with slash at beginning", func(t *testing.T) { + opts := RunOptions{ + Repository: "", + } + + t.Setenv("GITHUB_REPOSITORY", "/env-repo") + + ApplyEnvironmentFallbacks(&opts) + + // Should extract the part after the slash since idx=0 and idx < len(gr)-1 + if opts.Repository != "env-repo" { + t.Errorf("Expected repository to be 'env-repo', got %q", opts.Repository) + } + }) +} + +func TestBuildIssueBodyWithCodeQLMessage(t *testing.T) { + // Test integration with CodeQL-style SARIF data using mocks + tempDir, err := os.MkdirTemp("", "sarif_codeql_test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create test file structure + repoRoot := filepath.Join(tempDir, "scanio-test") + subfolder := filepath.Join(repoRoot, "apps", "demo") + if err := os.MkdirAll(subfolder, 0o755); err != nil { + t.Fatalf("Failed to create repo subfolder: %v", err) + } + + mainFile := filepath.Join(subfolder, "main.py") + if err := os.WriteFile(mainFile, []byte("line 1\nline 2\nline 3\n"), 0o644); err != nil { + t.Fatalf("Failed to write main.py: %v", err) + } + + logger := hclog.NewNullLogger() + commit := "aec0b795c350ff53fe9ab01adf862408aa34c3fd" + + metadata := &git.RepositoryMetadata{ + RepoRootFolder: repoRoot, + Subfolder: filepath.ToSlash(filepath.Join("apps", "demo")), + CommitHash: &commit, + } + + options := RunOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: commit, + SourceFolder: subfolder, + } + + // Create mock SARIF report with CodeQL-style message + sarifReport := &sarif.Report{ + Runs: []*sarif.Run{ + { + Tool: sarif.Tool{ + Driver: &sarif.ToolComponent{ + Rules: []*sarif.ReportingDescriptor{ + { + ID: "py/template-injection", + Properties: map[string]interface{}{ + "problem.severity": "error", + }, + }, + }, + }, + }, + Results: []*sarif.Result{ + { + RuleID: stringPtr("py/template-injection"), + Level: stringPtr("error"), + Message: sarif.Message{ + Text: stringPtr("This template construction depends on a [user-provided value](1)."), + }, + RelatedLocations: []*sarif.Location{ + { + Id: uintPtr(1), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(1), + StartColumn: intPtr(50), + EndLine: intPtr(1), + EndColumn: intPtr(57), + }, + }, + }, + }, + Locations: []*sarif.Location{ + { + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(10), + StartColumn: intPtr(5), + EndLine: intPtr(10), + EndColumn: intPtr(15), + }, + }, + }, + }, + }, + }, + }, + }, + } + + report := &internalsarif.Report{ + Report: sarifReport, + } + + // Enrich results with 
level property + report.EnrichResultsLevelProperty() + + // Process SARIF results + issues := buildNewIssuesFromSARIF(report, options, subfolder, metadata, logger) + + // Verify that formatted messages are included in issue bodies + if len(issues) == 0 { + t.Fatalf("Expected at least one issue, got 0") + } + + issue := issues[0] + if !strings.Contains(issue.Body, "### Description") { + t.Errorf("Expected issue body to contain '### Description' section") + } + + // Check that the formatted message contains a hyperlink + if !strings.Contains(issue.Body, "https://github.com/test-org/test-repo/blob/") { + t.Errorf("Expected issue body to contain GitHub permalink") + } + + // Check for CodeQL-style formatting (single reference) + if !strings.Contains(issue.Body, "[user-provided value](") { + t.Errorf("Expected issue body to contain CodeQL-style formatted reference") + } +} + +func TestBuildIssueBodyWithSnykMessage(t *testing.T) { + // Test integration with Snyk-style SARIF data using mocks + tempDir, err := os.MkdirTemp("", "sarif_snyk_test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create test file structure + repoRoot := filepath.Join(tempDir, "scanio-test") + subfolder := filepath.Join(repoRoot, "apps", "demo") + if err := os.MkdirAll(subfolder, 0o755); err != nil { + t.Fatalf("Failed to create repo subfolder: %v", err) + } + + mainFile := filepath.Join(subfolder, "main.py") + if err := os.WriteFile(mainFile, []byte("line 1\nline 2\nline 3\n"), 0o644); err != nil { + t.Fatalf("Failed to write main.py: %v", err) + } + + logger := hclog.NewNullLogger() + commit := "aec0b795c350ff53fe9ab01adf862408aa34c3fd" + + metadata := &git.RepositoryMetadata{ + RepoRootFolder: repoRoot, + Subfolder: filepath.ToSlash(filepath.Join("apps", "demo")), + CommitHash: &commit, + } + + options := RunOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: commit, + SourceFolder: subfolder, + } + + // Create mock SARIF report with Snyk-style message + sarifReport := &sarif.Report{ + Runs: []*sarif.Run{ + { + Tool: sarif.Tool{ + Driver: &sarif.ToolComponent{ + Rules: []*sarif.ReportingDescriptor{ + { + ID: "python/Ssti", + Properties: map[string]interface{}{ + "problem.severity": "error", + }, + }, + }, + }, + }, + Results: []*sarif.Result{ + { + RuleID: stringPtr("python/Ssti"), + Level: stringPtr("error"), + Message: sarif.Message{ + Markdown: stringPtr("Unsanitized input from {0} {1} into {2}, where it is used to render an HTML page returned to the user. 
This may result in a Cross-Site Scripting attack (XSS)."), + Arguments: []string{ + "[an HTTP parameter](0)", + "[flows](1),(2),(3),(4),(5),(6)", + "[flask.render_template_string](7)", + }, + }, + RelatedLocations: []*sarif.Location{ + { + Id: uintPtr(0), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(1), + StartColumn: intPtr(50), + EndLine: intPtr(1), + EndColumn: intPtr(57), + }, + }, + }, + { + Id: uintPtr(1), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(8), + StartColumn: intPtr(18), + EndLine: intPtr(8), + EndColumn: intPtr(25), + }, + }, + }, + { + Id: uintPtr(2), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(8), + StartColumn: intPtr(18), + EndLine: intPtr(8), + EndColumn: intPtr(30), + }, + }, + }, + { + Id: uintPtr(3), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(8), + StartColumn: intPtr(18), + EndLine: intPtr(8), + EndColumn: intPtr(46), + }, + }, + }, + { + Id: uintPtr(4), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(8), + StartColumn: intPtr(5), + EndLine: intPtr(8), + EndColumn: intPtr(15), + }, + }, + }, + { + Id: uintPtr(5), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(11), + StartColumn: intPtr(5), + EndLine: intPtr(11), + EndColumn: intPtr(13), + }, + }, + }, + { + Id: uintPtr(6), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(29), + StartColumn: intPtr(35), + EndLine: intPtr(29), + EndColumn: intPtr(43), + }, + }, + }, + { + Id: uintPtr(7), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(29), + StartColumn: intPtr(12), + EndLine: intPtr(29), + EndColumn: intPtr(44), + }, + }, + }, + }, + Locations: []*sarif.Location{ + { + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(29), + StartColumn: intPtr(12), + EndLine: intPtr(29), + EndColumn: intPtr(44), + }, + }, + }, + }, + }, + }, + }, + }, + } + + report := &internalsarif.Report{ + Report: sarifReport, + } + + // Enrich results with level property + report.EnrichResultsLevelProperty() + + // Process SARIF results + issues := buildNewIssuesFromSARIF(report, options, subfolder, metadata, logger) + + // Verify that formatted messages are included in issue bodies + if len(issues) == 0 { + t.Fatalf("Expected at least one issue, got 0") + } + + issue := issues[0] + + if !strings.Contains(issue.Body, "### Description") { + t.Errorf("Expected issue body to contain '### Description' section") + } + + // Check that the formatted message contains hyperlinks + if !strings.Contains(issue.Body, "https://github.com/test-org/test-repo/blob/") { + 
t.Errorf("Expected issue body to contain GitHub permalink") + } + + // Check for flow chain formatting (multiple references) + if !strings.Contains(issue.Body, " > ") { + t.Errorf("Expected issue body to contain flow chain formatting") + } + + // Check for Snyk-style formatting (multiple references in flow chain) + if !strings.Contains(issue.Body, "flows (") { + t.Errorf("Expected issue body to contain Snyk-style formatted flow reference") + } +} + +func TestFormatCodeFlows(t *testing.T) { + tests := []struct { + name string + result *sarif.Result + expected string + }{ + { + name: "no code flows", + result: &sarif.Result{}, + expected: "", + }, + { + name: "nil code flows", + result: &sarif.Result{ + CodeFlows: nil, + }, + expected: "", + }, + { + name: "empty code flows", + result: &sarif.Result{ + CodeFlows: []*sarif.CodeFlow{}, + }, + expected: "", + }, + { + name: "single thread flow with message text", + result: &sarif.Result{ + CodeFlows: []*sarif.CodeFlow{ + { + ThreadFlows: []*sarif.ThreadFlow{ + { + Locations: []*sarif.ThreadFlowLocation{ + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(1), + EndLine: intPtr(1), + }, + }, + Message: &sarif.Message{ + Text: stringPtr("ControlFlowNode for ImportMember"), + }, + }, + }, + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(8), + EndLine: intPtr(8), + }, + }, + Message: &sarif.Message{ + Text: stringPtr("ControlFlowNode for request"), + }, + }, + }, + }, + }, + }, + }, + }, + }, + expected: `
<details>
+<summary>Code Flow 1</summary>
+
+Step 1: ControlFlowNode for ImportMember
+https://github.com/test-org/test-repo/blob/test-ref/main.py#L1
+
+Step 2: ControlFlowNode for request
+https://github.com/test-org/test-repo/blob/test-ref/main.py#L8
+</details>
`, + }, + { + name: "single thread flow without message text", + result: &sarif.Result{ + CodeFlows: []*sarif.CodeFlow{ + { + ThreadFlows: []*sarif.ThreadFlow{ + { + Locations: []*sarif.ThreadFlowLocation{ + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("app.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(5), + EndLine: intPtr(5), + }, + }, + Message: &sarif.Message{ + Text: stringPtr(""), + }, + }, + }, + }, + }, + }, + }, + }, + }, + expected: `
<details>
+<summary>Code Flow 1</summary>
+
+Step 1:
+https://github.com/test-org/test-repo/blob/test-ref/app.py#L5
+</details>
`, + }, + { + name: "multiple thread flows", + result: &sarif.Result{ + CodeFlows: []*sarif.CodeFlow{ + { + ThreadFlows: []*sarif.ThreadFlow{ + { + Locations: []*sarif.ThreadFlowLocation{ + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("file1.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(1), + EndLine: intPtr(1), + }, + }, + Message: &sarif.Message{ + Text: stringPtr("First flow step"), + }, + }, + }, + }, + }, + { + Locations: []*sarif.ThreadFlowLocation{ + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("file2.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(10), + EndLine: intPtr(10), + }, + }, + Message: &sarif.Message{ + Text: stringPtr("Second flow step"), + }, + }, + }, + }, + }, + }, + }, + }, + }, + expected: `
<details>
+<summary>Code Flow 1</summary>
+
+Step 1: First flow step
+https://github.com/test-org/test-repo/blob/test-ref/file1.py#L1
+</details>
+
+<details>
+<summary>Code Flow 2</summary>
+
+Step 1: Second flow step
+https://github.com/test-org/test-repo/blob/test-ref/file2.py#L10
+</details>
`, + }, + { + name: "thread flow with line range", + result: &sarif.Result{ + CodeFlows: []*sarif.CodeFlow{ + { + ThreadFlows: []*sarif.ThreadFlow{ + { + Locations: []*sarif.ThreadFlowLocation{ + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(10), + EndLine: intPtr(15), + }, + }, + Message: &sarif.Message{ + Text: stringPtr("Multi-line location"), + }, + }, + }, + }, + }, + }, + }, + }, + }, + expected: `
<details>
+<summary>Code Flow 1</summary>
+
+Step 1: Multi-line location
+https://github.com/test-org/test-repo/blob/test-ref/main.py#L10-L15
+</details>
`, + }, + { + name: "duplicate steps with same permalink and message", + result: &sarif.Result{ + CodeFlows: []*sarif.CodeFlow{ + { + ThreadFlows: []*sarif.ThreadFlow{ + { + Locations: []*sarif.ThreadFlowLocation{ + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(1), + EndLine: intPtr(1), + }, + }, + Message: &sarif.Message{ + Text: stringPtr("First step"), + }, + }, + }, + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(1), + EndLine: intPtr(1), + }, + }, + Message: &sarif.Message{ + Text: stringPtr("First step"), // Same message as previous + }, + }, + }, + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(2), + EndLine: intPtr(2), + }, + }, + Message: &sarif.Message{ + Text: stringPtr("Second step"), // Different message + }, + }, + }, + }, + }, + }, + }, + }, + }, + expected: `
<details>
+<summary>Code Flow 1</summary>
+
+Step 1: First step
+https://github.com/test-org/test-repo/blob/test-ref/main.py#L1
+
+Step 2: Second step
+https://github.com/test-org/test-repo/blob/test-ref/main.py#L2
+</details>
`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + options := RunOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "test-ref", + } + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/repo", + } + + result := FormatCodeFlows(tt.result, options, repoMetadata, "/test/repo") + + if result != tt.expected { + t.Errorf("Expected:\n%s\nGot:\n%s", tt.expected, result) + } + }) + } +} + +// TestDefaultSourceFolderRegression tests the fix for the issue where omitting --source-folder +// causes snippet hash computation to fail. This validates the documented workflow: +// "Run inside git repository (auto-detects namespace, repository, ref)" +func TestDefaultSourceFolderRegression(t *testing.T) { + // Create a temporary directory structure that mimics a git repository + tempDir, err := os.MkdirTemp("", "sarif-test-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create a git repository structure + repoRoot := filepath.Join(tempDir, "test-repo") + if err := os.Mkdir(repoRoot, 0755); err != nil { + t.Fatalf("failed to create repo root: %v", err) + } + + // Create a test file with content for snippet hashing + testFile := filepath.Join(repoRoot, "main.py") + var builder strings.Builder + for i := 1; i <= 30; i++ { + builder.WriteString(fmt.Sprintf("line %d\n", i)) + } + if err := os.WriteFile(testFile, []byte(builder.String()), 0o644); err != nil { + t.Fatalf("failed to write test file: %v", err) + } + + // Initialize git repository + gitDir := filepath.Join(repoRoot, ".git") + if err := os.Mkdir(gitDir, 0755); err != nil { + t.Fatalf("failed to create .git dir: %v", err) + } + + // Create a mock git config file + configFile := filepath.Join(gitDir, "config") + configContent := `[remote "origin"] + url = https://github.com/test-org/test-repo.git` + if err := os.WriteFile(configFile, []byte(configContent), 0o644); err != nil { + t.Fatalf("failed to write git config: %v", err) + } + + // Create a mock HEAD file + headFile := filepath.Join(gitDir, "HEAD") + if err := os.WriteFile(headFile, []byte("ref: refs/heads/main\n"), 0o644); err != nil { + t.Fatalf("failed to write HEAD file: %v", err) + } + + // Create refs/heads directory and main branch file + refsDir := filepath.Join(gitDir, "refs", "heads") + if err := os.MkdirAll(refsDir, 0755); err != nil { + t.Fatalf("failed to create refs dir: %v", err) + } + mainBranchFile := filepath.Join(refsDir, "main") + commitHash := "aec0b795c350ff53fe9ab01adf862408aa34c3fd" + if err := os.WriteFile(mainBranchFile, []byte(commitHash+"\n"), 0o644); err != nil { + t.Fatalf("failed to write main branch file: %v", err) + } + + // Change to the repository directory to simulate running from inside the repo + originalDir, err := os.Getwd() + if err != nil { + t.Fatalf("failed to get current directory: %v", err) + } + defer os.Chdir(originalDir) + + if err := os.Chdir(repoRoot); err != nil { + t.Fatalf("failed to change to repo directory: %v", err) + } + + // Create test logger + logger := hclog.NewNullLogger() + + // Create repository metadata (simulating successful git metadata collection) + metadata := &git.RepositoryMetadata{ + RepoRootFolder: repoRoot, + Subfolder: "", // No subfolder when running from root + CommitHash: &commitHash, + RepositoryFullName: stringPtr("test-org/test-repo"), + } + + // Create SARIF result pointing to our test file + ruleID := "test.rule" + uriValue := "main.py" // Relative URI as would be in SARIF when scanning from 
repo root + startLine := 11 + endLine := 29 + message := "Test finding" + + result := &sarif.Result{ + RuleID: &ruleID, + Message: sarif.Message{ + Text: &message, + }, + Locations: []*sarif.Location{ + { + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: &uriValue, + }, + Region: &sarif.Region{ + StartLine: &startLine, + EndLine: &endLine, + }, + }, + }, + }, + } + result.PropertyBag = *sarif.NewPropertyBag() + result.Add("Level", "error") + + // Create SARIF report + report := &internalsarif.Report{ + Report: &sarif.Report{ + Runs: []*sarif.Run{ + { + Tool: sarif.Tool{ + Driver: &sarif.ToolComponent{ + Name: "Test Scanner", + Rules: []*sarif.ReportingDescriptor{ + {ID: ruleID}, + }, + }, + }, + Results: []*sarif.Result{result}, + }, + }, + }, + } + + // Test with empty source folder (simulating omitted --source-folder flag) + options := RunOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: commitHash, + SourceFolder: "", // Empty source folder - should default to current directory + } + + // Build issues from SARIF + issues := buildNewIssuesFromSARIF(report, options, repoRoot, metadata, logger) + + // Verify results + if len(issues) == 0 { + t.Fatal("expected at least one issue to be created") + } + + issue := issues[0] + + // Verify snippet hash was computed successfully + if issue.Metadata.SnippetHash == "" { + t.Fatal("expected snippet hash to be computed when source-folder defaults to current directory") + } + + // Verify snippet hash is included in issue body + if !strings.Contains(issue.Body, "Snippet SHA256") { + t.Fatal("expected issue body to contain Snippet SHA256 block") + } + + // Verify the snippet hash in the body matches the metadata + expectedHash := issue.Metadata.SnippetHash + if !strings.Contains(issue.Body, expectedHash) { + t.Fatalf("expected issue body to contain snippet hash %q", expectedHash) + } + + // Verify file path is correctly resolved + expectedRepoPath := "main.py" + if issue.Metadata.Filename != expectedRepoPath { + t.Fatalf("expected repo path %q, got %q", expectedRepoPath, issue.Metadata.Filename) + } + + // Verify permalink is generated correctly + expectedPermalink := fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s#L%d-L%d", + options.Namespace, + options.Repository, + commitHash, + expectedRepoPath, + startLine, + endLine, + ) + if !strings.Contains(issue.Body, expectedPermalink) { + t.Fatalf("expected issue body to contain permalink %q", expectedPermalink) + } + + t.Logf("Successfully computed snippet hash: %s", issue.Metadata.SnippetHash) +} + +// Helper functions for creating test data +func stringPtr(s string) *string { + return &s +} + +func intPtr(i int) *int { + return &i +} + +func uintPtr(u uint) *uint { + return &u +} diff --git a/cmd/sarif-issues/validation.go b/cmd/sarif-issues/validation.go new file mode 100644 index 0000000..52c30fa --- /dev/null +++ b/cmd/sarif-issues/validation.go @@ -0,0 +1,20 @@ +package sarifissues + +import ( + "fmt" + "strings" +) + +// validate validates the RunOptions for the sarif-issues command. 
+func validate(o *RunOptions) error { + if o.Namespace == "" { + return fmt.Errorf("--namespace is required") + } + if o.Repository == "" { + return fmt.Errorf("--repository is required") + } + if strings.TrimSpace(o.SarifPath) == "" { + return fmt.Errorf("--sarif is required") + } + return nil +} diff --git a/cmd/to-html/to-html.go b/cmd/to-html/to-html.go index bb290d5..0a42010 100644 --- a/cmd/to-html/to-html.go +++ b/cmd/to-html/to-html.go @@ -142,7 +142,26 @@ var ToHtmlCmd = &cobra.Command{ if err != nil { logger.Warn("can't collect repository metadata", "reason", err) } else { - logger.Debug("repositoryMetadata", "BranchName", *repositoryMetadata.BranchName, "CommitHash", *repositoryMetadata.CommitHash, "RepositoryFullName", *repositoryMetadata.RepositoryFullName, "Subfolder", repositoryMetadata.Subfolder, "RepoRootFolder", repositoryMetadata.RepoRootFolder) + branch := "" + if repositoryMetadata.BranchName != nil { + branch = *repositoryMetadata.BranchName + } + commit := "" + if repositoryMetadata.CommitHash != nil { + commit = *repositoryMetadata.CommitHash + } + fullName := "" + if repositoryMetadata.RepositoryFullName != nil { + fullName = *repositoryMetadata.RepositoryFullName + } + logger.Debug( + "repositoryMetadata", + "BranchName", branch, + "CommitHash", commit, + "RepositoryFullName", fullName, + "Subfolder", repositoryMetadata.Subfolder, + "RepoRootFolder", repositoryMetadata.RepoRootFolder, + ) } var url *vcsurl.VCSURL diff --git a/docs/engineering/sarif-issues-path-analysis.md b/docs/engineering/sarif-issues-path-analysis.md new file mode 100644 index 0000000..534eb11 --- /dev/null +++ b/docs/engineering/sarif-issues-path-analysis.md @@ -0,0 +1,160 @@ +# SARIF Issues Path Handling Analysis + +## Reproduction Context +- Command sequence: + 1. `scanio analyse --scanner semgrep /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo --format sarif --output outside-project.sarif` + 2. `scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif from-subfolder.sarif --source-folder /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo` +- Expected permalink: `.../blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py#L11-L29` +- Actual permalink (incorrect): `.../blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/main.py#L11-L29` + +## Key Observations +- `data/outside-project.sarif` contains absolute URIs such as `/home/.../scanio-test/apps/demo/main.py`. +- `data/from-subfolder.sarif` contains relative URIs (`main.py`) because Semgrep ran from the subfolder. +- In both cases the SARIF report points to the file under `apps/demo/main.py`, yet the CLI emits `main.py` in issue bodies and permalinks. + +## Code Flow Review +- `cmd/sarif-issues/issue_processing.go` calls `extractFileURIFromResult` to determine the file path recorded in `NewIssueData` (`buildNewIssuesFromSARIF`, line references around `fileURI` usage). +- `extractFileURIFromResult` (`cmd/sarif-issues/utils.go:173-212`) trims the `--source-folder` prefix from absolute URIs and returns the remainder; for relative URIs it simply returns the raw value. + - When `--source-folder` is `/.../scanio-test/apps/demo`, absolute URIs reduce to `main.py`, losing the repository subpath. +- `buildGitHubPermalink` (`utils.go:125-170`) expects `fileURI` to be repository-relative when constructing `https://github.com/{namespace}/{repo}/blob/{ref}/{fileURI}#L...`. 
+- `computeSnippetHash` (`utils.go:104-121`) relies on joining `sourceFolder` with the same `fileURI` to re-read the local file. If we change `fileURI` to be repo-relative (`apps/demo/main.py`), the current join logic will point at `/.../apps/demo/apps/demo/main.py` and fail.
+- `internal/sarif.Report.EnrichResultsLocationProperty` and `EnrichResultsLocationURIProperty` perform similar prefix stripping using `sourceFolder`, so the HTML report path logic (`cmd/to-html.go`) inherits the same limitation.
+- `internal/git.CollectRepositoryMetadata` already derives `RepoRootFolder` and the `Subfolder` path segment when `--source-folder` is nested within the repo.
+
+## Root Cause
+The CLI assumes `--source-folder` equals the repository root. When the user points it to a subdirectory, the helper trims that prefix and drops intermediate path segments. Consequently:
+- Issue metadata (`File` field) loses the directory context.
+- GitHub permalinks omit the subfolder and land on the wrong file.
+- Correlation metadata (`Metadata.Filename`) no longer matches the path stored in GitHub issues, risking mismatches if/when we fix the permalink logic without updating correlation.
+
+## Fix Considerations
+1. **Determine repository root & subfolder once.** `internal/git.CollectRepositoryMetadata` gives us both `RepoRootFolder` and `Subfolder` for any path inside the repo. Reusing this keeps CLI logic consistent with the HTML report command.
+2. **Produce dual path representations.**
+   - Repo-relative path (e.g. `apps/demo/main.py`) for GitHub URLs and issue bodies.
+   - Source-folder-relative path (e.g. `main.py`) or absolute path for reading files/snippet hashing.
+3. **Avoid regressions in existing flows.** After changing `fileURI`, ensure:
+   - `computeSnippetHash` receives the correct on-disk path.
+   - Issue correlation (`Metadata.Filename`) uses the same representation that is stored in GitHub issue bodies to preserve matching.
+4. **Consider harmonising SARIF helpers.** Updating `internal/sarif` enrichment to use repo metadata would fix both CLI commands (`sarif-issues`, `to-html`) and reduce duplicated path trimming logic.
+
+## Proposed Fix Plan
+1. Enhance the `sarif-issues` command to collect repository metadata:
+   - Call `git.CollectRepositoryMetadata(opts.SourceFolder)` early (guard for errors).
+   - Derive helper closures that can translate between repo-relative and local paths.
+2. Update `extractFileURIFromResult` (or an adjacent helper) to:
+   - Resolve the SARIF URI to an absolute path (using `uriBaseId` and `sourceFolder` when necessary).
+   - Emit the repo-relative path (using metadata.RepoRootFolder) for issue content and permalinks.
+   - Return both repo-relative and local paths, or store them in a small struct to avoid repeated conversions (see the sketch after this plan).
+3. Adjust `computeSnippetHash` and correlation metadata to consume the correct local path while storing repo-relative filenames in issue metadata.
+4. Reuse the new path helper in `buildGitHubPermalink` so the permalink path stays in sync.
+5. Add regression tests:
+   - Extend `cmd/sarif-issues/utils_test.go` (or introduce new tests) covering absolute and relative SARIF URIs when `sourceFolder` points to a subdirectory.
+   - Include permalink assertions using `data/from-subfolder.sarif` / `data/outside-project.sarif`.
+6. Evaluate whether `internal/sarif`’s enrichment should adopt the same metadata-aware logic; if so, share the helper to keep `to-html` and future commands consistent.
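+
+A minimal sketch of the dual-path idea from plan item 2. The struct and helper names below are hypothetical, not existing code in the repository:
+
+```go
+package main
+
+import "path/filepath"
+
+// resolvedPath carries both representations of a single SARIF location.
+type resolvedPath struct {
+	RepoRelative string // e.g. "apps/demo/main.py", for permalinks and issue bodies
+	Local        string // absolute on-disk path, for reading files and snippet hashing
+}
+
+// resolvePaths derives both forms from an absolute file path and the repo root
+// reported by repository metadata collection.
+func resolvePaths(absPath, repoRoot string) (resolvedPath, error) {
+	rel, err := filepath.Rel(repoRoot, absPath)
+	if err != nil {
+		return resolvedPath{}, err
+	}
+	return resolvedPath{RepoRelative: filepath.ToSlash(rel), Local: absPath}, nil
+}
+```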
+ +# Manual testing +## Scans from root, subfolder, outside, with abs and relative paths +### Semgrep scan of subfolder (monorepo like use case) +```sh +# 1. Outside folder absolute paths +cd /home/jekos/ghq/github.com/scan-io-git/scan-io +scanio analyse --scanner semgrep /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/outside-project-abs.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif data/outside-project-abs.sarif --source-folder /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo +# validate here: 2 issues with correct permalinks +# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py + +# 2. Outside folder relative paths +cd /home/jekos/ghq/github.com/scan-io-git/scan-io +scanio analyse --scanner semgrep ../scanio-test/apps/demo --format sarif --output data/outside-project-rel.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif data/outside-project-rel.sarif --source-folder ../scanio-test/apps/demo +# validate here: 2 issues with correct permalinks +# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py + +# 3. From root absolute path +cd /home/jekos/ghq/github.com/scan-io-git/scanio-test +scanio analyse --scanner semgrep /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-asb.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-asb.sarif --source-folder /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo +# validate here: 2 issues with correct permalinks +# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py + +# 4. From root relative paths +cd /home/jekos/ghq/github.com/scan-io-git/scanio-test +scanio analyse --scanner semgrep apps/demo --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-rel.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-rel.sarif --source-folder apps/demo +# validate here: 2 issues with correct permalinks +# correct https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py +# correct even when .git folder is not there + +# 5. From subfolder absolute paths +cd /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo +scanio analyse --scanner semgrep /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-subfolder-abs.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-subfolder-abs.sarif --source-folder /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo +# validate here: 2 issues with correct permalinks +# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py + +# 6. 
From subfolder relative paths
+cd /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo
+scanio analyse --scanner semgrep . --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-subfolder-rel.sarif
+scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-subfolder-rel.sarif --source-folder .
+# validate here: 2 issues with correct permalinks
+# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py
+```
+### Snyk
+```sh
+cd /home/jekos/ghq/github.com/scan-io-git/scanio-test
+
+# 1. scan root
+snyk code test --sarif-file-output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-root.sarif
+scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-root.sarif --source-folder .
+
+# 2. scan subfolder from root
+snyk code test --sarif-file-output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-subfolder-from-root.sarif apps/demo
+scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-subfolder-from-root.sarif --source-folder apps/demo
+```
+### CodeQL
+```sh
+cd /home/jekos/ghq/github.com/scan-io-git/scanio-test
+
+# 1. scan root
+/tmp/codeql/codeql database create /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-root --language=python --source-root=.
+/tmp/codeql/codeql database analyze /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-root --format=sarif-latest --output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-root.sarif
+scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-root.sarif --source-folder .
+
+# 2. scan subfolder
+/tmp/codeql/codeql database create /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-subfolder --language=python --source-root=apps/demo
+/tmp/codeql/codeql database analyze /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-subfolder --format=sarif-latest --output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-subfolder.sarif
+scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-subfolder.sarif --source-folder apps/demo
+```
+## How to handle 2 subfolders with 2 separate scans
+```sh
+cd /home/jekos/ghq/github.com/scan-io-git/scanio-test
+
+# scan projects
+scanio analyse --scanner semgrep apps/demo --format sarif --output semgrep-demo.sarif
+snyk code test --sarif-file-output=snyk-another.sarif apps/another
+
+# create issues
+scanio sarif-issues --sarif semgrep-demo.sarif --source-folder apps/demo
+scanio sarif-issues --sarif snyk-another.sarif --source-folder apps/another
+```
+
+**Solution Implemented**: The `sarif-issues` command now filters open issues by source folder scope before correlation. Issues are scoped based on their file path metadata matching the normalized subfolder path. This enables independent issue management for different subfolders in monorepo CI workflows.
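+
+A minimal sketch of the scoping check behind `filterIssuesBySourceFolder()`; the helper name `inScope` and its signature are illustrative, not the actual code:
+
+```go
+package main
+
+import "strings"
+
+// inScope reports whether an issue's file path falls inside the normalized
+// subfolder; an empty subfolder means repository root, so everything matches.
+func inScope(filePath, subfolder string) bool {
+	if subfolder == "" {
+		return true
+	}
+	return filePath == subfolder || strings.HasPrefix(filePath, subfolder+"/")
+}
+```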
+
+**Key Changes**:
+- Added a `filterIssuesBySourceFolder()` function that filters open issues to only those within the current `--source-folder` scope
+- Issues are filtered before correlation, ensuring each subfolder's issues are managed independently
+- When `--source-folder` points to a subfolder, only issues whose file paths start with that subfolder are considered
+- When scanning from the root (no subfolder), all issues are included as before
+
+**Expected Behavior**: Both sets of issues remain open and are managed independently. Issues from `apps/demo` won't be closed when running the second command for `apps/another`.
+
+## Empty source-folder test
+```sh
+cd /home/jekos/ghq/github.com/scan-io-git/scanio-test
+
+snyk code test --sarif-file-output=snyk.sarif
+
+# create issues
+scanio sarif-issues --sarif snyk.sarif --source-folder .
+scanio sarif-issues --sarif snyk.sarif
+```
\ No newline at end of file
diff --git a/docs/reference/README.md b/docs/reference/README.md
index 52dc68d..773e6ac 100644
--- a/docs/reference/README.md
+++ b/docs/reference/README.md
@@ -11,6 +11,7 @@ This section provides detailed technical documentation for Scanio’s commands,
 - [List Command](cmd-list.md): Describes repository discovery functionality across supported VCS platforms, available filtering options, and command output structure.
 - [Fetch Command](cmd-fetch.md): Explains repository fetching logic, supported authentication types, URL formats, and command output structure.
 - [Analyse Command](cmd-analyse.md): Provides details on running security scanners, handling input data, configuring output formats, and command output structure.
+- [SARIF Issues Command](cmd-sarif-issues.md): Explains how to create GitHub issues from SARIF findings with configurable severity levels and automated lifecycle management.
 - [To-HTML Command](cmd-to-html.md): Explains conversion of SARIF reports to human-friendly HTML format, code snippet inclusion, and template customization options.
 - [Report Patch Command](cmd-report-patch.md): Details how to make structured modifications to SARIF reports, including different filtering capabilities and actions.
diff --git a/docs/reference/cmd-sarif-issues.md b/docs/reference/cmd-sarif-issues.md
new file mode 100644
index 0000000..db17a67
--- /dev/null
+++ b/docs/reference/cmd-sarif-issues.md
@@ -0,0 +1,400 @@
+# SARIF Issues Command
+The `sarif-issues` command creates GitHub issues from SARIF findings with configurable severity levels. It implements intelligent issue correlation to avoid duplicates and automatically closes issues that are no longer present in recent scans.
+
+This command is designed for CI/CD integration and automated security issue management, enabling teams to track and manage security findings directly in their GitHub repositories.
+
+## Table of Contents
+
+- [Key Features](#key-features)
+- [Syntax](#syntax)
+- [Options](#options)
+- [Severity Level Configuration](#severity-level-configuration)
+- [Core Validation](#core-validation)
+- [Dry Run Mode](#dry-run-mode)
+- [GitHub Authentication Setup](#github-authentication-setup)
+- [Usage Examples](#usage-examples)
+- [Command Output Format](#command-output-format)
+- [Issue Correlation Logic](#issue-correlation-logic)
+- [Issue Format](#issue-format)
+
+## Key Features
+
+| Feature | Description |
+|-------------------------------------------|----------------------------------------------------------|
+| Create issues from configurable severity levels | Automatically creates GitHub issues for SARIF findings with specified severity levels (default: "error") |
+| Correlate with existing issues | Matches new findings against open issues to prevent duplicates |
+| Auto-close resolved issues | Closes open issues that are no longer present in current scan results |
+| Add metadata and permalinks | Enriches issues with file links, severity, scanner info, and snippet hashes |
+
+## Syntax
+```bash
+scanio sarif-issues --sarif PATH [--namespace NAMESPACE] [--repository REPO] [--source-folder PATH] [--ref REF] [--labels label[,label...]] [--assignees user[,user...]] [--levels level[,level...]] [--dry-run]
+```
+
+## Options
+
+| Option | Type | Required | Default Value | Description |
+|---------------------|----------|-------------|----------------------------------|-----------------------------------------------------------------------------|
+| `--sarif` | string | Yes | `none` | Path to SARIF report file containing security findings. |
+| `--namespace` | string | Conditional | `$GITHUB_REPOSITORY_OWNER` | GitHub organization or user name. Required if environment variable not set. |
+| `--repository` | string | Conditional | `${GITHUB_REPOSITORY#*/}` | Repository name. Required if environment variable not set. |
+| `--source-folder` | string | No | `.` | Path to source code folder for improved file path resolution and snippets. |
+| `--ref` | string | No | `$GITHUB_SHA` | Git ref (branch or commit SHA) for building permalinks to vulnerable code. |
+| `--labels` | strings | No | `none` | Labels to assign to created GitHub issues (comma-separated or repeat flag). |
+| `--assignees` | strings | No | `none` | GitHub usernames to assign to created issues (comma-separated or repeat flag). |
+| `--levels` | strings | No | `["error"]` | SARIF severity levels to process. Accepts SARIF levels (error, warning, note, none) or display levels (High, Medium, Low, Info). Cannot mix formats. Case-insensitive. |
+| `--dry-run` | flag | No | `false` | Previews the issues that would be created or closed without making GitHub API calls. |
+| `--help`, `-h` | flag | No | `false` | Displays help for the `sarif-issues` command. |
+
+**Environment Variable Fallbacks**
+The command automatically uses GitHub Actions environment variables when flags are not provided: +- `GITHUB_REPOSITORY_OWNER` → `--namespace` +- `GITHUB_REPOSITORY` → `--repository` (extracts repo name after `/`) +- `GITHUB_SHA` → `--ref` + +This enables seamless integration with GitHub Actions workflows without explicit configuration. + +## Severity Level Configuration + +The `--levels` flag allows you to specify which SARIF severity levels should trigger issue creation. This provides flexibility in managing different types of security findings based on your team's priorities. + +### Supported Level Formats + +**SARIF Levels** (native SARIF format): +- `error` - High severity findings (default) +- `warning` - Medium severity findings +- `note` - Low severity findings +- `none` - Informational findings + +**Display Levels** (human-readable format): +- `High` - Maps to SARIF `error` +- `Medium` - Maps to SARIF `warning` +- `Low` - Maps to SARIF `note` +- `Info` - Maps to SARIF `none` + +### Usage Rules + +- **Case-insensitive**: All level comparisons are case-insensitive +- **Format consistency**: Cannot mix SARIF and display levels in the same command +- **Multiple values**: Use comma-separated values or repeat the flag +- **Default behavior**: When `--levels` is not specified, only `error` level findings create issues + +### Examples + +```bash +# Default behavior (error level only) +scanio sarif-issues --sarif report.sarif + +# Multiple SARIF levels +scanio sarif-issues --sarif report.sarif --levels error,warning + +# Multiple display levels +scanio sarif-issues --sarif report.sarif --levels High,Medium + +# All severity levels using SARIF format +scanio sarif-issues --sarif report.sarif --levels error,warning,note,none + +# Invalid mixing (will error) +scanio sarif-issues --sarif report.sarif --levels error,High +``` + +## Core Validation +The `sarif-issues` command includes several validation layers to ensure robust execution: +- **Required Parameters**: Validates that `--sarif`, `--namespace`, and `--repository` are provided via flags, environment variables, or auto-detected git metadata. +- **SARIF File Validation**: Ensures the SARIF file exists and can be parsed successfully. +- **GitHub Authentication**: Requires valid GitHub credentials configured through the GitHub plugin. +- **Severity Level Validation**: Validates and normalizes severity levels, preventing mixing of SARIF and display level formats. +- **Configurable Severity Filtering**: Processes SARIF results based on specified severity levels (default: "error" only). + +## Dry Run Mode + +The `--dry-run` flag allows you to preview what the command would do without making actual GitHub API calls. 
This is particularly useful for: + +- **Testing and Validation**: Verify the command behavior before running in production +- **Understanding Impact**: See exactly what issues would be created or closed +- **Debugging**: Troubleshoot issue correlation logic and SARIF processing +- **CI/CD Integration**: Validate SARIF files and command configuration + +### Dry Run Output Format + +When using `--dry-run`, the command provides detailed preview information: + +**For issues to be created:** +``` +[DRY RUN] Would create issue: + Title: [Semgrep][High][sql-injection] at app.py:11-29 + File: apps/demo/main.py + Lines: 11-29 + Severity: High + Scanner: Semgrep + Rule ID: sql-injection +``` + +**For issues to be closed:** +``` +[DRY RUN] Would close issue #42: + File: apps/demo/old-file.py + Lines: 5-10 + Rule ID: deprecated-rule + Reason: Not found in current scan +``` + +**Final summary:** +``` +[DRY RUN] Would create 3 issue(s); would close 1 resolved issue(s) +``` + +### Usage Example +```bash +scanio sarif-issues --sarif results/semgrep.sarif --dry-run +``` + +## GitHub Authentication Setup + +The `sarif-issues` command requires GitHub authentication to create and manage issues. Configure authentication using one of the following methods: + +### Environment Variables (Recommended for CI/CD) +```bash +export SCANIO_GITHUB_TOKEN="your-github-token" +export SCANIO_GITHUB_USERNAME="your-github-username" # Optional for HTTP auth +``` + +### Configuration File +Add to your `config.yml`: +```yaml +github_plugin: + token: "your-github-token" + username: "your-github-username" # Optional for HTTP auth +``` + +### Required Token Permissions +The GitHub token must have the following scopes: +- **`repo`** - Required for creating, updating, and listing issues +- **`read:org`** - Required for organizational repositories (optional for personal repos) + +For detailed GitHub plugin configuration, refer to [GitHub Plugin Documentation](plugin-github.md#configuration-references). + +## Usage Examples + +> **Recommendation:** Run the command from your repository root and pass `--source-folder` as repo-relative paths (for example `--source-folder apps/demo`). The flag defaults to `.` when omitted; if git metadata cannot be detected, permalinks and snippet hashing may be incomplete. + +### Basic Usage in GitHub Actions +Create issues from SARIF report using environment variables: +```bash +scanio sarif-issues --sarif results/semgrep.sarif +``` + +### Manual Usage with Explicit Parameters +Create issues with custom namespace and repository: +```bash +scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif +``` + +### Enhanced Issue Creation +Create issues with source code snippets, labels, and assignees: +```bash +scanio sarif-issues --sarif results/semgrep.sarif --source-folder . --labels bug,security --assignees alice,bob +``` + +### Configurable Severity Levels +Create issues for multiple severity levels using SARIF levels: +```bash +scanio sarif-issues --sarif results/semgrep.sarif --levels error,warning +``` + +Create issues for multiple severity levels using display levels: +```bash +scanio sarif-issues --sarif results/semgrep.sarif --levels High,Medium +``` + +### With Custom Git Reference +Create issues with specific commit reference for permalinks: +```bash +scanio sarif-issues --sarif results/semgrep.sarif --source-folder . 
--ref feature-branch
+```
+
+### Dry Run Mode
+Preview what issues would be created/closed without making actual GitHub API calls:
+```bash
+scanio sarif-issues --sarif results/semgrep.sarif --dry-run
+```
+
+This is useful for:
+- Testing and validation before running in production
+- Understanding what the command would do without making changes
+- Debugging issue correlation logic
+- Verifying SARIF file processing
+
+## Command Output Format
+
+### Normal Mode Output
+```
+Created 3 issue(s); closed 1 resolved issue(s)
+```
+
+### Dry Run Mode Output
+When using `--dry-run`, the command shows detailed preview information:
+
+**For issues to be created:**
+```
+[DRY RUN] Would create issue:
+  Title: [Semgrep][High][sql-injection] at app.py:11-29
+  File: apps/demo/main.py
+  Lines: 11-29
+  Severity: High
+  Scanner: Semgrep
+  Rule ID: sql-injection
+```
+
+**For issues to be closed:**
+```
+[DRY RUN] Would close issue #42:
+  File: apps/demo/old-file.py
+  Lines: 5-10
+  Rule ID: deprecated-rule
+  Reason: Not found in current scan
+```
+
+**Final summary:**
+```
+[DRY RUN] Would create 3 issue(s); would close 1 resolved issue(s)
+```
+
+### Logging Information
+The command also emits operational logs, including:
+- Number of open issues fetched from the repository
+- Issue correlation results (matched/unmatched)
+- Created and closed issue counts
+- Error details for failed operations
+
+## Issue Correlation Logic
+
+The command implements intelligent issue correlation to manage the lifecycle of security findings:
+
+### New Issue Creation
+- **Configurable Severity Levels**: Creates issues for SARIF findings with specified severity levels (default: "error" only)
+- **Duplicate Prevention**: Uses hierarchical correlation to match new findings against existing open issues
+- **Unmatched Findings**: Creates GitHub issues only for findings that don't match existing open issues through any correlation stage
+
+### Automatic Issue Closure
+- **Resolved Findings**: Automatically closes open issues that don't correlate with current scan results
+- **Comment Before Closure**: Adds the comment `Recent scan didn't see the issue; closing this as resolved.` before closing
+- **Managed Issues Only**: Only closes issues containing the scanio-managed annotation to avoid affecting manually created issues
+
+### Correlation Criteria
+The correlation logic uses a **4-stage hierarchical matching system** that processes stages in order, with earlier stages being more precise. Once an issue is matched in any stage, it's excluded from subsequent stages.
+
+**Required for all stages**: Scanner name and Rule ID must match exactly.
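+
+The four stages described below can be pictured as progressively looser match keys over a finding's fields. The sketch below is illustrative only and assumes a simplified `finding` type and `stageKey` helper; the real correlation package differs.
+```go
+package main
+
+import "fmt"
+
+// finding holds the fields that participate in correlation; the type and
+// its field names are assumptions for illustration, not the project's own.
+type finding struct {
+	Scanner, RuleID, File, SnippetHash string
+	StartLine, EndLine                 int
+}
+
+// stageKey builds the match key for a given stage (1 = most precise).
+// Scanner, rule ID, and filename take part in every stage; later stages
+// drop line numbers or the snippet hash.
+func stageKey(f finding, stage int) string {
+	base := fmt.Sprintf("%s|%s|%s", f.Scanner, f.RuleID, f.File)
+	switch stage {
+	case 1: // line range + snippet hash
+		return fmt.Sprintf("%s|%d|%d|%s", base, f.StartLine, f.EndLine, f.SnippetHash)
+	case 2: // snippet hash only, tolerates shifted lines
+		return base + "|" + f.SnippetHash
+	case 3: // exact line range, no hash available
+		return fmt.Sprintf("%s|%d|%d", base, f.StartLine, f.EndLine)
+	default: // stage 4: start line only
+		return fmt.Sprintf("%s|%d", base, f.StartLine)
+	}
+}
+
+func main() {
+	f := finding{"Semgrep OSS", "sql-injection", "apps/demo/main.py", "abc123", 11, 29}
+	for stage := 1; stage <= 4; stage++ {
+		fmt.Printf("stage %d key: %s\n", stage, stageKey(f, stage))
+	}
+}
+```
+Because an issue matched at an earlier stage is removed from the candidate pool, a loose stage-4 match can never claim a finding that a more precise stage already correlated.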
+
+**Stage 1 (Most Precise)**: Scanner + RuleID + Filename + StartLine + EndLine + SnippetHash
+- All fields must match exactly
+- Used when both issues have snippet hashes available
+
+**Stage 2**: Scanner + RuleID + Filename + SnippetHash
+- Matches based on code content fingerprint
+- Used when snippet hashes are available but line numbers differ
+
+**Stage 3**: Scanner + RuleID + Filename + StartLine + EndLine
+- Matches based on exact line range
+- Used when snippet hashes are not available
+
+**Stage 4 (Least Precise)**: Scanner + RuleID + Filename + StartLine
+- Matches based on file and starting line only
+- Fallback when end line information is missing
+
+### Issue Filtering for Correlation
+Only specific types of open issues are considered for correlation:
+- **Well-structured issues**: Must have Scanner, RuleID, and FilePath metadata
+- **Scanio-managed issues**: Must contain the scanio-managed annotation
+- **Malformed issues are skipped**: Issues without proper metadata are ignored to prevent accidental closure of manually created issues
+
+### Subfolder Scoping
+The command supports independent issue management for different subfolders in monorepo workflows:
+
+- **Scoped Correlation**: When `--source-folder` points to a subfolder, only open issues whose file paths fall within that subfolder are considered for correlation
+- **Independent Management**: Issues from different subfolders are managed independently, preventing cross-subfolder interference
+- **Root Scope**: When scanning from repository root (no `--source-folder` or `--source-folder` points to root), all issues are considered
+
+**Example Monorepo Workflow**:
+```bash
+# Frontend CI job - manages issues in apps/frontend only
+scanio sarif-issues --sarif frontend-results.sarif --source-folder apps/frontend
+
+# Backend CI job - manages issues in apps/backend only
+scanio sarif-issues --sarif backend-results.sarif --source-folder apps/backend
+```
+
+This enables separate CI jobs for different parts of a monorepo without issues from one subfolder affecting the other.
+
+## Issue Format
+
+### Issue Title Format
+```
+[<scanner>][<severity>][<rule name or rule ID>] at <file>:<start line>[-<end line>]
+```
+**Example**: `[Semgrep OSS][High][Express Missing CSRF Protection] at app.js:42-45`
+When a rule provides a human-friendly `name`, Scanio uses it; otherwise the rule ID is shown.
+
+### Issue Body Structure
+
+**Header**
+```markdown
+## 🐞 <rule heading>
+```
+Scanio prefers the SARIF rule's short description for the heading; if that is missing it falls back to the rule name, then to the raw rule ID.
+
+**Compact Metadata (Blockquote)**
+```markdown
+> **Rule ID**: <rule-id>
+> **Severity**: High, **Scanner**: Semgrep OSS
+> **File**: app.js, **Lines**: 42-45
+```
+
+**Rule Description**
+- Includes help text from SARIF rule definitions
+- Parses and formats reference links
+- Falls back to the rule's full description when markdown help is not available
+
+**GitHub Permalink**
+- Direct link to vulnerable code in repository
+- Uses the `--ref` value when supplied (branch or SHA), falling back to the current commit hash from git metadata for stable links
+- Includes line number anchors: `#L42-L45`
+
+**Security References**
+- Automatically generates links for CWE identifiers: `[CWE-79](https://cwe.mitre.org/data/definitions/79.html)`
+- Creates OWASP Top 10 links when applicable
+- Extracts from SARIF rule tags and properties
+
+**Snippet Hash**
+```markdown
+> **Snippet SHA256**: abc123...
+```
+
+**Management Annotation**
+```markdown
+> [!NOTE]
+> This issue was created and will be managed by scanio automation. Don't change body manually for proper processing, unless you know what you do
+```
+
+### Example Complete Issue Body
+```markdown
+## 🐞 javascript.express.security.audit.express-check-csurf-middleware-usage.express-check-csurf-middleware-usage
+
+> **Rule ID**: javascript.express.security.audit.express-check-csurf-middleware-usage.express-check-csurf-middleware-usage
+> **Severity**: High, **Scanner**: Semgrep OSS
+> **File**: app.js, **Lines**: 42-45
+
+This Express.js application appears to be missing CSRF protection middleware. CSRF attacks can force authenticated users to perform unintended actions.
+
+https://github.com/scan-io-git/scan-io/blob/abc123def456/app.js#L42-L45
+
+References:
+- [CWE-352](https://cwe.mitre.org/data/definitions/352.html)
+- [OWASP A01:2021 - Broken Access Control](https://owasp.org/Top10/A01_2021-Broken_Access_Control/)
+
+> **Snippet SHA256**: abc123def456789...
+
+> [!NOTE]
+> This issue was created and will be managed by scanio automation. Don't change body manually for proper processing, unless you know what you do
+```
+
+This format provides comprehensive information while maintaining machine readability for correlation and automated management.
diff --git a/internal/git/metadata.go b/internal/git/metadata.go
index 82fbbc7..072d66d 100644
--- a/internal/git/metadata.go
+++ b/internal/git/metadata.go
@@ -2,6 +2,7 @@ package git
 
 import (
 	"fmt"
+	"path/filepath"
 	"strings"
 
 	"github.com/go-git/go-git/v5"
@@ -19,44 +20,50 @@ type RepositoryMetadata struct {
 // CollectRepositoryMetadata function collects repository metadata
 // that includes branch name, commit hash, repository full name, subfolder and repository root folder
 func CollectRepositoryMetadata(sourceFolder string) (*RepositoryMetadata, error) {
-	defaultRepositoryMetadata := &RepositoryMetadata{
-		RepoRootFolder: sourceFolder,
-		Subfolder:      "",
+	if sourceFolder == "" {
+		return &RepositoryMetadata{}, fmt.Errorf("source folder is not set")
 	}
 
-	if sourceFolder == "" {
-		return defaultRepositoryMetadata, fmt.Errorf("source folder is not set")
+	if absSource, err := filepath.Abs(sourceFolder); err == nil {
+		sourceFolder = absSource
+	}
+
+	md := &RepositoryMetadata{
+		RepoRootFolder: filepath.Clean(sourceFolder),
 	}
 
 	repoRootFolder, err := findGitRepositoryPath(sourceFolder)
 	if err != nil {
-		return defaultRepositoryMetadata, err
+		return md, err
 	}
 
+	md.RepoRootFolder = filepath.Clean(repoRootFolder)
+
 	repo, err := git.PlainOpen(repoRootFolder)
 	if err != nil {
-		return defaultRepositoryMetadata, fmt.Errorf("failed to open repository: %w", err)
+		return md, fmt.Errorf("failed to open repository: %w", err)
 	}
 
-	head, err := repo.Head()
-	if err != nil {
-		return defaultRepositoryMetadata, fmt.Errorf("failed to get HEAD: %w", err)
+	if rel, err := filepath.Rel(repoRootFolder, sourceFolder); err == nil && rel != "."
{ + md.Subfolder = filepath.ToSlash(rel) } - branchName := head.Name().Short() - commitHash := head.Hash().String() - remote, err := repo.Remote("origin") - if err != nil { - return defaultRepositoryMetadata, fmt.Errorf("failed to get remote: %w", err) + if head, err := repo.Head(); err == nil { + if head.Name().IsBranch() { + branchName := head.Name().Short() + md.BranchName = &branchName + } + + hash := head.Hash().String() + md.CommitHash = &hash } - repositoryFullName := strings.TrimSuffix(remote.Config().URLs[0], ".git") + if remote, err := repo.Remote("origin"); err == nil { + if cfg := remote.Config(); cfg != nil && len(cfg.URLs) > 0 { + repositoryFullName := strings.TrimSuffix(cfg.URLs[0], ".git") + md.RepositoryFullName = &repositoryFullName + } + } - return &RepositoryMetadata{ - BranchName: &branchName, - CommitHash: &commitHash, - RepositoryFullName: &repositoryFullName, - Subfolder: strings.TrimPrefix(sourceFolder, repoRootFolder), - RepoRootFolder: repoRootFolder, - }, nil + return md, nil } diff --git a/internal/sarif/message_formatter.go b/internal/sarif/message_formatter.go new file mode 100644 index 0000000..049dab3 --- /dev/null +++ b/internal/sarif/message_formatter.go @@ -0,0 +1,274 @@ +package sarif + +import ( + "fmt" + "regexp" + "strconv" + "strings" + + "github.com/owenrumney/go-sarif/v2/sarif" + "github.com/scan-io-git/scan-io/internal/git" +) + +// MessageFormatOptions contains the configuration needed to format SARIF messages with GitHub links +type MessageFormatOptions struct { + Namespace string + Repository string + Ref string + SourceFolder string +} + +// FormatResultMessage is the main entry point for formatting SARIF result messages +// It processes the message template, substitutes arguments, and converts location references to GitHub hyperlinks +func FormatResultMessage(result *sarif.Result, repoMetadata *git.RepositoryMetadata, options MessageFormatOptions) string { + // Extract locations for reference lookup + locations := extractLocationsForFormatting(result) + if len(locations) == 0 { + // No locations available, return plain text message + if result.Message.Text != nil { + return *result.Message.Text + } + return "" + } + + // Check if this is a CodeQL-style message (direct markdown in text field) + if result.Message.Text != nil && result.Message.Markdown == nil && len(result.Message.Arguments) == 0 { + return formatCodeQLStyleMessage(*result.Message.Text, result, repoMetadata, options) + } + + // Format the message with arguments and location links (Snyk style) + formatted := formatMessageWithArguments(&result.Message, locations, repoMetadata, options) + if formatted != "" { + return formatted + } + + // Fallback to plain text + if result.Message.Text != nil { + return *result.Message.Text + } + + return "" +} + +// formatCodeQLStyleMessage handles CodeQL-style messages where the text contains direct markdown links +// Example: "This template construction depends on a [user-provided value](1)." 
+func formatCodeQLStyleMessage(text string, result *sarif.Result, repoMetadata *git.RepositoryMetadata, options MessageFormatOptions) string { + // Pattern to match [text](id) where id is a number + pattern := regexp.MustCompile(`\[([^\]]+)\]\((\d+)\)`) + + return pattern.ReplaceAllStringFunc(text, func(match string) string { + matches := pattern.FindStringSubmatch(match) + if len(matches) != 3 { + return match // Return original if pattern doesn't match + } + + linkText := matches[1] + idStr := matches[2] + + // Convert id to integer (CodeQL uses 1-based indexing) + id, err := strconv.Atoi(idStr) + if err != nil { + return match // Return original if id is not a number + } + + // Find the relatedLocation with matching id + var targetLocation *sarif.Location + for _, relLoc := range result.RelatedLocations { + if relLoc.Id != nil && *relLoc.Id == uint(id) { + // Create a Location from RelatedLocation + targetLocation = &sarif.Location{ + PhysicalLocation: relLoc.PhysicalLocation, + } + break + } + } + + if targetLocation != nil { + link := buildLocationLink(targetLocation, repoMetadata, options) + if link != "" { + return fmt.Sprintf("[%s](%s)", linkText, link) + } + } + + // If we can't build a link, return the original text without the reference + return linkText + }) +} + +// extractLocationsForFormatting extracts locations from SARIF result in priority order: +// 1) relatedLocations, 2) codeFlows[0].threadFlows[0].locations, 3) empty array +func extractLocationsForFormatting(result *sarif.Result) []*sarif.Location { + var locations []*sarif.Location + + // Priority 1: relatedLocations + if len(result.RelatedLocations) > 0 { + for _, relLoc := range result.RelatedLocations { + if relLoc != nil { + locations = append(locations, relLoc) + } + } + return locations + } + + // Priority 2: codeFlows[0].threadFlows[0].locations + if len(result.CodeFlows) > 0 && len(result.CodeFlows[0].ThreadFlows) > 0 { + threadFlow := result.CodeFlows[0].ThreadFlows[0] + for _, threadLoc := range threadFlow.Locations { + if threadLoc.Location != nil { + locations = append(locations, threadLoc.Location) + } + } + return locations + } + + // Priority 3: empty array (fallback) + return locations +} + +// formatMessageWithArguments processes the message template, substitutes placeholders, and converts location references +func formatMessageWithArguments(message *sarif.Message, locations []*sarif.Location, repoMetadata *git.RepositoryMetadata, options MessageFormatOptions) string { + // Use markdown template if available, otherwise fall back to text + template := "" + if message.Markdown != nil { + template = *message.Markdown + } else if message.Text != nil { + template = *message.Text + } else { + return "" + } + + // If no arguments, return template as-is + if len(message.Arguments) == 0 { + return template + } + + // Process each argument and substitute placeholders + result := template + for i, arg := range message.Arguments { + placeholder := fmt.Sprintf("{%d}", i) + + // Parse the argument to extract text and location references + text, refs := parseLocationReference(arg) + + // Convert location references to hyperlinks + formattedArg := formatLocationReferences(text, refs, locations, repoMetadata, options) + + // Substitute the placeholder + result = strings.ReplaceAll(result, placeholder, formattedArg) + } + + return result +} + +// parseLocationReference parses SARIF message arguments to extract text and location reference numbers +// Examples: +// +// "[user-provided value](1)" -> text="user-provided 
value", refs=[1] +// "[flows](1),(2),(3),(4),(5),(6)" -> text="flows", refs=[1,2,3,4,5,6] +func parseLocationReference(arg string) (text string, refs []int) { + // Pattern to match [text](ref1),(ref2),... + // This handles both single references and multiple references + pattern := regexp.MustCompile(`^\[([^\]]+)\]\((.+)\)$`) + matches := pattern.FindStringSubmatch(arg) + + if len(matches) != 3 { + // Malformed argument, return as-is + return arg, nil + } + + text = matches[1] + refsStr := matches[2] + + // Parse reference numbers (handle both single and multiple) + // The format is like "1),(2),(3),(4),(5),(6" - we need to extract numbers + refParts := strings.Split(refsStr, "),(") + for _, part := range refParts { + part = strings.TrimSpace(part) + // Remove any remaining parentheses + part = strings.Trim(part, "()") + if refNum, err := strconv.Atoi(part); err == nil { + refs = append(refs, refNum) + } + } + + return text, refs +} + +// formatLocationReferences converts location reference numbers to GitHub hyperlinks +func formatLocationReferences(text string, refs []int, locations []*sarif.Location, repoMetadata *git.RepositoryMetadata, options MessageFormatOptions) string { + if len(refs) == 0 { + return text + } + + // Build links for each reference + var links []string + for _, ref := range refs { + if ref >= 0 && ref < len(locations) { + link := buildLocationLink(locations[ref], repoMetadata, options) + if link != "" { + links = append(links, fmt.Sprintf("[%d](%s)", ref, link)) + } else { + links = append(links, fmt.Sprintf("%d", ref)) + } + } else { + // Invalid reference, use as-is + links = append(links, fmt.Sprintf("%d", ref)) + } + } + + // Format based on number of references + if len(refs) == 1 { + // Single reference: "[text](link)" + if refs[0] >= 0 && refs[0] < len(locations) { + link := buildLocationLink(locations[refs[0]], repoMetadata, options) + if link != "" { + return fmt.Sprintf("[%s](%s)", text, link) + } else { + return fmt.Sprintf("%s (%d)", text, refs[0]) + } + } else { + return fmt.Sprintf("%s (%d)", text, refs[0]) + } + } else { + // Multiple references: "text ([1](link1) > [2](link2) > ...)" + linkChain := strings.Join(links, " > ") + return fmt.Sprintf("%s (%s)", text, linkChain) + } +} + +// buildLocationLink constructs a GitHub permalink for a SARIF location +func buildLocationLink(location *sarif.Location, repoMetadata *git.RepositoryMetadata, options MessageFormatOptions) string { + if location.PhysicalLocation == nil || location.PhysicalLocation.ArtifactLocation == nil { + return "" + } + + artifact := location.PhysicalLocation.ArtifactLocation + if artifact.URI == nil { + return "" + } + + // Get file path and convert to repository-relative path + filePath := *artifact.URI + repoPath := ConvertToRepoRelativePath(filePath, repoMetadata, options.SourceFolder) + + // Get line information + region := location.PhysicalLocation.Region + if region == nil { + return "" + } + + startLine := 1 + endLine := 1 + + if region.StartLine != nil { + startLine = *region.StartLine + } + if region.EndLine != nil { + endLine = *region.EndLine + } else { + endLine = startLine + } + + // Build GitHub permalink using shared helper + return BuildGitHubPermalink(options.Namespace, options.Repository, options.Ref, repoPath, startLine, endLine) +} diff --git a/internal/sarif/message_formatter_test.go b/internal/sarif/message_formatter_test.go new file mode 100644 index 0000000..01bbdae --- /dev/null +++ b/internal/sarif/message_formatter_test.go @@ -0,0 +1,478 @@ +package sarif + 
+import ( + "os" + "path/filepath" + "testing" + + "github.com/owenrumney/go-sarif/v2/sarif" + "github.com/scan-io-git/scan-io/internal/git" +) + +func TestFormatMessageWithSingleReference(t *testing.T) { + // Test CodeQL style single reference: "[user-provided value](1)" + message := &sarif.Message{ + Markdown: stringPtr("This template construction depends on a {0}."), + Arguments: []string{ + "[user-provided value](0)", + }, + } + + locations := []*sarif.Location{ + createTestLocation("main.py", 1, 50, 1, 57), + } + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + result := formatMessageWithArguments(message, locations, repoMetadata, options) + expected := "This template construction depends on a [user-provided value](https://github.com/test-org/test-repo/blob/main/main.py#L1)." + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestFormatMessageWithMultipleReferences(t *testing.T) { + // Test Snyk style multiple references: "[flows](1),(2),(3),(4),(5),(6)" + message := &sarif.Message{ + Markdown: stringPtr("Unsanitized input from {0} {1} into {2}, where it is used to render an HTML page returned to the user. This may result in a Cross-Site Scripting attack (XSS)."), + Arguments: []string{ + "[an HTTP parameter](0)", + "[flows](1),(2),(3),(4),(5),(6)", + "[flask.render_template_string](7)", + }, + } + + locations := []*sarif.Location{ + createTestLocation("main.py", 1, 50, 1, 57), // 0 + createTestLocation("main.py", 8, 18, 8, 25), // 1 + createTestLocation("main.py", 8, 18, 8, 30), // 2 + createTestLocation("main.py", 8, 18, 8, 46), // 3 + createTestLocation("main.py", 8, 5, 8, 15), // 4 + createTestLocation("main.py", 11, 5, 11, 13), // 5 + createTestLocation("main.py", 29, 35, 29, 43), // 6 + createTestLocation("main.py", 29, 12, 29, 44), // 7 + } + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + result := formatMessageWithArguments(message, locations, repoMetadata, options) + expected := "Unsanitized input from [an HTTP parameter](https://github.com/test-org/test-repo/blob/main/main.py#L1) flows ([1](https://github.com/test-org/test-repo/blob/main/main.py#L8) > [2](https://github.com/test-org/test-repo/blob/main/main.py#L8) > [3](https://github.com/test-org/test-repo/blob/main/main.py#L8) > [4](https://github.com/test-org/test-repo/blob/main/main.py#L8) > [5](https://github.com/test-org/test-repo/blob/main/main.py#L11) > [6](https://github.com/test-org/test-repo/blob/main/main.py#L29)) into [flask.render_template_string](https://github.com/test-org/test-repo/blob/main/main.py#L29), where it is used to render an HTML page returned to the user. This may result in a Cross-Site Scripting attack (XSS)." 
+ + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestFormatMessageWithPlaceholders(t *testing.T) { + // Test template with placeholders + message := &sarif.Message{ + Markdown: stringPtr("Input from {0} flows to {1}"), + Arguments: []string{ + "[user input](0)", + "[template](1)", + }, + } + + locations := []*sarif.Location{ + createTestLocation("main.py", 1, 1, 1, 10), + createTestLocation("main.py", 2, 1, 2, 10), + } + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + result := formatMessageWithArguments(message, locations, repoMetadata, options) + expected := "Input from [user input](https://github.com/test-org/test-repo/blob/main/main.py#L1) flows to [template](https://github.com/test-org/test-repo/blob/main/main.py#L2)" + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestFormatMessageNoMarkdown(t *testing.T) { + // Test fallback to plain text when no markdown template + message := &sarif.Message{ + Text: stringPtr("Plain text message without formatting"), + } + + locations := []*sarif.Location{} + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + result := formatMessageWithArguments(message, locations, repoMetadata, options) + expected := "Plain text message without formatting" + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestFormatMessageMissingLocations(t *testing.T) { + // Test graceful degradation when locations are missing + message := &sarif.Message{ + Markdown: stringPtr("Input from {0} flows to {1}"), + Arguments: []string{ + "[user input](0)", + "[template](1)", + }, + } + + locations := []*sarif.Location{} // Empty locations + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + result := formatMessageWithArguments(message, locations, repoMetadata, options) + expected := "Input from user input (0) flows to template (1)" // References show as plain text with numbers + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestExtractLocationsFromRelatedLocations(t *testing.T) { + // Test priority 1: relatedLocations + result := &sarif.Result{ + RelatedLocations: []*sarif.Location{ + createTestLocation("main.py", 1, 50, 1, 57), + }, + } + + locations := extractLocationsForFormatting(result) + + if len(locations) != 1 { + t.Errorf("Expected 1 location, got %d", len(locations)) + } + + if locations[0].PhysicalLocation.ArtifactLocation.URI == nil || *locations[0].PhysicalLocation.ArtifactLocation.URI != "main.py" { + t.Errorf("Expected location URI 'main.py', got %v", locations[0].PhysicalLocation.ArtifactLocation.URI) + } +} + +func TestExtractLocationsFromCodeFlows(t *testing.T) { + // Test priority 2: codeFlows fallback + result := &sarif.Result{ + CodeFlows: []*sarif.CodeFlow{ + { + ThreadFlows: []*sarif.ThreadFlow{ + { + Locations: []*sarif.ThreadFlowLocation{ + { + 
Location: createTestLocation("main.py", 1, 50, 1, 57), + }, + { + Location: createTestLocation("main.py", 8, 18, 8, 25), + }, + }, + }, + }, + }, + }, + } + + locations := extractLocationsForFormatting(result) + + if len(locations) != 2 { + t.Errorf("Expected 2 locations, got %d", len(locations)) + } +} + +func TestExtractLocationsEmpty(t *testing.T) { + // Test priority 3: empty fallback + result := &sarif.Result{} + + locations := extractLocationsForFormatting(result) + + if len(locations) != 0 { + t.Errorf("Expected 0 locations, got %d", len(locations)) + } +} + +func TestParseLocationReference(t *testing.T) { + tests := []struct { + name string + input string + expected string + refs []int + }{ + { + name: "single reference", + input: "[user-provided value](1)", + expected: "user-provided value", + refs: []int{1}, + }, + { + name: "multiple references", + input: "[flows](1),(2),(3),(4),(5),(6)", + expected: "flows", + refs: []int{1, 2, 3, 4, 5, 6}, + }, + { + name: "malformed input", + input: "plain text without brackets", + expected: "plain text without brackets", + refs: nil, + }, + { + name: "invalid reference numbers", + input: "[text](abc),(def)", + expected: "text", + refs: []int{}, // Invalid numbers are skipped + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + text, refs := parseLocationReference(tt.input) + if text != tt.expected { + t.Errorf("Expected text '%s', got '%s'", tt.expected, text) + } + if len(refs) != len(tt.refs) { + t.Errorf("Expected %d refs, got %d", len(tt.refs), len(refs)) + } + for i, ref := range refs { + if ref != tt.refs[i] { + t.Errorf("Expected ref[%d] = %d, got %d", i, tt.refs[i], ref) + } + } + }) + } +} + +func TestBuildLocationLink(t *testing.T) { + location := createTestLocation("main.py", 10, 5, 10, 15) + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + result := buildLocationLink(location, repoMetadata, options) + expected := "https://github.com/test-org/test-repo/blob/main/main.py#L10" + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestBuildLocationLinkRange(t *testing.T) { + location := createTestLocation("main.py", 10, 5, 15, 20) + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + result := buildLocationLink(location, repoMetadata, options) + expected := "https://github.com/test-org/test-repo/blob/main/main.py#L10-L15" + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestBuildLocationLinkAbsolutePath(t *testing.T) { + location := createTestLocation("/test/source/main.py", 10, 5, 10, 15) + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + result := buildLocationLink(location, repoMetadata, options) + expected := "https://github.com/test-org/test-repo/blob/main/main.py#L10" + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func 
TestBuildLocationLinkWithSubfolder(t *testing.T) { + // Test the specific case mentioned in the issue: subfolder path resolution + // Create a temporary directory structure to simulate the repository + tempDir, err := os.MkdirTemp("", "sarif_test") + if err != nil { + t.Fatalf("failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + repoRoot := filepath.Join(tempDir, "scanio-test") + subfolder := filepath.Join(repoRoot, "apps", "demo") + if err := os.MkdirAll(subfolder, 0755); err != nil { + t.Fatalf("failed to create subfolder: %v", err) + } + + // Create the file so path resolution works correctly + mainFile := filepath.Join(subfolder, "main.py") + if err := os.WriteFile(mainFile, []byte("test"), 0644); err != nil { + t.Fatalf("failed to create test file: %v", err) + } + + location := createTestLocation("main.py", 34, 1, 34, 10) + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: repoRoot, + Subfolder: "apps/demo", + CommitHash: stringPtr("aec0b795c350ff53fe9ab01adf862408aa34c3fd"), + } + + options := MessageFormatOptions{ + Namespace: "scan-io-git", + Repository: "scanio-test", + Ref: "aec0b795c350ff53fe9ab01adf862408aa34c3fd", + SourceFolder: subfolder, + } + + result := buildLocationLink(location, repoMetadata, options) + expected := "https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py#L34" + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestFormatCodeQLStyleMessage(t *testing.T) { + // Test CodeQL style message with direct markdown links + message := &sarif.Message{ + Text: stringPtr("This template construction depends on a [user-provided value](1)."), + } + + result := &sarif.Result{ + Message: *message, + RelatedLocations: []*sarif.Location{ + { + Id: uintPtr(1), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(1), + StartColumn: intPtr(50), + EndLine: intPtr(1), + EndColumn: intPtr(57), + }, + }, + }, + }, + } + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + formatted := FormatResultMessage(result, repoMetadata, options) + expected := "This template construction depends on a [user-provided value](https://github.com/test-org/test-repo/blob/main/main.py#L1)." 
+ + if formatted != expected { + t.Errorf("Expected: %s\nGot: %s", expected, formatted) + } +} + +// Helper functions for creating test data + +func createTestLocation(uri string, startLine, startCol, endLine, endCol int) *sarif.Location { + return &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr(uri), + }, + Region: &sarif.Region{ + StartLine: intPtr(startLine), + StartColumn: intPtr(startCol), + EndLine: intPtr(endLine), + EndColumn: intPtr(endCol), + }, + }, + } +} + +func stringPtr(s string) *string { + return &s +} + +func intPtr(i int) *int { + return &i +} + +func uintPtr(u uint) *uint { + return &u +} diff --git a/internal/sarif/path_helpers.go b/internal/sarif/path_helpers.go new file mode 100644 index 0000000..ca62af3 --- /dev/null +++ b/internal/sarif/path_helpers.go @@ -0,0 +1,339 @@ +package sarif + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/owenrumney/go-sarif/v2/sarif" + "github.com/scan-io-git/scan-io/internal/git" +) + +// NormalisedSubfolder extracts and normalizes the subfolder from repository metadata. +// It returns the subfolder path with forward slashes and no leading/trailing slashes. +// Returns empty string if metadata is nil or subfolder is empty. +func NormalisedSubfolder(md *git.RepositoryMetadata) string { + if md == nil { + return "" + } + sub := strings.Trim(md.Subfolder, "/\\") + // Replace all backslashes with forward slashes for cross-platform compatibility + sub = strings.ReplaceAll(sub, "\\", "/") + return sub +} + +// PathWithin checks if a path is within another path (root). +// It handles both absolute and relative paths, attempting to resolve them first. +// Returns true if path is within root, or if root is empty. +func PathWithin(path, root string) bool { + if root == "" { + return true + } + cleanPath, err1 := filepath.Abs(path) + cleanRoot, err2 := filepath.Abs(root) + if err1 != nil || err2 != nil { + cleanPath = filepath.Clean(path) + cleanRoot = filepath.Clean(root) + } + if cleanPath == cleanRoot { + return true + } + rootWithSep := cleanRoot + string(filepath.Separator) + return strings.HasPrefix(cleanPath, rootWithSep) +} + +// ResolveRelativeLocalPath resolves a relative URI to a local filesystem path. +// It tries multiple base directories in order of preference: +// 1. repoRoot +// 2. repoRoot/subfolder (if subfolder is provided) +// 3. absSource +// +// For each base, it checks if the resolved path exists on the filesystem and is within repoRoot. +// If no path exists, it returns the first candidate that would be within repoRoot. +// Falls back to absSource-based path if all else fails. 
+// +// Parameters: +// - cleanURI: the relative URI path (already cleaned) +// - repoRoot: the repository root directory (optional) +// - subfolder: the subfolder within the repository (optional) +// - absSource: the absolute source folder path (optional) +func ResolveRelativeLocalPath(cleanURI, repoRoot, subfolder, absSource string) string { + candidateRel := cleanURI + var bases []string + seen := map[string]struct{}{} + + addBase := func(base string) { + if base == "" { + return + } + if abs, err := filepath.Abs(base); err == nil { + base = abs + } else { + base = filepath.Clean(base) + } + if _, ok := seen[base]; ok { + return + } + seen[base] = struct{}{} + bases = append(bases, base) + } + + addBase(repoRoot) + if repoRoot != "" && subfolder != "" { + addBase(filepath.Join(repoRoot, filepath.FromSlash(subfolder))) + } + addBase(absSource) + + // Try each base directory, checking if the file exists + for _, base := range bases { + candidate := filepath.Clean(filepath.Join(base, candidateRel)) + if repoRoot != "" && !PathWithin(candidate, repoRoot) { + continue + } + if _, err := os.Stat(candidate); err == nil { + return candidate + } + } + + // If no file exists, return the first valid candidate path + if len(bases) > 0 { + candidate := filepath.Clean(filepath.Join(bases[0], candidateRel)) + if repoRoot == "" || PathWithin(candidate, repoRoot) { + return candidate + } + } + + // Final fallback to absSource + if absSource != "" { + return filepath.Clean(filepath.Join(absSource, candidateRel)) + } + return "" +} + +// ConvertToRepoRelativePath converts a SARIF artifact URI to a repository-relative path. +// This function handles both absolute and relative URIs, normalizing them to repo-relative paths. +// +// The conversion process: +// 1. Normalizes the URI (removes file:// prefix, converts to OS path separators) +// 2. For absolute paths: calculates relative path from repoRoot or sourceFolder +// 3. For relative paths: resolves to absolute first, then calculates repo-relative path +// 4. Ensures subfolder prefix is included when scanning from a subdirectory +// +// Parameters: +// - rawURI: the artifact URI from SARIF (may be absolute or relative, may have file:// prefix) +// - repoMetadata: repository metadata containing RepoRootFolder and Subfolder (optional) +// - sourceFolder: the source folder provided by the user (optional) +// +// Returns: +// - A forward-slash separated path relative to the repository root +// - Empty string if the URI is invalid or empty +func ConvertToRepoRelativePath(rawURI string, repoMetadata *git.RepositoryMetadata, sourceFolder string) string { + rawURI = strings.TrimSpace(rawURI) + if rawURI == "" { + return "" + } + + repoPath := "" + subfolder := NormalisedSubfolder(repoMetadata) + var repoRoot string + if repoMetadata != nil && strings.TrimSpace(repoMetadata.RepoRootFolder) != "" { + repoRoot = filepath.Clean(repoMetadata.RepoRootFolder) + } + absSource := strings.TrimSpace(sourceFolder) + if absSource != "" { + if abs, err := filepath.Abs(absSource); err == nil { + absSource = abs + } else { + absSource = filepath.Clean(absSource) + } + } + + // Normalize URI to the host OS path representation + osURI := filepath.FromSlash(rawURI) + osURI = strings.TrimPrefix(osURI, "file://") + cleanURI := filepath.Clean(osURI) + + if filepath.IsAbs(cleanURI) { + // Absolute path: calculate repo-relative path + localPath := cleanURI + if repoRoot != "" { + if rel, err := filepath.Rel(repoRoot, localPath); err == nil { + if rel != "." 
&& !strings.HasPrefix(rel, "..") { + repoPath = filepath.ToSlash(rel) + } + } + } + if repoPath == "" && absSource != "" { + if rel, err := filepath.Rel(absSource, localPath); err == nil { + repoPath = filepath.ToSlash(rel) + } + } + if repoPath == "" { + repoPath = filepath.ToSlash(strings.TrimPrefix(localPath, string(filepath.Separator))) + } + } else { + // Relative path: resolve to absolute first + localPath := ResolveRelativeLocalPath(cleanURI, repoRoot, subfolder, absSource) + + if repoRoot != "" && localPath != "" && PathWithin(localPath, repoRoot) { + if rel, err := filepath.Rel(repoRoot, localPath); err == nil { + if rel != "." { + repoPath = filepath.ToSlash(rel) + } + } + } + + if repoPath == "" { + normalised := strings.TrimLeft(filepath.ToSlash(cleanURI), "./") + if subfolder != "" && !strings.HasPrefix(normalised, subfolder+"/") && normalised != subfolder { + repoPath = filepath.ToSlash(filepath.Join(subfolder, normalised)) + } else { + repoPath = filepath.ToSlash(normalised) + } + } + } + + repoPath = strings.TrimLeft(repoPath, "/") + repoPath = filepath.ToSlash(repoPath) + return repoPath +} + +// BuildGitHubPermalink constructs a GitHub permalink for a file and line range. +// It takes the core components needed for URL construction and handles the anchor format. +// Returns empty string if any critical component is missing. +// +// Parameters: +// - namespace: GitHub namespace/organization +// - repository: GitHub repository name +// - ref: Git reference (commit hash, branch, or tag) +// - repoRelativePath: file path relative to repository root (forward slashes) +// - startLine: starting line number (1-based) +// - endLine: ending line number (1-based, defaults to startLine if 0) +// +// Returns: +// - GitHub permalink string in format: https://github.com/{namespace}/{repo}/blob/{ref}/{file}#L{start}-L{end} +// - Empty string if any required parameter is missing +func BuildGitHubPermalink(namespace, repository, ref, repoRelativePath string, startLine, endLine int) string { + // Validate required parameters + if namespace == "" || repository == "" || ref == "" || repoRelativePath == "" { + return "" + } + + // Build base URL + baseURL := fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s", namespace, repository, ref, repoRelativePath) + + // Handle line anchor + if startLine <= 0 { + return baseURL + } + + if endLine <= 0 || endLine == startLine || endLine < startLine { + return fmt.Sprintf("%s#L%d", baseURL, startLine) + } + + return fmt.Sprintf("%s#L%d-L%d", baseURL, startLine, endLine) +} + +// ExtractFileURIFromResult derives both the repository-relative path and local filesystem path +// for the first location in a SARIF result. When repository metadata is available the repo-relative +// path is anchored at the repository root; otherwise the function falls back to trimming the +// provided source folder (preserving legacy behaviour). 
+func ExtractFileURIFromResult(res *sarif.Result, absSourceFolder string, repoMetadata *git.RepositoryMetadata) (string, string) { + if res == nil || len(res.Locations) == 0 { + return "", "" + } + loc := res.Locations[0] + if loc.PhysicalLocation == nil { + return "", "" + } + art := loc.PhysicalLocation.ArtifactLocation + if art == nil || art.URI == nil { + return "", "" + } + rawURI := strings.TrimSpace(*art.URI) + if rawURI == "" { + return "", "" + } + + // Use shared function to get repo-relative path + repoPath := ConvertToRepoRelativePath(rawURI, repoMetadata, absSourceFolder) + + // Calculate local path for file operations (snippet hashing, etc.) + localPath := CalculateLocalPath(rawURI, repoMetadata, absSourceFolder) + + return repoPath, localPath +} + +// CalculateLocalPath determines the absolute local filesystem path for a SARIF URI. +// This is used for reading files for snippet hashing and other local file operations. +func CalculateLocalPath(rawURI string, repoMetadata *git.RepositoryMetadata, absSourceFolder string) string { + subfolder := NormalisedSubfolder(repoMetadata) + var repoRoot string + if repoMetadata != nil && strings.TrimSpace(repoMetadata.RepoRootFolder) != "" { + repoRoot = filepath.Clean(repoMetadata.RepoRootFolder) + } + absSource := strings.TrimSpace(absSourceFolder) + if absSource != "" { + if abs, err := filepath.Abs(absSource); err == nil { + absSource = abs + } else { + absSource = filepath.Clean(absSource) + } + } + + // Normalize URI to the host OS path representation + osURI := filepath.FromSlash(rawURI) + osURI = strings.TrimPrefix(osURI, "file://") + cleanURI := filepath.Clean(osURI) + + if filepath.IsAbs(cleanURI) { + return cleanURI + } + + // Relative path - resolve to absolute + return ResolveRelativeLocalPath(cleanURI, repoRoot, subfolder, absSource) +} + +// ExtractRegionFromResult returns start and end line numbers (0 when not present) +// taken from the SARIF result's first location region. +func ExtractRegionFromResult(res *sarif.Result) (int, int) { + if res == nil || len(res.Locations) == 0 { + return 0, 0 + } + loc := res.Locations[0] + if loc.PhysicalLocation == nil || loc.PhysicalLocation.Region == nil { + return 0, 0 + } + start := 0 + end := 0 + if loc.PhysicalLocation.Region.StartLine != nil { + start = *loc.PhysicalLocation.Region.StartLine + } + if loc.PhysicalLocation.Region.EndLine != nil { + end = *loc.PhysicalLocation.Region.EndLine + } + return start, end +} + +// DisplayRuleHeading returns the preferred human-friendly rule heading for the issue body: +// 1. rule.ShortDescription.Text when available. +// 2. rule.Name when available. +// 3. rule.ID as a fallback. 
+func DisplayRuleHeading(rule *sarif.ReportingDescriptor) string { + if rule != nil { + if rule.ShortDescription != nil && rule.ShortDescription.Text != nil { + if heading := strings.TrimSpace(*rule.ShortDescription.Text); heading != "" { + return heading + } + } + if rule.Name != nil { + if heading := strings.TrimSpace(*rule.Name); heading != "" { + return heading + } + } + // Parse ruleId from rule.ID instead of separate parameter + return strings.TrimSpace(rule.ID) + } + return "" +} diff --git a/internal/sarif/path_helpers_test.go b/internal/sarif/path_helpers_test.go new file mode 100644 index 0000000..8383b59 --- /dev/null +++ b/internal/sarif/path_helpers_test.go @@ -0,0 +1,671 @@ +package sarif + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/owenrumney/go-sarif/v2/sarif" + "github.com/scan-io-git/scan-io/internal/git" +) + +func TestNormalisedSubfolder(t *testing.T) { + tests := []struct { + name string + metadata *git.RepositoryMetadata + expected string + }{ + { + name: "nil metadata", + metadata: nil, + expected: "", + }, + { + name: "empty subfolder", + metadata: &git.RepositoryMetadata{ + Subfolder: "", + }, + expected: "", + }, + { + name: "subfolder with forward slash", + metadata: &git.RepositoryMetadata{ + Subfolder: "apps/demo", + }, + expected: "apps/demo", + }, + { + name: "subfolder with leading slash", + metadata: &git.RepositoryMetadata{ + Subfolder: "/apps/demo", + }, + expected: "apps/demo", + }, + { + name: "subfolder with trailing slash", + metadata: &git.RepositoryMetadata{ + Subfolder: "apps/demo/", + }, + expected: "apps/demo", + }, + { + name: "subfolder with backslash", + metadata: &git.RepositoryMetadata{ + Subfolder: "apps\\demo", + }, + expected: "apps/demo", + }, + { + name: "subfolder with both slashes", + metadata: &git.RepositoryMetadata{ + Subfolder: "/apps/demo\\", + }, + expected: "apps/demo", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := NormalisedSubfolder(tt.metadata) + if result != tt.expected { + t.Errorf("expected %q, got %q", tt.expected, result) + } + }) + } +} + +func TestPathWithin(t *testing.T) { + tempDir, err := os.MkdirTemp("", "path_within_test") + if err != nil { + t.Fatalf("failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + subdir := filepath.Join(tempDir, "subdir") + if err := os.Mkdir(subdir, 0755); err != nil { + t.Fatalf("failed to create subdirectory: %v", err) + } + + tests := []struct { + name string + path string + root string + expected bool + }{ + { + name: "empty root always returns true", + path: "/any/path", + root: "", + expected: true, + }, + { + name: "path equals root", + path: tempDir, + root: tempDir, + expected: true, + }, + { + name: "path within root", + path: subdir, + root: tempDir, + expected: true, + }, + { + name: "path outside root", + path: tempDir, + root: subdir, + expected: false, + }, + { + name: "relative path within root", + path: filepath.Join(tempDir, ".", "subdir"), + root: tempDir, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := PathWithin(tt.path, tt.root) + if result != tt.expected { + t.Errorf("PathWithin(%q, %q) = %v, expected %v", tt.path, tt.root, result, tt.expected) + } + }) + } +} + +func TestResolveRelativeLocalPath(t *testing.T) { + tempDir, err := os.MkdirTemp("", "resolve_path_test") + if err != nil { + t.Fatalf("failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create a repository 
structure + repoRoot := filepath.Join(tempDir, "repo") + subfolder := filepath.Join(repoRoot, "apps", "demo") + if err := os.MkdirAll(subfolder, 0755); err != nil { + t.Fatalf("failed to create subfolder: %v", err) + } + + // Create a test file + testFile := filepath.Join(subfolder, "main.py") + if err := os.WriteFile(testFile, []byte("test"), 0644); err != nil { + t.Fatalf("failed to create test file: %v", err) + } + + tests := []struct { + name string + cleanURI string + repoRoot string + subfolder string + absSource string + expected string + }{ + { + name: "relative file exists in subfolder", + cleanURI: "main.py", + repoRoot: repoRoot, + subfolder: "apps/demo", + absSource: subfolder, + expected: testFile, + }, + { + name: "relative file with repo root only", + cleanURI: filepath.Join("apps", "demo", "main.py"), + repoRoot: repoRoot, + subfolder: "", + absSource: "", + expected: testFile, + }, + { + name: "fallback to absSource", + cleanURI: "main.py", + repoRoot: "", + subfolder: "", + absSource: subfolder, + expected: testFile, + }, + { + name: "non-existent file returns constructed path", + cleanURI: "nonexistent.py", + repoRoot: repoRoot, + subfolder: "", + absSource: subfolder, + expected: filepath.Join(repoRoot, "nonexistent.py"), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ResolveRelativeLocalPath(tt.cleanURI, tt.repoRoot, tt.subfolder, tt.absSource) + if result != tt.expected { + t.Errorf("expected %q, got %q", tt.expected, result) + } + }) + } +} + +func TestConvertToRepoRelativePath(t *testing.T) { + tempDir, err := os.MkdirTemp("", "convert_path_test") + if err != nil { + t.Fatalf("failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + repoRoot := filepath.Join(tempDir, "scanio-test") + subfolder := filepath.Join(repoRoot, "apps", "demo") + if err := os.MkdirAll(subfolder, 0755); err != nil { + t.Fatalf("failed to create subfolder: %v", err) + } + + absoluteFile := filepath.Join(subfolder, "main.py") + if err := os.WriteFile(absoluteFile, []byte("test"), 0644); err != nil { + t.Fatalf("failed to create test file: %v", err) + } + + metadata := &git.RepositoryMetadata{ + RepoRootFolder: repoRoot, + Subfolder: "apps/demo", + } + + tests := []struct { + name string + rawURI string + metadata *git.RepositoryMetadata + sourceFolder string + expected string + }{ + { + name: "empty URI", + rawURI: "", + metadata: metadata, + sourceFolder: subfolder, + expected: "", + }, + { + name: "absolute URI with metadata", + rawURI: absoluteFile, + metadata: metadata, + sourceFolder: subfolder, + expected: "apps/demo/main.py", + }, + { + name: "relative URI with metadata", + rawURI: "main.py", + metadata: metadata, + sourceFolder: subfolder, + expected: "apps/demo/main.py", + }, + { + name: "relative URI with file:// prefix", + rawURI: "file://main.py", + metadata: metadata, + sourceFolder: subfolder, + expected: "apps/demo/main.py", + }, + { + name: "absolute URI without metadata", + rawURI: absoluteFile, + metadata: nil, + sourceFolder: subfolder, + expected: "main.py", + }, + { + name: "relative URI with parent path", + rawURI: filepath.ToSlash(filepath.Join("..", "scanio-test", "apps", "demo", "main.py")), + metadata: metadata, + sourceFolder: subfolder, + expected: "apps/demo/main.py", + }, + { + name: "URI already with subfolder prefix", + rawURI: "apps/demo/main.py", + metadata: metadata, + sourceFolder: subfolder, + expected: "apps/demo/main.py", + }, + { + name: "relative URI without metadata or source folder", + 
rawURI: "src/main.py", + metadata: nil, + sourceFolder: "", + expected: "src/main.py", + }, + { + name: "whitespace in URI", + rawURI: " main.py ", + metadata: metadata, + sourceFolder: subfolder, + expected: "apps/demo/main.py", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ConvertToRepoRelativePath(tt.rawURI, tt.metadata, tt.sourceFolder) + if result != tt.expected { + t.Errorf("expected %q, got %q", tt.expected, result) + } + }) + } +} + +func TestConvertToRepoRelativePathWithoutRepoRoot(t *testing.T) { + // Test scenarios where we don't have repository metadata + tests := []struct { + name string + rawURI string + sourceFolder string + expected string + }{ + { + name: "relative path without metadata", + rawURI: "src/main.py", + sourceFolder: "/tmp/project", + expected: "src/main.py", + }, + { + name: "absolute path without metadata falls back to source folder", + rawURI: "/tmp/project/src/main.py", + sourceFolder: "/tmp/project", + expected: "src/main.py", + }, + { + name: "absolute path with no context", + rawURI: "/home/user/project/main.py", + sourceFolder: "", + expected: "home/user/project/main.py", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ConvertToRepoRelativePath(tt.rawURI, nil, tt.sourceFolder) + if result != tt.expected { + t.Errorf("expected %q, got %q", tt.expected, result) + } + }) + } +} + +func TestConvertToRepoRelativePathCrossPlatform(t *testing.T) { + // Test that paths are always normalized to forward slashes + tempDir, err := os.MkdirTemp("", "cross_platform_test") + if err != nil { + t.Fatalf("failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + repoRoot := filepath.Join(tempDir, "repo") + subfolder := filepath.Join(repoRoot, "apps", "demo") + if err := os.MkdirAll(subfolder, 0755); err != nil { + t.Fatalf("failed to create subfolder: %v", err) + } + + // Create the file + mainFile := filepath.Join(subfolder, "main.py") + if err := os.WriteFile(mainFile, []byte("test"), 0644); err != nil { + t.Fatalf("failed to create test file: %v", err) + } + + metadata := &git.RepositoryMetadata{ + RepoRootFolder: repoRoot, + Subfolder: "apps/demo", + } + + // Test with a relative path - the internal logic will normalize it + rawURI := "main.py" + result := ConvertToRepoRelativePath(rawURI, metadata, subfolder) + + // Result should always use forward slashes + if strings.Contains(result, "\\") { + t.Errorf("expected forward slashes only, got %q", result) + } + + expected := "apps/demo/main.py" + if result != expected { + t.Errorf("expected %q, got %q", expected, result) + } +} + +func TestBuildGitHubPermalink(t *testing.T) { + tests := []struct { + name string + namespace string + repository string + ref string + repoRelativePath string + startLine int + endLine int + expected string + }{ + { + name: "single line", + namespace: "scan-io-git", + repository: "scan-io", + ref: "main", + repoRelativePath: "src/main.go", + startLine: 42, + endLine: 0, + expected: "https://github.com/scan-io-git/scan-io/blob/main/src/main.go#L42", + }, + { + name: "line range", + namespace: "scan-io-git", + repository: "scan-io", + ref: "abc123", + repoRelativePath: "internal/sarif/path_helpers.go", + startLine: 10, + endLine: 15, + expected: "https://github.com/scan-io-git/scan-io/blob/abc123/internal/sarif/path_helpers.go#L10-L15", + }, + { + name: "same start and end line", + namespace: "scan-io-git", + repository: "scan-io", + ref: "main", + repoRelativePath: 
"cmd/sarif-issues/utils.go", + startLine: 5, + endLine: 5, + expected: "https://github.com/scan-io-git/scan-io/blob/main/cmd/sarif-issues/utils.go#L5", + }, + { + name: "no line numbers", + namespace: "scan-io-git", + repository: "scan-io", + ref: "main", + repoRelativePath: "README.md", + startLine: 0, + endLine: 0, + expected: "https://github.com/scan-io-git/scan-io/blob/main/README.md", + }, + { + name: "negative start line", + namespace: "scan-io-git", + repository: "scan-io", + ref: "main", + repoRelativePath: "src/main.go", + startLine: -1, + endLine: 0, + expected: "https://github.com/scan-io-git/scan-io/blob/main/src/main.go", + }, + { + name: "empty namespace", + namespace: "", + repository: "scan-io", + ref: "main", + repoRelativePath: "src/main.go", + startLine: 1, + endLine: 0, + expected: "", + }, + { + name: "empty repository", + namespace: "scan-io-git", + repository: "", + ref: "main", + repoRelativePath: "src/main.go", + startLine: 1, + endLine: 0, + expected: "", + }, + { + name: "empty ref", + namespace: "scan-io-git", + repository: "scan-io", + ref: "", + repoRelativePath: "src/main.go", + startLine: 1, + endLine: 0, + expected: "", + }, + { + name: "empty file path", + namespace: "scan-io-git", + repository: "scan-io", + ref: "main", + repoRelativePath: "", + startLine: 1, + endLine: 0, + expected: "", + }, + { + name: "file with subdirectory", + namespace: "scan-io-git", + repository: "scan-io", + ref: "feature-branch", + repoRelativePath: "internal/sarif/message_formatter.go", + startLine: 25, + endLine: 30, + expected: "https://github.com/scan-io-git/scan-io/blob/feature-branch/internal/sarif/message_formatter.go#L25-L30", + }, + { + name: "end line less than start line", + namespace: "scan-io-git", + repository: "scan-io", + ref: "main", + repoRelativePath: "src/main.go", + startLine: 10, + endLine: 5, + expected: "https://github.com/scan-io-git/scan-io/blob/main/src/main.go#L10", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := BuildGitHubPermalink(tt.namespace, tt.repository, tt.ref, tt.repoRelativePath, tt.startLine, tt.endLine) + if result != tt.expected { + t.Errorf("BuildGitHubPermalink(%q, %q, %q, %q, %d, %d) = %q, expected %q", + tt.namespace, tt.repository, tt.ref, tt.repoRelativePath, tt.startLine, tt.endLine, result, tt.expected) + } + }) + } +} + +func TestExtractFileURIFromResult(t *testing.T) { + tempDir, err := os.MkdirTemp("", "sarif_extract") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + repoRoot := filepath.Join(tempDir, "scanio-test") + subfolder := filepath.Join(repoRoot, "apps", "demo") + if err := os.MkdirAll(subfolder, 0o755); err != nil { + t.Fatalf("Failed to create subfolder: %v", err) + } + + absoluteFile := filepath.Join(subfolder, "main.py") + metadata := &git.RepositoryMetadata{ + RepoRootFolder: repoRoot, + Subfolder: filepath.ToSlash(filepath.Join("apps", "demo")), + } + + if err := os.WriteFile(absoluteFile, []byte("print('demo')\n"), 0o644); err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + tests := []struct { + name string + uri string + meta *git.RepositoryMetadata + expectedRepo string + expectedLocal string + sourceFolder string + }{ + { + name: "absolute URI with metadata", + uri: absoluteFile, + meta: metadata, + expectedRepo: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + expectedLocal: absoluteFile, + sourceFolder: subfolder, + }, + { + name: "relative URI with metadata", + uri: 
"main.py", + meta: metadata, + expectedRepo: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + expectedLocal: filepath.Join(repoRoot, "apps", "demo", "main.py"), + sourceFolder: subfolder, + }, + { + name: "relative URI with parent segments", + uri: filepath.ToSlash(filepath.Join("..", "scanio-test", "apps", "demo", "main.py")), + meta: metadata, + expectedRepo: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + expectedLocal: filepath.Join(repoRoot, "apps", "demo", "main.py"), + sourceFolder: subfolder, + }, + { + name: "relative URI already prefixed", + uri: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + meta: metadata, + expectedRepo: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + expectedLocal: filepath.Join(repoRoot, "apps", "demo", "main.py"), + sourceFolder: subfolder, + }, + { + name: "absolute URI without metadata falls back to source folder", + uri: absoluteFile, + meta: nil, + expectedRepo: "main.py", + expectedLocal: absoluteFile, + sourceFolder: subfolder, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + uri := tt.uri + result := &sarif.Result{ + Locations: []*sarif.Location{ + { + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: &uri, + }, + }, + }, + }, + } + + repoPath, localPath := ExtractFileURIFromResult(result, tt.sourceFolder, tt.meta) + if repoPath != tt.expectedRepo { + t.Fatalf("expected repo path %q, got %q", tt.expectedRepo, repoPath) + } + if localPath != tt.expectedLocal { + t.Fatalf("expected local path %q, got %q", tt.expectedLocal, localPath) + } + }) + } +} + +func TestDisplayRuleHeadingPrefersShortDescription(t *testing.T) { + text := "Short desc" + name := "Rule Name" + rule := &sarif.ReportingDescriptor{ + ShortDescription: &sarif.MultiformatMessageString{ + Text: &text, + }, + Name: &name, + } + + got := DisplayRuleHeading(rule) + if got != "Short desc" { + t.Fatalf("expected short description heading, got %q", got) + } +} + +func TestDisplayRuleHeadingFallsBackToName(t *testing.T) { + name := "Rule Name" + rule := &sarif.ReportingDescriptor{ + Name: &name, + } + + got := DisplayRuleHeading(rule) + if got != "Rule Name" { + t.Fatalf("expected name fallback heading, got %q", got) + } +} + +func TestDisplayRuleHeadingFallsBackToID(t *testing.T) { + id := "rule.id" + rule := &sarif.ReportingDescriptor{ + ID: id, + } + + got := DisplayRuleHeading(rule) + if got != "rule.id" { + t.Fatalf("expected rule id fallback heading, got %q", got) + } +} + +func TestDisplayRuleHeadingReturnsEmptyForNil(t *testing.T) { + got := DisplayRuleHeading(nil) + if got != "" { + t.Fatalf("expected empty string for nil rule, got %q", got) + } +} diff --git a/internal/sarif/sarif.go b/internal/sarif/sarif.go index 3951c0c..7653037 100644 --- a/internal/sarif/sarif.go +++ b/internal/sarif/sarif.go @@ -10,6 +10,7 @@ import ( "os" "path/filepath" "sort" + "strings" "github.com/hashicorp/go-hclog" "github.com/owenrumney/go-sarif/v2/sarif" @@ -283,30 +284,72 @@ func (r Report) EnrichResultsCodeFlowProperty(locationWebURLCallback func(artifa } } -// EnrichResultsLevelProperty function to enrich results properties with level taken from corersponding rules propertiues "problem.severity" field +// EnrichResultsLevelProperty enriches result properties with level information taken from either +// the result itself or the corresponding rule metadata. Supports multi-run SARIF reports. 
func (r Report) EnrichResultsLevelProperty() { - rulesMap := map[string]*sarif.ReportingDescriptor{} - for _, rule := range r.Runs[0].Tool.Driver.Rules { - rulesMap[rule.ID] = rule - } + for _, run := range r.Runs { + rulesMap := map[string]*sarif.ReportingDescriptor{} + if run.Tool.Driver != nil { + for _, rule := range run.Tool.Driver.Rules { + if rule == nil { + continue + } + rulesMap[rule.ID] = rule + } + } - for _, result := range r.Runs[0].Results { - if rule, ok := rulesMap[*result.RuleID]; ok { - if result.Properties["Level"] == nil { - if result.Level != nil { - // used by snyk - result.Properties["Level"] = *result.Level - } else if rule.Properties["problem.severity"] != nil { - // used by codeql - result.Properties["Level"] = rule.Properties["problem.severity"] - } else if rule.DefaultConfiguration != nil { - // used by all tools? - result.Properties["Level"] = rule.DefaultConfiguration.Level - } else { - // just a fallback, should never happen - result.Properties["Level"] = "unknown" + for _, result := range run.Results { + if result == nil { + continue + } + + if result.Properties == nil { + result.Properties = make(map[string]interface{}) + } + + if result.Properties["Level"] != nil { + continue + } + + // Prefer explicit level on the result when available. + if result.Level != nil { + if lvl := strings.TrimSpace(*result.Level); lvl != "" { + result.Properties["Level"] = lvl + continue + } + } + + var ruleDescriptor *sarif.ReportingDescriptor + if result.RuleID != nil { + if rule, ok := rulesMap[*result.RuleID]; ok { + ruleDescriptor = rule + } + } + + if ruleDescriptor != nil && ruleDescriptor.Properties != nil { + if level, ok := ruleDescriptor.Properties["problem.severity"]; ok { + if str, ok := level.(string); ok { + if trimmed := strings.TrimSpace(str); trimmed != "" { + result.Properties["Level"] = trimmed + continue + } + } else if level != nil { + // Preserve non-string values (legacy behaviour) if provided. + result.Properties["Level"] = level + continue + } } } + + if ruleDescriptor != nil && ruleDescriptor.DefaultConfiguration != nil { + if lvl := strings.TrimSpace(ruleDescriptor.DefaultConfiguration.Level); lvl != "" { + result.Properties["Level"] = lvl + continue + } + } + + // Fallback when no metadata provides a level. 
+ result.Properties["Level"] = "unknown" } } } diff --git a/internal/sarif/sarif_test.go b/internal/sarif/sarif_test.go new file mode 100644 index 0000000..21febd6 --- /dev/null +++ b/internal/sarif/sarif_test.go @@ -0,0 +1,155 @@ +package sarif + +import ( + "testing" + + gosarif "github.com/owenrumney/go-sarif/v2/sarif" +) + +func TestEnrichResultsLevelPropertyInitialisesResultProperties(t *testing.T) { + ruleID := "CODEQL-0001" + + rule := &gosarif.ReportingDescriptor{ + ID: ruleID, + Properties: gosarif.Properties{ + "problem.severity": "warning", + }, + } + + result := &gosarif.Result{ + RuleID: &ruleID, + } + + report := Report{ + Report: &gosarif.Report{ + Version: string(gosarif.Version210), + Runs: []*gosarif.Run{ + { + Tool: gosarif.Tool{ + Driver: &gosarif.ToolComponent{ + Name: "CodeQL", + Rules: []*gosarif.ReportingDescriptor{rule}, + }, + }, + Results: []*gosarif.Result{result}, + }, + }, + }, + } + + report.EnrichResultsLevelProperty() + + if result.Properties == nil { + t.Fatalf("expected result properties to be initialised, but it was nil") + } + + level, ok := result.Properties["Level"] + if !ok { + t.Fatalf("expected Level property to be set on result properties") + } + + if level != "warning" { + t.Fatalf("expected Level property to be %q, got %v", "warning", level) + } +} + +func TestEnrichResultsLevelPropertyHandlesMultipleRuns(t *testing.T) { + ruleIDOne := "RULE-ONE" + ruleIDTwo := "RULE-TWO" + resultLevel := "note" + + runOneRule := gosarif.NewRule(ruleIDOne).WithProperties(gosarif.Properties{ + "problem.severity": "warning", + }) + runTwoRule := gosarif.NewRule(ruleIDTwo) + + runOneResult := &gosarif.Result{ + RuleID: &ruleIDOne, + } + runTwoResult := &gosarif.Result{ + RuleID: &ruleIDTwo, + Level: &resultLevel, + } + + report := Report{ + Report: &gosarif.Report{ + Version: string(gosarif.Version210), + Runs: []*gosarif.Run{ + { + Tool: gosarif.Tool{ + Driver: &gosarif.ToolComponent{ + Name: "ToolOne", + Rules: []*gosarif.ReportingDescriptor{runOneRule}, + }, + }, + Results: []*gosarif.Result{runOneResult}, + }, + { + Tool: gosarif.Tool{ + Driver: &gosarif.ToolComponent{ + Name: "ToolTwo", + Rules: []*gosarif.ReportingDescriptor{runTwoRule}, + }, + }, + Results: []*gosarif.Result{runTwoResult}, + }, + }, + }, + } + + report.EnrichResultsLevelProperty() + + if runOneResult.Properties == nil { + t.Fatalf("expected runOneResult properties to be initialised") + } + if lvl := runOneResult.Properties["Level"]; lvl != "warning" { + t.Fatalf("expected runOneResult level to be %q, got %v", "warning", lvl) + } + + if runTwoResult.Properties == nil { + t.Fatalf("expected runTwoResult properties to be initialised") + } + if lvl := runTwoResult.Properties["Level"]; lvl != "note" { + t.Fatalf("expected runTwoResult level to be %q, got %v", "note", lvl) + } +} + +func TestEnrichResultsLevelPropertyUsesDefaultConfigurationLevel(t *testing.T) { + ruleID := "RULE-DEFAULT" + rule := gosarif.NewRule(ruleID) + rule.DefaultConfiguration = gosarif.NewReportingConfiguration().WithLevel("error") + + result := &gosarif.Result{ + RuleID: &ruleID, + } + + report := Report{ + Report: &gosarif.Report{ + Version: string(gosarif.Version210), + Runs: []*gosarif.Run{ + { + Tool: gosarif.Tool{ + Driver: &gosarif.ToolComponent{ + Name: "Tool", + Rules: []*gosarif.ReportingDescriptor{rule}, + }, + }, + Results: []*gosarif.Result{result}, + }, + }, + }, + } + + report.EnrichResultsLevelProperty() + + if result.Properties == nil { + t.Fatalf("expected result properties to be initialised") + } + level, 
ok := result.Properties["Level"].(string)
+	if !ok {
+		t.Fatalf("expected Level property to be a string, got %T", result.Properties["Level"])
+	}
+	if level != "error" {
+		t.Fatalf("expected Level property to be %q, got %q", "error", level)
+	}
+}
diff --git a/pkg/issuecorrelation/correlator.go b/pkg/issuecorrelation/correlator.go
new file mode 100644
index 0000000..0764782
--- /dev/null
+++ b/pkg/issuecorrelation/correlator.go
@@ -0,0 +1,225 @@
+package issuecorrelation
+
+// IssueMetadata describes the minimal metadata required to correlate issues.
+// Fields:
+// - IssueID: optional external identifier, not used by correlation logic.
+// - Scanner, RuleID: identify the rule that produced the finding.
+// - Filename, StartLine, EndLine: location information inside a file.
+// - SnippetHash: optional code snippet/content fingerprint used for stronger matching.
+type IssueMetadata struct {
+	IssueID     string // external issue id or report sequence number; not used during correlation
+	Scanner     string
+	RuleID      string
+	Severity    string
+	Filename    string
+	StartLine   int
+	EndLine     int
+	SnippetHash string
+}
+
+// Match groups a single known issue with the list of new issues that were
+// correlated to it. A new issue may appear in multiple Match.New slices if it
+// correlates to multiple known issues.
+type Match struct {
+	Known IssueMetadata
+	New   []IssueMetadata
+}
+
+// Correlator accepts slices of new and known issues and computes correlations
+// between them. Use NewCorrelator to create an instance and call Process() to
+// compute matches. After processing, use Matches(), UnmatchedNew() and
+// UnmatchedKnown() to inspect results. The correlator preserves many-to-many
+// relationships: a known issue may match multiple new issues and vice versa.
+type Correlator struct {
+	NewIssues   []IssueMetadata
+	KnownIssues []IssueMetadata
+
+	// internal indexes populated by Process()
+	knownToNew map[int][]int // known index -> list of new indices
+	newToKnown map[int][]int // new index -> list of known indices
+
+	processed bool
+}
+
+// NewCorrelator constructs a Correlator with the provided slices of new and
+// known issues. The correlator is inert until Process() is called.
+func NewCorrelator(newIssues, knownIssues []IssueMetadata) *Correlator {
+	return &Correlator{
+		NewIssues:   newIssues,
+		KnownIssues: knownIssues,
+	}
+}
+
+// Process computes correlations between every known and every new issue using
+// four ordered stages. Once a known or new issue has been matched in an
+// earlier stage it is excluded from later stages.
The stages are:
+// 1) scanner+ruleid+filename+startline+endline+snippethash
+// 2) scanner+ruleid+filename+snippethash
+// 3) scanner+ruleid+filename+startline+endline
+// 4) scanner+ruleid+filename+startline
+// The results are stored internally and can be retrieved via Matches(),
+// UnmatchedNew() and UnmatchedKnown(). Process is idempotent.
+func (c *Correlator) Process() {
+	if c.processed {
+		return
+	}
+	c.knownToNew = make(map[int][]int)
+	c.newToKnown = make(map[int][]int)
+
+	// matchedKnown/matchedNew track indices already matched in earlier stages
+	// and therefore excluded from later stages. The per-stage maps collect
+	// items matched during the current stage so that multiple matches within
+	// the same stage are still allowed.
+	matchedKnown := make(map[int]bool)
+	matchedNew := make(map[int]bool)
+
+	stages := []int{1, 2, 3, 4}
+	for _, stage := range stages {
+		matchedKnownThis := make(map[int]bool)
+		matchedNewThis := make(map[int]bool)
+
+		for ki, k := range c.KnownIssues {
+			if matchedKnown[ki] {
+				continue
+			}
+			for ni, n := range c.NewIssues {
+				if matchedNew[ni] {
+					continue
+				}
+
+				if matchStage(k, n, stage) {
+					c.knownToNew[ki] = append(c.knownToNew[ki], ni)
+					c.newToKnown[ni] = append(c.newToKnown[ni], ki)
+					matchedKnownThis[ki] = true
+					matchedNewThis[ni] = true
+				}
+			}
+		}
+
+		// promote this stage's matches to the global matched sets so they are
+		// excluded from subsequent stages.
+		for ki := range matchedKnownThis {
+			matchedKnown[ki] = true
+		}
+		for ni := range matchedNewThis {
+			matchedNew[ni] = true
+		}
+	}
+
+	c.processed = true
+}
+
+// matchStage applies the specified stage matching rules. It returns true when
+// the two IssueMetadata values should be considered a match for the given
+// stage. The function enforces that Scanner and RuleID are present for all
+// stages.
+//
+// Stage details:
+// 1: scanner + ruleid + filename + startline + endline + snippethash
+// 2: scanner + ruleid + filename + snippethash
+// 3: scanner + ruleid + filename + startline + endline
+// 4: scanner + ruleid + filename + startline
+func matchStage(a, b IssueMetadata, stage int) bool {
+	// require scanner and ruleid for all stages
+	if a.Scanner == "" || b.Scanner == "" || a.RuleID == "" || b.RuleID == "" {
+		return false
+	}
+
+	if a.Scanner != b.Scanner {
+		return false
+	}
+
+	if a.RuleID != b.RuleID {
+		return false
+	}
+
+	if a.Filename != b.Filename {
+		return false
+	}
+
+	switch stage {
+	case 1:
+		return a.StartLine == b.StartLine && a.EndLine == b.EndLine && a.SnippetHash == b.SnippetHash
+	case 2:
+		return a.SnippetHash == b.SnippetHash
+	case 3:
+		return a.StartLine == b.StartLine && a.EndLine == b.EndLine
+	case 4:
+		return a.StartLine == b.StartLine
+	default:
+		return false
+	}
+}
+
+// UnmatchedNew returns the subset of new issues that were not correlated to
+// any known issue after Process() has been executed. If Process() has not
+// yet been run it will be invoked.
+func (c *Correlator) UnmatchedNew() []IssueMetadata {
+	if !c.processed {
+		c.Process()
+	}
+
+	var out []IssueMetadata
+	for ni, n := range c.NewIssues {
+		if len(c.newToKnown[ni]) == 0 {
+			out = append(out, n)
+		}
+	}
+	return out
+}
+
+// UnmatchedKnown returns the subset of known issues that were not correlated
+// to any new issue after Process() has been executed. If Process() has not
+// yet been run it will be invoked.
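+//
+// Together with Matches() and UnmatchedNew() this supports a typical triage
+// loop (caller-side sketch; the handlers are assumptions, not part of this
+// package):
+//
+//	c := NewCorrelator(newIssues, knownIssues)
+//	c.Process()
+//	for _, m := range c.Matches() {
+//		// refresh the existing issue for m.Known using m.New
+//	}
+//	for _, n := range c.UnmatchedNew() {
+//		// open a new issue for n
+//	}
+//	for _, k := range c.UnmatchedKnown() {
+//		// close the resolved issue for k
+//	}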
+func (c *Correlator) UnmatchedKnown() []IssueMetadata {
+	if !c.processed {
+		c.Process()
+	}
+
+	var out []IssueMetadata
+	for ki, k := range c.KnownIssues {
+		if len(c.knownToNew[ki]) == 0 {
+			out = append(out, k)
+		}
+	}
+	return out
+}
+
+// Matches returns a slice of Match entries describing each known issue that
+// had at least one correlated new issue. Each Match contains the known issue
+// and the list of new issues correlated to it; a new issue that matches
+// multiple known issues appears under each of them. If Process() has not been
+// run it will be invoked.
+func (c *Correlator) Matches() []Match {
+	if !c.processed {
+		c.Process()
+	}
+
+	var out []Match
+	for ki, newIdxs := range c.knownToNew {
+		if len(newIdxs) == 0 {
+			continue
+		}
+		m := Match{Known: c.KnownIssues[ki], New: make([]IssueMetadata, 0, len(newIdxs))}
+		for _, ni := range newIdxs {
+			if ni >= 0 && ni < len(c.NewIssues) {
+				m.New = append(m.New, c.NewIssues[ni])
+			}
+		}
+		out = append(out, m)
+	}
+	return out
+}
diff --git a/pkg/issuecorrelation/correlator_test.go b/pkg/issuecorrelation/correlator_test.go
new file mode 100644
index 0000000..e0cc018
--- /dev/null
+++ b/pkg/issuecorrelation/correlator_test.go
@@ -0,0 +1,141 @@
+package issuecorrelation
+
+import "testing"
+
+func TestCorrelator_SnippetHashMatch(t *testing.T) {
+	known := []IssueMetadata{{Scanner: "s1", RuleID: "R1", SnippetHash: "h1"}}
+	new := []IssueMetadata{{Scanner: "s1", RuleID: "R1", SnippetHash: "h1"}}
+
+	c := NewCorrelator(new, known)
+	c.Process()
+
+	matches := c.Matches()
+	if len(matches) != 1 {
+		t.Fatalf("expected 1 match got %d", len(matches))
+	}
+	if len(matches[0].New) != 1 {
+		t.Fatalf("expected 1 new in match got %d", len(matches[0].New))
+	}
+
+	if got := len(c.UnmatchedNew()); got != 0 {
+		t.Fatalf("expected 0 unmatched new, got %d", got)
+	}
+	if got := len(c.UnmatchedKnown()); got != 0 {
+		t.Fatalf("expected 0 unmatched known, got %d", got)
+	}
+}
+
+func TestCorrelator_LineAndRuleMatch(t *testing.T) {
+	known := []IssueMetadata{{Scanner: "s2", RuleID: "R2", Filename: "f.go", StartLine: 10, EndLine: 12}}
+	new := []IssueMetadata{{Scanner: "s2", RuleID: "R2", Filename: "f.go", StartLine: 10, EndLine: 12}}
+
+	c := NewCorrelator(new, known)
+	c.Process()
+	if len(c.Matches()) != 1 {
+		t.Fatalf("expected match by lines/rule")
+	}
+}
+
+func TestCorrelator_Unmatched(t *testing.T) {
+	known := []IssueMetadata{{Scanner: "s3", RuleID: "R3", Filename: "x.go", StartLine: 1}}
+	new := []IssueMetadata{{Scanner: "s4", RuleID: "R4", Filename: "y.go", StartLine: 2}}
+
+	c := NewCorrelator(new, known)
+	c.Process()
+
+	if len(c.UnmatchedNew()) != 1 {
+		t.Fatalf("expected 1 unmatched new")
+	}
+	if len(c.UnmatchedKnown()) != 1 {
+		t.Fatalf("expected 1 unmatched known")
+	}
+	if len(c.Matches()) != 0 {
+		t.Fatalf("expected 0 matches")
+	}
+}
+
+func TestCorrelator_SameExceptLines(t *testing.T) {
+	// known and new have identical scanner, ruleid, filename and snippethash
+	// but different start and end lines -> should match at stage 2
+	known := []IssueMetadata{{Scanner: "s5", RuleID: "R5", Filename: "g.go", StartLine: 10, EndLine: 12, SnippetHash: "sh5"}}
+	new := []IssueMetadata{{Scanner: "s5", RuleID: "R5", Filename: "g.go", StartLine: 20, EndLine: 22, SnippetHash: "sh5"}}
+
+	c := NewCorrelator(new, known)
+	c.Process()
+
+	matches := c.Matches()
+	if len(matches) != 1 {
t.Fatalf("expected 1 match for same metadata except lines, got %d", len(matches)) + } + if len(matches[0].New) != 1 { + t.Fatalf("expected 1 new in match got %d", len(matches[0].New)) + } + if got := len(c.UnmatchedNew()); got != 0 { + t.Fatalf("expected 0 unmatched new, got %d", got) + } + if got := len(c.UnmatchedKnown()); got != 0 { + t.Fatalf("expected 0 unmatched known, got %d", got) + } +} + +func TestCorrelator_KnownPlusSimilarNews(t *testing.T) { + // One known issue. New issues include: + // - an exact issue (same scanner, ruleid, filename, start/end and snippethash) + // - a similar issue (same scanner, ruleid, filename, snippethash but different lines) + // Because stage 1 runs before stage 2 and matches are excluded from later + // stages, the known issue should match only the exact new issue and the + // similar one should remain unmatched. + known := []IssueMetadata{{Scanner: "sx", RuleID: "Rx", Filename: "h.go", StartLine: 5, EndLine: 7, SnippetHash: "shx"}} + new := []IssueMetadata{ + {Scanner: "sx", RuleID: "Rx", Filename: "h.go", StartLine: 5, EndLine: 7, SnippetHash: "shx"}, + {Scanner: "sx", RuleID: "Rx", Filename: "h.go", StartLine: 50, EndLine: 52, SnippetHash: "shx"}, + } + + c := NewCorrelator(new, known) + c.Process() + + matches := c.Matches() + if len(matches) != 1 { + t.Fatalf("expected 1 match for known issue, got %d", len(matches)) + } + if len(matches[0].New) != 1 { + t.Fatalf("expected known to match only the exact new issue, got %d", len(matches[0].New)) + } + + // the similar new issue should be unmatched + unmatchedNew := c.UnmatchedNew() + if len(unmatchedNew) != 1 { + t.Fatalf("expected 1 unmatched new issue (the similar one), got %d", len(unmatchedNew)) + } + if len(c.UnmatchedKnown()) != 0 { + t.Fatalf("expected 0 unmatched known issues, got %d", len(c.UnmatchedKnown())) + } +} + +func TestCorrelator_TwoPairsShiftedLines(t *testing.T) { + // Two known issues. New issues are the same two but with start/end lines + // shifted by +10 and identical SnippetHash. They should match via stage 2. + known := []IssueMetadata{ + {Scanner: "sa", RuleID: "Ra", Filename: "p.go", StartLine: 1, EndLine: 3, SnippetHash: "sha"}, + {Scanner: "sb", RuleID: "Rb", Filename: "q.go", StartLine: 5, EndLine: 8, SnippetHash: "shb"}, + } + new := []IssueMetadata{ + {Scanner: "sa", RuleID: "Ra", Filename: "p.go", StartLine: 11, EndLine: 13, SnippetHash: "sha"}, + {Scanner: "sb", RuleID: "Rb", Filename: "q.go", StartLine: 15, EndLine: 18, SnippetHash: "shb"}, + } + + c := NewCorrelator(new, known) + c.Process() + + matches := c.Matches() + if len(matches) != 2 { + t.Fatalf("expected 2 matches for the two pairs, got %d", len(matches)) + } + + if len(c.UnmatchedNew()) != 0 { + t.Fatalf("expected 0 unmatched new issues, got %d", len(c.UnmatchedNew())) + } + if len(c.UnmatchedKnown()) != 0 { + t.Fatalf("expected 0 unmatched known issues, got %d", len(c.UnmatchedKnown())) + } +} diff --git a/pkg/issuecorrelation/snippet.go b/pkg/issuecorrelation/snippet.go new file mode 100644 index 0000000..a0e11d2 --- /dev/null +++ b/pkg/issuecorrelation/snippet.go @@ -0,0 +1,39 @@ +package issuecorrelation + +import ( + "crypto/sha256" + "fmt" + "os" + "strings" +) + +// ComputeSnippetHash reads the snippet (single line or range) from a local filesystem path +// and returns its SHA256 hex string. Returns empty string on any error or if inputs are invalid. 
+func ComputeSnippetHash(localPath string, line, endLine int) string { + if strings.TrimSpace(localPath) == "" || line <= 0 { + return "" + } + data, err := os.ReadFile(localPath) + if err != nil { + return "" + } + lines := strings.Split(string(data), "\n") + start := line + end := line + if endLine > line { + end = endLine + } + // Validate bounds (1-based line numbers) + if start < 1 || start > len(lines) { + return "" + } + if end > len(lines) { + end = len(lines) + } + if end < start { + return "" + } + snippet := strings.Join(lines[start-1:end], "\n") + sum := sha256.Sum256([]byte(snippet)) + return fmt.Sprintf("%x", sum[:]) +} diff --git a/pkg/issuecorrelation/snippet_test.go b/pkg/issuecorrelation/snippet_test.go new file mode 100644 index 0000000..4486970 --- /dev/null +++ b/pkg/issuecorrelation/snippet_test.go @@ -0,0 +1,254 @@ +package issuecorrelation + +import ( + "crypto/sha256" + "fmt" + "os" + "path/filepath" + "testing" +) + +func TestComputeSnippetHash(t *testing.T) { + // Create a temporary directory for test files + tempDir, err := os.MkdirTemp("", "snippet_hash_test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create test files with known content + testFileContent := `line 1 +line 2 +line 3 +line 4 +line 5` + + testFilePath := filepath.Join(tempDir, "test.txt") + err = os.WriteFile(testFilePath, []byte(testFileContent), 0644) + if err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + // Create another test file with different content + singleLineContent := "single line content" + singleLineFilePath := filepath.Join(tempDir, "single.txt") + err = os.WriteFile(singleLineFilePath, []byte(singleLineContent), 0644) + if err != nil { + t.Fatalf("Failed to create single line test file: %v", err) + } + + // Create empty file + emptyFilePath := filepath.Join(tempDir, "empty.txt") + err = os.WriteFile(emptyFilePath, []byte(""), 0644) + if err != nil { + t.Fatalf("Failed to create empty test file: %v", err) + } + + // Helper function to compute expected hash + computeExpectedHash := func(content string) string { + sum := sha256.Sum256([]byte(content)) + return fmt.Sprintf("%x", sum[:]) + } + + tests := []struct { + name string + localPath string + line int + endLine int + expected string + }{ + // Valid cases + { + name: "single line from middle", + localPath: testFilePath, + line: 2, + endLine: 2, + expected: computeExpectedHash("line 2"), + }, + { + name: "multiple lines range", + localPath: testFilePath, + line: 2, + endLine: 4, + expected: computeExpectedHash("line 2\nline 3\nline 4"), + }, + { + name: "first line only", + localPath: testFilePath, + line: 1, + endLine: 1, + expected: computeExpectedHash("line 1"), + }, + { + name: "last line only", + localPath: testFilePath, + line: 5, + endLine: 5, + expected: computeExpectedHash("line 5"), + }, + { + name: "entire file", + localPath: testFilePath, + line: 1, + endLine: 5, + expected: computeExpectedHash(testFileContent), + }, + { + name: "single line file", + localPath: singleLineFilePath, + line: 1, + endLine: 1, + expected: computeExpectedHash(singleLineContent), + }, + { + name: "endLine same as line (no range)", + localPath: testFilePath, + line: 3, + endLine: 3, + expected: computeExpectedHash("line 3"), + }, + { + name: "endLine less than line (should use single line)", + localPath: testFilePath, + line: 3, + endLine: 2, + expected: computeExpectedHash("line 3"), + }, + + // Edge cases that should return empty string + { + name: 
"empty path", + localPath: "", + line: 1, + endLine: 1, + expected: "", + }, + { + name: "zero line number", + localPath: testFilePath, + line: 0, + endLine: 1, + expected: "", + }, + { + name: "negative line number", + localPath: testFilePath, + line: -1, + endLine: 1, + expected: "", + }, + { + name: "line number beyond file length", + localPath: testFilePath, + line: 10, + endLine: 10, + expected: "", + }, + { + name: "file does not exist", + localPath: filepath.Join(tempDir, "nonexistent.txt"), + line: 1, + endLine: 1, + expected: "", + }, + + // Boundary cases + { + name: "endLine beyond file length (should clamp)", + localPath: testFilePath, + line: 4, + endLine: 10, + expected: computeExpectedHash("line 4\nline 5"), + }, + { + name: "empty file", + localPath: emptyFilePath, + line: 1, + endLine: 1, + expected: computeExpectedHash(""), // Empty file has one empty line + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ComputeSnippetHash(tt.localPath, tt.line, tt.endLine) + if result != tt.expected { + t.Errorf("ComputeSnippetHash(%q, %d, %d) = %q, want %q", + tt.localPath, tt.line, tt.endLine, result, tt.expected) + } + }) + } +} + +// TestComputeSnippetHash_DifferentContentDifferentHash tests that different +// content produces different hashes +func TestComputeSnippetHash_DifferentContentDifferentHash(t *testing.T) { + // Create a temporary directory for test files + tempDir, err := os.MkdirTemp("", "snippet_hash_different_test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create two files with different content + file1Path := filepath.Join(tempDir, "file1.txt") + file2Path := filepath.Join(tempDir, "file2.txt") + + err = os.WriteFile(file1Path, []byte("content A"), 0644) + if err != nil { + t.Fatalf("Failed to create file1: %v", err) + } + + err = os.WriteFile(file2Path, []byte("content B"), 0644) + if err != nil { + t.Fatalf("Failed to create file2: %v", err) + } + + hash1 := ComputeSnippetHash(file1Path, 1, 1) + hash2 := ComputeSnippetHash(file2Path, 1, 1) + + if hash1 == hash2 { + t.Errorf("Different content produced same hash: %q", hash1) + } + + if hash1 == "" || hash2 == "" { + t.Error("One or both hashes were empty") + } +} + +// TestComputeSnippetHash_SameContentSameHash tests that identical content +// produces identical hashes regardless of file name +func TestComputeSnippetHash_SameContentSameHash(t *testing.T) { + // Create a temporary directory for test files + tempDir, err := os.MkdirTemp("", "snippet_hash_same_test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create two files with identical content but different names + content := "identical content\nline 2\nline 3" + file1Path := filepath.Join(tempDir, "identical1.txt") + file2Path := filepath.Join(tempDir, "identical2.txt") + + err = os.WriteFile(file1Path, []byte(content), 0644) + if err != nil { + t.Fatalf("Failed to create file1: %v", err) + } + + err = os.WriteFile(file2Path, []byte(content), 0644) + if err != nil { + t.Fatalf("Failed to create file2: %v", err) + } + + hash1 := ComputeSnippetHash(file1Path, 1, 2) + hash2 := ComputeSnippetHash(file2Path, 1, 2) + + if hash1 != hash2 { + t.Errorf("Identical content produced different hashes: %q vs %q", hash1, hash2) + } + + if hash1 == "" { + t.Error("Hash was empty for valid content") + } +} diff --git a/pkg/shared/ivcs.go b/pkg/shared/ivcs.go index fe9e4fb..2f299bf 100644 --- 
a/pkg/shared/ivcs.go
+++ b/pkg/shared/ivcs.go
@@ -36,6 +36,18 @@ type PRParams struct {
 	UpdatedDate int64 `json:"updated_date"`
 }
 
+// IssueParams holds the details of an issue.
+type IssueParams struct {
+	Number      int    `json:"number"`
+	Title       string `json:"title"`
+	Body        string `json:"body,omitempty"`
+	State       string `json:"state"`
+	Author      User   `json:"author"`
+	URL         string `json:"url"`
+	CreatedDate int64  `json:"created_date"`
+	UpdatedDate int64  `json:"updated_date"`
+}
+
 // User holds the details of a user.
 type User struct {
 	UserName string `json:"user_name"`
@@ -97,6 +109,35 @@ type VCSSetStatusOfPRRequest struct {
 	Comment string `json:"comment"`
 }
 
+// VCSIssueCreationRequest represents a request to create a new issue.
+type VCSIssueCreationRequest struct {
+	VCSRequestBase
+	Title string `json:"title"`
+	Body  string `json:"body"`
+	// Assignees lists the user logins to assign to this issue.
+	// GitHub supports multiple assignees.
+	Assignees []string `json:"assignees,omitempty"`
+	// Labels is an optional list of label names to attach to the created issue.
+	// Not all VCS providers support labels; providers that don't will ignore this field.
+	Labels []string `json:"labels,omitempty"`
+}
+
+// VCSIssueUpdateRequest represents a request to update an existing issue.
+type VCSIssueUpdateRequest struct {
+	VCSRequestBase
+	Number int    `json:"number"`
+	Title  string `json:"title"`
+	Body   string `json:"body"`
+	State  string `json:"state"` // optional: "open" or "closed"
+}
+
+// VCSCreateIssueCommentRequest represents a request to create a comment on an issue.
+type VCSCreateIssueCommentRequest struct {
+	VCSRequestBase
+	Number int    `json:"number"`
+	Body   string `json:"body"`
+}
+
 // VCSAddCommentToPRRequest represents a request to add a comment to a PR.
 type VCSAddCommentToPRRequest struct {
 	VCSRequestBase
@@ -104,6 +145,13 @@ type VCSAddCommentToPRRequest struct {
 	FilePaths []string `json:"file_paths"`
 }
 
+// VCSListIssuesRequest represents a request to list issues in a repository.
+type VCSListIssuesRequest struct {
+	VCSRequestBase
+	State      string `json:"state"`       // open, closed, all; default open
+	BodyFilter string `json:"body_filter"` // optional: filter issues by body content (substring match)
+}
+
 // ListFuncResult holds the result of a list function.
 type ListFuncResult struct {
 	Args VCSListRepositoriesRequest `json:"args"`
@@ -127,6 +175,11 @@ type VCSRetrievePRInformationResponse struct {
 	PR PRParams `json:"pr"`
 }
 
+// VCSListIssuesResponse represents a response from listing issues.
+type VCSListIssuesResponse struct {
+	Issues []IssueParams `json:"issues"`
+}
+
 // VCS defines the interface for VCS-related operations.
 type VCS interface {
 	Setup(configData config.Config) (bool, error)
@@ -136,6 +189,10 @@ type VCS interface {
 	AddRoleToPR(req VCSAddRoleToPRRequest) (bool, error)
 	SetStatusOfPR(req VCSSetStatusOfPRRequest) (bool, error)
 	AddCommentToPR(req VCSAddCommentToPRRequest) (bool, error)
+	CreateIssue(req VCSIssueCreationRequest) (int, error)
+	ListIssues(req VCSListIssuesRequest) ([]IssueParams, error)
+	UpdateIssue(req VCSIssueUpdateRequest) (bool, error)
+	CreateIssueComment(req VCSCreateIssueCommentRequest) (bool, error)
 }
 
 // VCSRPCClient implements the VCS interface for RPC clients.
@@ -213,6 +270,46 @@ func (c *VCSRPCClient) AddCommentToPR(req VCSAddCommentToPRRequest) (bool, error)
 	return resp, nil
 }
 
+// CreateIssue calls the CreateIssue method on the RPC client.
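+// The returned int is the issue number assigned by the VCS provider.
+//
+// Caller-side sketch (illustrative values; the embedded VCSRequestBase
+// routing fields are omitted here and must also be populated):
+//
+//	num, err := vcs.CreateIssue(shared.VCSIssueCreationRequest{
+//		Title:  "scanio: finding in app.py",
+//		Body:   body,
+//		Labels: []string{"scanio"},
+//	})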
+func (c *VCSRPCClient) CreateIssue(req VCSIssueCreationRequest) (int, error) { + var resp int + err := c.client.Call("Plugin.CreateIssue", req, &resp) + if err != nil { + return 0, fmt.Errorf("RPC client CreateIssue call failed: %w", err) + } + return resp, nil +} + +// ListIssues calls the ListIssues method on the RPC client. +func (c *VCSRPCClient) ListIssues(req VCSListIssuesRequest) ([]IssueParams, error) { + var resp VCSListIssuesResponse + err := c.client.Call("Plugin.ListIssues", req, &resp) + if err != nil { + return nil, fmt.Errorf("RPC client ListIssues call failed: %w", err) + } + return resp.Issues, nil +} + +// UpdateIssue calls the UpdateIssue method on the RPC client. +func (c *VCSRPCClient) UpdateIssue(req VCSIssueUpdateRequest) (bool, error) { + var resp bool + err := c.client.Call("Plugin.UpdateIssue", req, &resp) + if err != nil { + return false, fmt.Errorf("RPC client UpdateIssue call failed: %w", err) + } + return resp, nil +} + +// CreateIssueComment calls the CreateIssueComment method on the RPC client. +func (c *VCSRPCClient) CreateIssueComment(req VCSCreateIssueCommentRequest) (bool, error) { + var resp bool + err := c.client.Call("Plugin.CreateIssueComment", req, &resp) + if err != nil { + return false, fmt.Errorf("RPC client CreateIssueComment call failed: %w", err) + } + return resp, nil +} + // VCSRPCServer wraps a VCS implementation to provide an RPC server. type VCSRPCServer struct { Impl VCS @@ -288,6 +385,46 @@ func (s *VCSRPCServer) AddCommentToPR(args VCSAddCommentToPRRequest, resp *bool) return err } +// CreateIssue calls the CreateIssue method on the VCS implementation. +func (s *VCSRPCServer) CreateIssue(args VCSIssueCreationRequest, resp *int) error { + var err error + *resp, err = s.Impl.CreateIssue(args) + if err != nil { + return fmt.Errorf("VCS CreateIssue failed: %w", err) + } + return nil +} + +// ListIssues calls the ListIssues method on the VCS implementation. +func (s *VCSRPCServer) ListIssues(args VCSListIssuesRequest, resp *VCSListIssuesResponse) error { + issues, err := s.Impl.ListIssues(args) + if err != nil { + return fmt.Errorf("VCS ListIssues failed: %w", err) + } + resp.Issues = issues + return nil +} + +// UpdateIssue calls the UpdateIssue method on the VCS implementation. +func (s *VCSRPCServer) UpdateIssue(args VCSIssueUpdateRequest, resp *bool) error { + var err error + *resp, err = s.Impl.UpdateIssue(args) + if err != nil { + return fmt.Errorf("VCS UpdateIssue failed: %w", err) + } + return nil +} + +// CreateIssueComment calls the CreateIssueComment method on the VCS implementation. +func (s *VCSRPCServer) CreateIssueComment(args VCSCreateIssueCommentRequest, resp *bool) error { + var err error + *resp, err = s.Impl.CreateIssueComment(args) + if err != nil { + return fmt.Errorf("VCS CreateIssueComment failed: %w", err) + } + return nil +} + // VCSPlugin is the implementation of the plugin.Plugin interface for VCS. type VCSPlugin struct { Impl VCS diff --git a/plugins/bitbucket/bitbucket.go b/plugins/bitbucket/bitbucket.go index ec243b7..d6af869 100644 --- a/plugins/bitbucket/bitbucket.go +++ b/plugins/bitbucket/bitbucket.go @@ -252,6 +252,30 @@ func (g *VCSBitbucket) AddCommentToPR(args shared.VCSAddCommentToPRRequest) (boo return true, nil } +// CreateIssue is not implemented for Bitbucket yet. Added to satisfy the VCS interface. 
+func (g *VCSBitbucket) CreateIssue(args shared.VCSIssueCreationRequest) (int, error) {
+	g.logger.Error("CreateIssue not implemented for Bitbucket", "repo", fmt.Sprintf("%s/%s", args.RepoParam.Namespace, args.RepoParam.Repository))
+	return 0, fmt.Errorf("CreateIssue not implemented for Bitbucket")
+}
+
+// ListIssues is not implemented for Bitbucket yet. Added to satisfy the VCS interface.
+func (g *VCSBitbucket) ListIssues(args shared.VCSListIssuesRequest) ([]shared.IssueParams, error) {
+	g.logger.Error("ListIssues not implemented for Bitbucket", "repo", fmt.Sprintf("%s/%s", args.RepoParam.Namespace, args.RepoParam.Repository))
+	return nil, fmt.Errorf("ListIssues not implemented for Bitbucket")
+}
+
+// UpdateIssue is not implemented for Bitbucket yet. Added to satisfy the VCS interface.
+func (g *VCSBitbucket) UpdateIssue(args shared.VCSIssueUpdateRequest) (bool, error) {
+	g.logger.Error("UpdateIssue not implemented for Bitbucket", "repo", fmt.Sprintf("%s/%s", args.RepoParam.Namespace, args.RepoParam.Repository), "number", args.Number)
+	return false, fmt.Errorf("UpdateIssue not implemented for Bitbucket")
+}
+
+// CreateIssueComment is not implemented for Bitbucket yet. Added to satisfy the VCS interface.
+func (g *VCSBitbucket) CreateIssueComment(args shared.VCSCreateIssueCommentRequest) (bool, error) {
+	g.logger.Error("CreateIssueComment not implemented for Bitbucket", "repo", fmt.Sprintf("%s/%s", args.RepoParam.Namespace, args.RepoParam.Repository), "number", args.Number)
+	return false, fmt.Errorf("CreateIssueComment not implemented for Bitbucket")
+}
+
 // fetchPR handles fetching pull request changes.
 func (g *VCSBitbucket) fetchPR(args *shared.VCSFetchRequest) (string, error) {
 	g.logger.Info("handling PR changes fetching")
diff --git a/plugins/github/github.go b/plugins/github/github.go
index d157680..1d575d8 100644
--- a/plugins/github/github.go
+++ b/plugins/github/github.go
@@ -39,6 +39,76 @@ type VCSGithub struct {
 	name string
 }
 
+// CreateIssueComment creates a new comment on an existing GitHub issue.
+func (g *VCSGithub) CreateIssueComment(args shared.VCSCreateIssueCommentRequest) (bool, error) {
+	// Basic validation
+	if strings.TrimSpace(args.RepoParam.Namespace) == "" || strings.TrimSpace(args.RepoParam.Repository) == "" {
+		return false, fmt.Errorf("namespace and repository are required")
+	}
+	if args.Number <= 0 {
+		return false, fmt.Errorf("valid issue number is required")
+	}
+	if strings.TrimSpace(args.Body) == "" {
+		return false, fmt.Errorf("comment body is required")
+	}
+
+	client, err := g.initializeGithubClient()
+	if err != nil {
+		return false, fmt.Errorf("failed to initialize GitHub client: %w", err)
+	}
+
+	comment := &github.IssueComment{Body: github.String(args.Body)}
+	_, _, err = client.Issues.CreateComment(context.Background(), args.RepoParam.Namespace, args.RepoParam.Repository, args.Number, comment)
+	if err != nil {
+		return false, fmt.Errorf("failed to create issue comment: %w", err)
+	}
+
+	return true, nil
+}
+
+// UpdateIssue updates an existing GitHub issue's title, body and/or state.
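+// Empty fields are left unchanged; at least one of title, body or state must
+// be provided or the call fails.
+//
+// Caller-side sketch (illustrative issue number; the VCSRequestBase routing
+// fields must also be populated): close a resolved issue without touching
+// its text:
+//
+//	req := shared.VCSIssueUpdateRequest{Number: 42, State: "closed"}
+//	ok, err := g.UpdateIssue(req)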
+func (g *VCSGithub) UpdateIssue(args shared.VCSIssueUpdateRequest) (bool, error) { + // Basic validation + if args.RepoParam.Namespace == "" || args.RepoParam.Repository == "" { + return false, fmt.Errorf("namespace and repository are required") + } + if args.Number <= 0 { + return false, fmt.Errorf("valid issue number is required") + } + + client, err := g.initializeGithubClient() + if err != nil { + return false, fmt.Errorf("failed to initialize GitHub client: %w", err) + } + + req := &github.IssueRequest{} + if strings.TrimSpace(args.Title) != "" { + req.Title = github.String(args.Title) + } + if strings.TrimSpace(args.Body) != "" { + req.Body = github.String(args.Body) + } + if s := strings.ToLower(strings.TrimSpace(args.State)); s != "" { + switch s { + case "open", "closed": + req.State = github.String(s) + default: + return false, fmt.Errorf("invalid state: %s (allowed: open, closed)", args.State) + } + } + + if req.Title == nil && req.Body == nil && req.State == nil { + return false, fmt.Errorf("nothing to update: provide title, body and/or state") + } + + _, _, err = client.Issues.Edit(context.Background(), args.RepoParam.Namespace, args.RepoParam.Repository, args.Number, req) + if err != nil { + return false, fmt.Errorf("failed to update GitHub issue: %w", err) + } + + return true, nil +} + // newVCSGithub creates a new instance of VCSGithub. func newVCSGithub(logger hclog.Logger) *VCSGithub { return &VCSGithub{ @@ -428,6 +498,110 @@ func (g *VCSGithub) Fetch(args shared.VCSFetchRequest) (shared.VCSFetchResponse, return result, nil } +// CreateIssue creates a new GitHub issue using the provided request. +// +// Returns: +// - The number of the created issue +// - An error if the issue creation fails +func (g *VCSGithub) CreateIssue(args shared.VCSIssueCreationRequest) (int, error) { + client, err := g.initializeGithubClient() + if err != nil { + return 0, fmt.Errorf("failed to initialize GitHub client: %w", err) + } + + issue := &github.IssueRequest{ + Title: github.String(args.Title), + Body: github.String(args.Body), + } + + // If labels are provided, attach them to the issue request + if len(args.Labels) > 0 { + issue.Labels = &args.Labels + } + + // If assignees are provided, attach them to the issue request + if len(args.Assignees) > 0 { + issue.Assignees = &args.Assignees + } + + ctx := context.Background() + createdIssue, _, err := client.Issues.Create(ctx, args.RepoParam.Namespace, args.RepoParam.Repository, issue) + if err != nil { + return 0, fmt.Errorf("failed to create GitHub issue: %w", err) + } + + return createdIssue.GetNumber(), nil +} + +// ListIssues lists issues for a repository. +// Supports optional state filter: "open", "closed", or "all" (default: "open"). 
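+// Pull requests are filtered out of the results, and BodyFilter (when set)
+// keeps only issues whose body contains the given substring.
+//
+// Caller-side sketch (illustrative filter text; VCSRequestBase fields
+// omitted): list open scanio-managed issues:
+//
+//	issues, err := g.ListIssues(shared.VCSListIssuesRequest{
+//		State:      "open",
+//		BodyFilter: "managed by scanio automation",
+//	})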
+func (g *VCSGithub) ListIssues(args shared.VCSListIssuesRequest) ([]shared.IssueParams, error) { + client, err := g.initializeGithubClient() + if err != nil { + return nil, fmt.Errorf("failed to initialize GitHub client: %w", err) + } + + state := strings.ToLower(strings.TrimSpace(args.State)) + switch state { + case "", "open", "closed", "all": + if state == "" { + state = "open" + } + default: + return nil, fmt.Errorf("invalid state: %s (allowed: open, closed, all)", args.State) + } + + opt := &github.IssueListByRepoOptions{ + State: state, + ListOptions: github.ListOptions{PerPage: 100, Page: 1}, + } + + var all []*github.Issue + for { + issues, resp, err := client.Issues.ListByRepo(context.Background(), args.RepoParam.Namespace, args.RepoParam.Repository, opt) + if err != nil { + return nil, fmt.Errorf("failed to list issues: %w", err) + } + all = append(all, issues...) + if resp == nil || resp.NextPage == 0 { + break + } + opt.Page = resp.NextPage + } + + // Filter out pull requests and convert to shared type + var result []shared.IssueParams + for _, it := range all { + if it == nil || it.PullRequestLinks != nil { // skip PRs + continue + } + result = append(result, convertToIssueParams(it)) + } + + // Apply body filter if provided + if args.BodyFilter != "" { + result = filterIssuesByBody(result, args.BodyFilter) + } + + return result, nil +} + +// filterIssuesByBody filters a slice of issues by body content using substring matching. +// Returns only issues whose body contains the specified filter text. +func filterIssuesByBody(issues []shared.IssueParams, bodyFilter string) []shared.IssueParams { + if bodyFilter == "" { + return issues + } + + var filtered []shared.IssueParams + for _, issue := range issues { + if strings.Contains(issue.Body, bodyFilter) { + filtered = append(filtered, issue) + } + } + return filtered +} + // Setup initializes the global configuration for the VCSGithub instance. 
func (g *VCSGithub) Setup(configData config.Config) (bool, error) { g.setGlobalConfig(&configData) @@ -439,6 +613,7 @@ func (g *VCSGithub) Setup(configData config.Config) (bool, error) { } func main() { + logger := hclog.New(&hclog.LoggerOptions{ Level: hclog.Trace, Output: os.Stderr, diff --git a/plugins/github/github_test.go b/plugins/github/github_test.go new file mode 100644 index 0000000..842ac2d --- /dev/null +++ b/plugins/github/github_test.go @@ -0,0 +1,95 @@ +package main + +import ( + "strings" + "testing" + + "github.com/scan-io-git/scan-io/pkg/shared" +) + +func TestFilterIssuesByBody(t *testing.T) { + tests := []struct { + name string + bodyFilter string + issues []shared.IssueParams + expected int + }{ + { + name: "no filter - return all issues", + bodyFilter: "", + issues: []shared.IssueParams{ + {Number: 1, Body: "Regular issue"}, + {Number: 2, Body: "Scanio managed issue\n> [!NOTE]\n> This issue was created and will be managed by scanio automation"}, + {Number: 3, Body: "Another regular issue"}, + }, + expected: 3, + }, + { + name: "filter for scanio managed issues", + bodyFilter: "> [!NOTE]\n> This issue was created and will be managed by scanio automation", + issues: []shared.IssueParams{ + {Number: 1, Body: "Regular issue"}, + {Number: 2, Body: "Scanio managed issue\n> [!NOTE]\n> This issue was created and will be managed by scanio automation"}, + {Number: 3, Body: "Another regular issue"}, + }, + expected: 1, + }, + { + name: "filter with partial match", + bodyFilter: "scanio automation", + issues: []shared.IssueParams{ + {Number: 1, Body: "Regular issue"}, + {Number: 2, Body: "Scanio managed issue\n> [!NOTE]\n> This issue was created and will be managed by scanio automation"}, + {Number: 3, Body: "Another regular issue"}, + }, + expected: 1, + }, + { + name: "filter with no matches", + bodyFilter: "nonexistent text", + issues: []shared.IssueParams{ + {Number: 1, Body: "Regular issue"}, + {Number: 2, Body: "Another regular issue"}, + }, + expected: 0, + }, + { + name: "filter with multiple matches", + bodyFilter: "issue", + issues: []shared.IssueParams{ + {Number: 1, Body: "Regular issue"}, + {Number: 2, Body: "Another issue"}, + {Number: 3, Body: "Not a problem"}, + }, + expected: 2, + }, + { + name: "case sensitive filtering", + bodyFilter: "Issue", + issues: []shared.IssueParams{ + {Number: 1, Body: "Regular issue"}, + {Number: 2, Body: "Another Issue"}, + }, + expected: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := filterIssuesByBody(tt.issues, tt.bodyFilter) + + if len(result) != tt.expected { + t.Errorf("expected %d issues, got %d", tt.expected, len(result)) + } + + // Verify that the correct issues are returned + if tt.bodyFilter != "" { + for _, issue := range result { + if !strings.Contains(issue.Body, tt.bodyFilter) { + t.Errorf("issue %d does not contain filter text: %s", issue.Number, tt.bodyFilter) + } + } + } + }) + } +} diff --git a/plugins/github/utils.go b/plugins/github/utils.go index cb81fe1..47926dd 100644 --- a/plugins/github/utils.go +++ b/plugins/github/utils.go @@ -33,6 +33,24 @@ func safeTime(t *time.Time) int64 { return t.Unix() } +// convertToIssueParams converts a GitHub Issue object to shared.IssueParams. 
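+// All pointer fields are dereferenced through the safe* helpers, so a sparse
+// issue (for example one with no body or author) yields zero values instead
+// of panicking.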
+func convertToIssueParams(iss *github.Issue) shared.IssueParams { + if iss == nil { + return shared.IssueParams{} + } + + return shared.IssueParams{ + Number: safeInt(iss.Number), + Title: safeString(iss.Title), + Body: safeString(iss.Body), + State: safeString(iss.State), + Author: safeUser(iss.User), + URL: safeString(iss.HTMLURL), + CreatedDate: safeTime(iss.CreatedAt), + UpdatedDate: safeTime(iss.UpdatedAt), + } +} + // safeUser converts a GitHub user to a shared.User, handling nil safely. func safeUser(user *github.User) shared.User { if user == nil || user.Login == nil { diff --git a/plugins/gitlab/gitlab.go b/plugins/gitlab/gitlab.go index e5d2fd4..e7ae544 100644 --- a/plugins/gitlab/gitlab.go +++ b/plugins/gitlab/gitlab.go @@ -405,6 +405,30 @@ func (g *VCSGitlab) AddCommentToPR(args shared.VCSAddCommentToPRRequest) (bool, return true, nil } +// CreateIssue is not implemented for GitLab yet. Added to satisfy the VCS interface. +func (g *VCSGitlab) CreateIssue(args shared.VCSIssueCreationRequest) (int, error) { + g.logger.Error("CreateIssue not implemented for GitLab", "repo", fmt.Sprintf("%s/%s", args.RepoParam.Namespace, args.RepoParam.Repository)) + return 0, fmt.Errorf("CreateIssue not implemented for GitLab") +} + +// ListIssues is not implemented for GitLab yet. Added to satisfy the VCS interface. +func (g *VCSGitlab) ListIssues(args shared.VCSListIssuesRequest) ([]shared.IssueParams, error) { + g.logger.Error("ListIssues not implemented for GitLab", "repo", fmt.Sprintf("%s/%s", args.RepoParam.Namespace, args.RepoParam.Repository)) + return nil, fmt.Errorf("ListIssues not implemented for GitLab") +} + +// UpdateIssue is not implemented for GitLab yet. Added to satisfy the VCS interface. +func (g *VCSGitlab) UpdateIssue(args shared.VCSIssueUpdateRequest) (bool, error) { + g.logger.Error("UpdateIssue not implemented for GitLab", "repo", fmt.Sprintf("%s/%s", args.RepoParam.Namespace, args.RepoParam.Repository), "number", args.Number) + return false, fmt.Errorf("UpdateIssue not implemented for GitLab") +} + +// CreateIssueComment is not implemented for GitLab yet. Added to satisfy the VCS interface. +func (g *VCSGitlab) CreateIssueComment(args shared.VCSCreateIssueCommentRequest) (bool, error) { + g.logger.Error("CreateIssueComment not implemented for GitLab", "repo", fmt.Sprintf("%s/%s", args.RepoParam.Namespace, args.RepoParam.Repository), "number", args.Number) + return false, fmt.Errorf("CreateIssueComment not implemented for GitLab") +} + // buildCommentWithAttachments constructs the full comment text with file attachments. func (g *VCSGitlab) buildCommentWithAttachments(client *gitlab.Client, projectID int, comment string, filePaths []string) (string, error) { var attachmentsText strings.Builder