From dac2098e890a001f7e1a3b09c54dfb5ac9cddd32 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Thu, 21 Aug 2025 09:52:10 +0200 Subject: [PATCH 01/52] feat: introduce CreateIssue vcs plugin method, and create-issue cmd --- VERSION | 2 +- cmd/create-issue/create-issue.go | 97 ++++++++++++++++++++++++++++++++ cmd/root.go | 5 +- pkg/shared/ivcs.go | 28 +++++++++ plugins/bitbucket/bitbucket.go | 6 ++ plugins/github/VERSION | 2 +- plugins/github/github.go | 42 ++++++++++++++ plugins/gitlab/gitlab.go | 6 ++ 8 files changed, 185 insertions(+), 3 deletions(-) create mode 100644 cmd/create-issue/create-issue.go diff --git a/VERSION b/VERSION index 085e3baf..ccd4aa6e 100644 --- a/VERSION +++ b/VERSION @@ -1,3 +1,3 @@ { - "version": "0.3.0" + "version": "0.3.1" } \ No newline at end of file diff --git a/cmd/create-issue/create-issue.go b/cmd/create-issue/create-issue.go new file mode 100644 index 00000000..6f80578a --- /dev/null +++ b/cmd/create-issue/create-issue.go @@ -0,0 +1,97 @@ +package createissue + +import ( + "fmt" + + "github.com/spf13/cobra" + + "github.com/scan-io-git/scan-io/pkg/shared" + "github.com/scan-io-git/scan-io/pkg/shared/config" + "github.com/scan-io-git/scan-io/pkg/shared/errors" + "github.com/scan-io-git/scan-io/pkg/shared/logger" +) + +// RunOptions holds flags for the create-issue command. +type RunOptions struct { + Namespace string `json:"namespace,omitempty"` + Repository string `json:"repository,omitempty"` + Title string `json:"title,omitempty"` + Body string `json:"body,omitempty"` +} + +var ( + AppConfig *config.Config + opts RunOptions + + // CreateIssueCmd represents the command to create a GitHub issue. 
+ CreateIssueCmd = &cobra.Command{ + Use: "create-issue --namespace NAMESPACE --repository REPO --title TITLE [--body BODY]", + Short: "Create a GitHub issue (minimal command)", + SilenceUsage: true, + DisableFlagsInUseLine: true, + RunE: func(cmd *cobra.Command, args []string) error { + if len(args) == 0 && !shared.HasFlags(cmd.Flags()) { + return cmd.Help() + } + + if err := validate(&opts); err != nil { + return errors.NewCommandError(opts, nil, err, 1) + } + + lg := logger.NewLogger(AppConfig, "create-issue") + + // Build request for VCS plugin + req := shared.VCSIssueCreationRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: opts.Namespace, + Repository: opts.Repository, + }, + Action: "createIssue", + }, + Title: opts.Title, + Body: opts.Body, + } + + var createdIssueNumber int + err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") + } + num, err := vcs.CreateIssue(req) + if err != nil { + return err + } + createdIssueNumber = num + return nil + }) + if err != nil { + lg.Error("failed to create issue via plugin", "error", err) + return errors.NewCommandError(opts, nil, fmt.Errorf("create issue failed: %w", err), 2) + } + + lg.Info("issue created", "number", createdIssueNumber) + fmt.Printf("Created issue #%d\n", createdIssueNumber) + return nil + }, + } +) + +// Init wires config into this command. 
+func Init(cfg *config.Config) { AppConfig = cfg } + +func init() { + CreateIssueCmd.Flags().StringVar(&opts.Namespace, "namespace", "", "GitHub org/user") + CreateIssueCmd.Flags().StringVar(&opts.Repository, "repository", "", "Repository name") + CreateIssueCmd.Flags().StringVar(&opts.Title, "title", "", "Issue title") + CreateIssueCmd.Flags().StringVar(&opts.Body, "body", "", "Issue body") + CreateIssueCmd.Flags().BoolP("help", "h", false, "Show help for create-issue command.") +} + +func validate(o *RunOptions) error { + if o.Namespace == "" { return fmt.Errorf("--namespace is required") } + if o.Repository == "" { return fmt.Errorf("--repository is required") } + if o.Title == "" { return fmt.Errorf("--title is required") } + return nil +} diff --git a/cmd/root.go b/cmd/root.go index ee8ee381..4fe12dc0 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -8,8 +8,9 @@ import ( "github.com/spf13/cobra" "github.com/scan-io-git/scan-io/cmd/analyse" + createissue "github.com/scan-io-git/scan-io/cmd/create-issue" "github.com/scan-io-git/scan-io/cmd/fetch" - "github.com/scan-io-git/scan-io/cmd/integration-vcs" + integrationvcs "github.com/scan-io-git/scan-io/cmd/integration-vcs" "github.com/scan-io-git/scan-io/cmd/list" "github.com/scan-io-git/scan-io/cmd/version" "github.com/scan-io-git/scan-io/pkg/shared" @@ -70,6 +71,7 @@ func initConfig() { fetch.Init(AppConfig) analyse.Init(AppConfig) integrationvcs.Init(AppConfig) + createissue.Init(AppConfig) version.Init(AppConfig) } @@ -81,6 +83,7 @@ func init() { rootCmd.AddCommand(fetch.FetchCmd) rootCmd.AddCommand(analyse.AnalyseCmd) rootCmd.AddCommand(integrationvcs.IntegrationVCSCmd) + rootCmd.AddCommand(createissue.CreateIssueCmd) rootCmd.AddCommand(version.NewVersionCmd()) // rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file") } diff --git a/pkg/shared/ivcs.go b/pkg/shared/ivcs.go index 615ce224..a56b1d8d 100644 --- a/pkg/shared/ivcs.go +++ b/pkg/shared/ivcs.go @@ -89,6 +89,13 @@ type 
VCSSetStatusOfPRRequest struct { Comment string `json:"comment"` } +// VCSIssueCreationRequest represents a request to create a new issue. +type VCSIssueCreationRequest struct { + VCSRequestBase + Title string `json:"title"` + Body string `json:"body"` +} + // VCSAddCommentToPRRequest represents a request to add a comment to a PR. type VCSAddCommentToPRRequest struct { VCSRequestBase @@ -128,6 +135,7 @@ type VCS interface { AddRoleToPR(req VCSAddRoleToPRRequest) (bool, error) SetStatusOfPR(req VCSSetStatusOfPRRequest) (bool, error) AddCommentToPR(req VCSAddCommentToPRRequest) (bool, error) + CreateIssue(req VCSIssueCreationRequest) (int, error) } // VCSRPCClient implements the VCS interface for RPC clients. @@ -205,6 +213,16 @@ func (c *VCSRPCClient) AddCommentToPR(req VCSAddCommentToPRRequest) (bool, error return resp, nil } +// CreateIssue calls the CreateIssue method on the RPC client. +func (c *VCSRPCClient) CreateIssue(req VCSIssueCreationRequest) (int, error) { + var resp int + err := c.client.Call("Plugin.CreateIssue", req, &resp) + if err != nil { + return 0, fmt.Errorf("RPC client CreateIssue call failed: %w", err) + } + return resp, nil +} + // VCSRPCServer wraps a VCS implementation to provide an RPC server. type VCSRPCServer struct { Impl VCS @@ -280,6 +298,16 @@ func (s *VCSRPCServer) AddCommentToPR(args VCSAddCommentToPRRequest, resp *bool) return err } +// CreateIssue calls the CreateIssue method on the VCS implementation. +func (s *VCSRPCServer) CreateIssue(args VCSIssueCreationRequest, resp *int) error { + var err error + *resp, err = s.Impl.CreateIssue(args) + if err != nil { + return fmt.Errorf("VCS CreateIssue failed: %w", err) + } + return nil +} + // VCSPlugin is the implementation of the plugin.Plugin interface for VCS. 
type VCSPlugin struct { Impl VCS diff --git a/plugins/bitbucket/bitbucket.go b/plugins/bitbucket/bitbucket.go index c8664f4f..1d055848 100644 --- a/plugins/bitbucket/bitbucket.go +++ b/plugins/bitbucket/bitbucket.go @@ -247,6 +247,12 @@ func (g *VCSBitbucket) AddCommentToPR(args shared.VCSAddCommentToPRRequest) (boo return true, nil } +// CreateIssue is not implemented for Bitbucket yet. Added to satisfy the VCS interface. +func (g *VCSBitbucket) CreateIssue(args shared.VCSIssueCreationRequest) (int, error) { + g.logger.Error("CreateIssue not implemented for Bitbucket", "repo", fmt.Sprintf("%s/%s", args.RepoParam.Namespace, args.RepoParam.Repository)) + return 0, fmt.Errorf("CreateIssue not implemented for Bitbucket") +} + // fetchPR handles fetching pull request changes. func (g *VCSBitbucket) fetchPR(args *shared.VCSFetchRequest) (string, error) { g.logger.Info("handling PR changes fetching") diff --git a/plugins/github/VERSION b/plugins/github/VERSION index bf08a8b3..038a94ad 100644 --- a/plugins/github/VERSION +++ b/plugins/github/VERSION @@ -1,4 +1,4 @@ { - "version": "0.1.0", + "version": "0.1.1", "plugin_type": "vcs" } \ No newline at end of file diff --git a/plugins/github/github.go b/plugins/github/github.go index 775364d6..e1a5b263 100644 --- a/plugins/github/github.go +++ b/plugins/github/github.go @@ -417,6 +417,47 @@ func (g *VCSGithub) Fetch(args shared.VCSFetchRequest) (shared.VCSFetchResponse, return result, nil } +// CreateIssue creates a new GitHub issue using the provided request. 
+// +// Parameters: +// +// args - VCSIssueCreationRequest containing repository details and issue content +// +// Examples: +// - Create an issue: +// req := shared.VCSIssueCreationRequest{ +// RepoParam: shared.RepositoryParams{ +// Namespace: "octocat", +// Repository: "hello-world", +// }, +// Title: "New Feature Request", +// Body: "Please add support for...", +// } +// issueNumber, err := githubClient.CreateIssue(req) +// +// Returns: +// - The number of the created issue +// - An error if the issue creation fails +func (g *VCSGithub) CreateIssue(args shared.VCSIssueCreationRequest) (int, error) { + client, err := g.initializeGithubClient() + if err != nil { + return 0, fmt.Errorf("failed to initialize GitHub client: %w", err) + } + + issue := &github.IssueRequest{ + Title: github.String(args.Title), + Body: github.String(args.Body), + } + + ctx := context.Background() + createdIssue, _, err := client.Issues.Create(ctx, args.RepoParam.Namespace, args.RepoParam.Repository, issue) + if err != nil { + return 0, fmt.Errorf("failed to create GitHub issue: %w", err) + } + + return createdIssue.GetNumber(), nil +} + // Setup initializes the global configuration for the VCSGithub instance. func (g *VCSGithub) Setup(configData config.Config) (bool, error) { g.setGlobalConfig(&configData) @@ -428,6 +469,7 @@ func (g *VCSGithub) Setup(configData config.Config) (bool, error) { } func main() { + logger := hclog.New(&hclog.LoggerOptions{ Level: hclog.Trace, Output: os.Stderr, diff --git a/plugins/gitlab/gitlab.go b/plugins/gitlab/gitlab.go index 06a60dee..20ff4a53 100644 --- a/plugins/gitlab/gitlab.go +++ b/plugins/gitlab/gitlab.go @@ -399,6 +399,12 @@ func (g *VCSGitlab) AddCommentToPR(args shared.VCSAddCommentToPRRequest) (bool, return true, nil } +// CreateIssue is not implemented for GitLab yet. Added to satisfy the VCS interface. 
+func (g *VCSGitlab) CreateIssue(args shared.VCSIssueCreationRequest) (int, error) { + g.logger.Error("CreateIssue not implemented for GitLab", "repo", fmt.Sprintf("%s/%s", args.RepoParam.Namespace, args.RepoParam.Repository)) + return 0, fmt.Errorf("CreateIssue not implemented for GitLab") +} + // buildCommentWithAttachments constructs the full comment text with file attachments. func (g *VCSGitlab) buildCommentWithAttachments(client *gitlab.Client, projectID int, comment string, filePaths []string) (string, error) { var attachmentsText strings.Builder From 13cc07e38e10ec24677b3c38d2c8136e12fd89c2 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Fri, 22 Aug 2025 08:49:32 +0200 Subject: [PATCH 02/52] feat: add gihtub plugins methods to update and list issues --- cmd/create-issue/create-issue.go | 18 +++-- cmd/list-issues/list-issues.go | 118 +++++++++++++++++++++++++++++ cmd/root.go | 6 ++ cmd/update-issue/update-issue.go | 124 +++++++++++++++++++++++++++++++ pkg/shared/ivcs.go | 73 ++++++++++++++++++ plugins/bitbucket/bitbucket.go | 12 +++ plugins/github/github.go | 87 ++++++++++++++++++++++ plugins/github/utils.go | 33 ++++++-- plugins/gitlab/gitlab.go | 12 +++ 9 files changed, 470 insertions(+), 13 deletions(-) create mode 100644 cmd/list-issues/list-issues.go create mode 100644 cmd/update-issue/update-issue.go diff --git a/cmd/create-issue/create-issue.go b/cmd/create-issue/create-issue.go index 6f80578a..6f195be8 100644 --- a/cmd/create-issue/create-issue.go +++ b/cmd/create-issue/create-issue.go @@ -20,13 +20,15 @@ type RunOptions struct { } var ( - AppConfig *config.Config - opts RunOptions + AppConfig *config.Config + opts RunOptions // CreateIssueCmd represents the command to create a GitHub issue. 
CreateIssueCmd = &cobra.Command{ Use: "create-issue --namespace NAMESPACE --repository REPO --title TITLE [--body BODY]", Short: "Create a GitHub issue (minimal command)", + Example: "go run ./main.go create-issue --namespace scan-io-git --repository scanio-test --title 'My Title' --body 'My Body'", + Hidden: true, SilenceUsage: true, DisableFlagsInUseLine: true, RunE: func(cmd *cobra.Command, args []string) error { @@ -90,8 +92,14 @@ func init() { } func validate(o *RunOptions) error { - if o.Namespace == "" { return fmt.Errorf("--namespace is required") } - if o.Repository == "" { return fmt.Errorf("--repository is required") } - if o.Title == "" { return fmt.Errorf("--title is required") } + if o.Namespace == "" { + return fmt.Errorf("--namespace is required") + } + if o.Repository == "" { + return fmt.Errorf("--repository is required") + } + if o.Title == "" { + return fmt.Errorf("--title is required") + } return nil } diff --git a/cmd/list-issues/list-issues.go b/cmd/list-issues/list-issues.go new file mode 100644 index 00000000..7d8907be --- /dev/null +++ b/cmd/list-issues/list-issues.go @@ -0,0 +1,118 @@ +package listissues + +import ( + "fmt" + "sort" + "time" + + "github.com/spf13/cobra" + + "github.com/scan-io-git/scan-io/pkg/shared" + "github.com/scan-io-git/scan-io/pkg/shared/config" + "github.com/scan-io-git/scan-io/pkg/shared/errors" + "github.com/scan-io-git/scan-io/pkg/shared/logger" +) + +// RunOptions holds flags for the list-issues command. +type RunOptions struct { + Namespace string `json:"namespace,omitempty"` + Repository string `json:"repository,omitempty"` + State string `json:"state,omitempty"` // open|closed|all +} + +var ( + AppConfig *config.Config + opts RunOptions + + // ListIssuesCmd represents the command to list GitHub issues. 
+ ListIssuesCmd = &cobra.Command{ + Use: "list-issues --namespace NAMESPACE --repository REPO [--state open|closed|all]", + Short: "List GitHub issues (minimal command)", + Hidden: true, + SilenceUsage: true, + DisableFlagsInUseLine: true, + RunE: func(cmd *cobra.Command, args []string) error { + if len(args) == 0 && !shared.HasFlags(cmd.Flags()) { + return cmd.Help() + } + + if err := validate(&opts); err != nil { + return errors.NewCommandError(opts, nil, err, 1) + } + + lg := logger.NewLogger(AppConfig, "list-issues") + + // Build request for VCS plugin + req := shared.VCSListIssuesRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: opts.Namespace, + Repository: opts.Repository, + }, + Action: "listIssues", + }, + State: opts.State, + } + + var issues []shared.IssueParams + err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") + } + list, err := vcs.ListIssues(req) + if err != nil { + return err + } + issues = list + return nil + }) + if err != nil { + lg.Error("failed to list issues via plugin", "error", err) + return errors.NewCommandError(opts, nil, fmt.Errorf("list issues failed: %w", err), 2) + } + + // Sort by updated desc for nicer output + sort.Slice(issues, func(i, j int) bool { return issues[i].UpdatedDate > issues[j].UpdatedDate }) + + if len(issues) == 0 { + fmt.Println("No issues found") + return nil + } + + // Print concise table + fmt.Printf("# %-8s %-7s %-18s %s\n", "NUMBER", "STATE", "AUTHOR", "TITLE") + for _, it := range issues { + upd := time.Unix(it.UpdatedDate, 0).UTC().Format(time.RFC3339) + _ = upd // keep for future verbose mode + fmt.Printf("- %-8d %-7s %-18s %s\n", it.Number, it.State, it.Author.UserName, it.Title) + } + return nil + }, + } +) + +// Init wires config into this command. 
+func Init(cfg *config.Config) { AppConfig = cfg } + +func init() { + ListIssuesCmd.Flags().StringVar(&opts.Namespace, "namespace", "", "GitHub org/user") + ListIssuesCmd.Flags().StringVar(&opts.Repository, "repository", "", "Repository name") + ListIssuesCmd.Flags().StringVar(&opts.State, "state", "open", "Issue state filter: open|closed|all") + ListIssuesCmd.Flags().BoolP("help", "h", false, "Show help for list-issues command.") +} + +func validate(o *RunOptions) error { + if o.Namespace == "" { + return fmt.Errorf("--namespace is required") + } + if o.Repository == "" { + return fmt.Errorf("--repository is required") + } + switch o.State { + case "", "open", "closed", "all": + return nil + default: + return fmt.Errorf("--state must be one of: open, closed, all") + } +} diff --git a/cmd/root.go b/cmd/root.go index 4fe12dc0..16454a1a 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -12,6 +12,8 @@ import ( "github.com/scan-io-git/scan-io/cmd/fetch" integrationvcs "github.com/scan-io-git/scan-io/cmd/integration-vcs" "github.com/scan-io-git/scan-io/cmd/list" + listissues "github.com/scan-io-git/scan-io/cmd/list-issues" + updateissue "github.com/scan-io-git/scan-io/cmd/update-issue" "github.com/scan-io-git/scan-io/cmd/version" "github.com/scan-io-git/scan-io/pkg/shared" "github.com/scan-io-git/scan-io/pkg/shared/config" @@ -72,6 +74,8 @@ func initConfig() { analyse.Init(AppConfig) integrationvcs.Init(AppConfig) createissue.Init(AppConfig) + listissues.Init(AppConfig) + updateissue.Init(AppConfig) version.Init(AppConfig) } @@ -84,6 +88,8 @@ func init() { rootCmd.AddCommand(analyse.AnalyseCmd) rootCmd.AddCommand(integrationvcs.IntegrationVCSCmd) rootCmd.AddCommand(createissue.CreateIssueCmd) + rootCmd.AddCommand(listissues.ListIssuesCmd) + rootCmd.AddCommand(updateissue.UpdateIssueCmd) rootCmd.AddCommand(version.NewVersionCmd()) // rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file") } diff --git a/cmd/update-issue/update-issue.go 
b/cmd/update-issue/update-issue.go new file mode 100644 index 00000000..ccaa981d --- /dev/null +++ b/cmd/update-issue/update-issue.go @@ -0,0 +1,124 @@ +package updateissue + +import ( + "fmt" + "strings" + + "github.com/spf13/cobra" + + "github.com/scan-io-git/scan-io/pkg/shared" + "github.com/scan-io-git/scan-io/pkg/shared/config" + "github.com/scan-io-git/scan-io/pkg/shared/errors" + "github.com/scan-io-git/scan-io/pkg/shared/logger" +) + +// RunOptions holds flags for the update-issue command. +type RunOptions struct { + Namespace string `json:"namespace,omitempty"` + Repository string `json:"repository,omitempty"` + Number int `json:"number,omitempty"` + Title string `json:"title,omitempty"` + Body string `json:"body,omitempty"` + State string `json:"state,omitempty"` +} + +var ( + AppConfig *config.Config + opts RunOptions + + // UpdateIssueCmd represents the command to update a GitHub issue. + UpdateIssueCmd = &cobra.Command{ + Use: "update-issue --namespace NAMESPACE --repository REPO --number N [--title TITLE] [--body BODY] [--state STATE]", + Short: "Update a GitHub issue (title/body/state)", + Example: "scanio update-issue --namespace scan-io-git --repository scanio-test --number 4 --state closed", + SilenceUsage: true, + Hidden: true, + DisableFlagsInUseLine: true, + RunE: func(cmd *cobra.Command, args []string) error { + if len(args) == 0 && !shared.HasFlags(cmd.Flags()) { + return cmd.Help() + } + + if err := validate(&opts); err != nil { + return errors.NewCommandError(opts, nil, err, 1) + } + + lg := logger.NewLogger(AppConfig, "update-issue") + + // Build request for VCS plugin + req := shared.VCSIssueUpdateRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: opts.Namespace, + Repository: opts.Repository, + }, + Action: "updateIssue", + }, + Number: opts.Number, + Title: opts.Title, + Body: opts.Body, + State: opts.State, + } + + var success bool + err := shared.WithPlugin(AppConfig, "plugin-vcs", 
shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") + } + okResp, err := vcs.UpdateIssue(req) + if err != nil { + return err + } + success = okResp + return nil + }) + if err != nil { + lg.Error("failed to update issue via plugin", "error", err) + return errors.NewCommandError(opts, nil, fmt.Errorf("update issue failed: %w", err), 2) + } + + if success { + lg.Info("issue updated", "number", opts.Number) + fmt.Printf("Updated issue #%d\n", opts.Number) + } else { + lg.Warn("issue update returned false", "number", opts.Number) + fmt.Printf("Issue not updated (no-op?) #%d\n", opts.Number) + } + return nil + }, + } +) + +// Init wires config into this command. +func Init(cfg *config.Config) { AppConfig = cfg } + +func init() { + UpdateIssueCmd.Flags().StringVar(&opts.Namespace, "namespace", "", "GitHub org/user") + UpdateIssueCmd.Flags().StringVar(&opts.Repository, "repository", "", "Repository name") + UpdateIssueCmd.Flags().IntVar(&opts.Number, "number", 0, "Issue number") + UpdateIssueCmd.Flags().StringVar(&opts.Title, "title", "", "New issue title") + UpdateIssueCmd.Flags().StringVar(&opts.Body, "body", "", "New issue body") + UpdateIssueCmd.Flags().StringVar(&opts.State, "state", "", "New issue state: open or closed") + UpdateIssueCmd.Flags().BoolP("help", "h", false, "Show help for update-issue command.") +} + +func validate(o *RunOptions) error { + if o.Namespace == "" { + return fmt.Errorf("--namespace is required") + } + if o.Repository == "" { + return fmt.Errorf("--repository is required") + } + if o.Number <= 0 { + return fmt.Errorf("--number is required and must be > 0") + } + // at least one field to update must be provided + if strings.TrimSpace(o.Title) == "" && strings.TrimSpace(o.Body) == "" && strings.TrimSpace(o.State) == "" { + return fmt.Errorf("provide at least one of --title, --body, or --state") + } + if s := 
strings.ToLower(strings.TrimSpace(o.State)); s != "" && s != "open" && s != "closed" { + return fmt.Errorf("--state must be 'open' or 'closed' if provided") + } + return nil +} diff --git a/pkg/shared/ivcs.go b/pkg/shared/ivcs.go index a56b1d8d..b491e77d 100644 --- a/pkg/shared/ivcs.go +++ b/pkg/shared/ivcs.go @@ -33,6 +33,17 @@ type PRParams struct { UpdatedDate int64 `json:"updated_date"` } +// IssueParams holds the details of an issue. +type IssueParams struct { + Number int `json:"number"` + Title string `json:"title"` + State string `json:"state"` + Author User `json:"author"` + URL string `json:"url"` + CreatedDate int64 `json:"created_date"` + UpdatedDate int64 `json:"updated_date"` +} + // User holds the details of a user. type User struct { UserName string `json:"user_name"` @@ -96,6 +107,15 @@ type VCSIssueCreationRequest struct { Body string `json:"body"` } +// VCSIssueUpdateRequest represents a request to update an existing issue. +type VCSIssueUpdateRequest struct { + VCSRequestBase + Number int `json:"number"` + Title string `json:"title"` + Body string `json:"body"` + State string `json:"state"` // optional: "open" or "closed" +} + // VCSAddCommentToPRRequest represents a request to add a comment to a PR. type VCSAddCommentToPRRequest struct { VCSRequestBase @@ -103,6 +123,12 @@ type VCSAddCommentToPRRequest struct { FilePaths []string `json:"file_paths"` } +// VCSListIssuesRequest represents a request to list issues in a repository. +type VCSListIssuesRequest struct { + VCSRequestBase + State string `json:"state"` // open, closed, all; default open +} + // ListFuncResult holds the result of a list function. type ListFuncResult struct { Args VCSListRepositoriesRequest `json:"args"` @@ -126,6 +152,11 @@ type VCSRetrievePRInformationResponse struct { PR PRParams `json:"pr"` } +// VCSListIssuesResponse represents a response from listing issues. 
+type VCSListIssuesResponse struct { + Issues []IssueParams `json:"issues"` +} + // VCS defines the interface for VCS-related operations. type VCS interface { Setup(configData config.Config) (bool, error) @@ -136,6 +167,8 @@ type VCS interface { SetStatusOfPR(req VCSSetStatusOfPRRequest) (bool, error) AddCommentToPR(req VCSAddCommentToPRRequest) (bool, error) CreateIssue(req VCSIssueCreationRequest) (int, error) + ListIssues(req VCSListIssuesRequest) ([]IssueParams, error) + UpdateIssue(req VCSIssueUpdateRequest) (bool, error) } // VCSRPCClient implements the VCS interface for RPC clients. @@ -223,6 +256,26 @@ func (c *VCSRPCClient) CreateIssue(req VCSIssueCreationRequest) (int, error) { return resp, nil } +// ListIssues calls the ListIssues method on the RPC client. +func (c *VCSRPCClient) ListIssues(req VCSListIssuesRequest) ([]IssueParams, error) { + var resp VCSListIssuesResponse + err := c.client.Call("Plugin.ListIssues", req, &resp) + if err != nil { + return nil, fmt.Errorf("RPC client ListIssues call failed: %w", err) + } + return resp.Issues, nil +} + +// UpdateIssue calls the UpdateIssue method on the RPC client. +func (c *VCSRPCClient) UpdateIssue(req VCSIssueUpdateRequest) (bool, error) { + var resp bool + err := c.client.Call("Plugin.UpdateIssue", req, &resp) + if err != nil { + return false, fmt.Errorf("RPC client UpdateIssue call failed: %w", err) + } + return resp, nil +} + // VCSRPCServer wraps a VCS implementation to provide an RPC server. type VCSRPCServer struct { Impl VCS @@ -308,6 +361,26 @@ func (s *VCSRPCServer) CreateIssue(args VCSIssueCreationRequest, resp *int) erro return nil } +// ListIssues calls the ListIssues method on the VCS implementation. 
+func (s *VCSRPCServer) ListIssues(args VCSListIssuesRequest, resp *VCSListIssuesResponse) error { + issues, err := s.Impl.ListIssues(args) + if err != nil { + return fmt.Errorf("VCS ListIssues failed: %w", err) + } + resp.Issues = issues + return nil +} + +// UpdateIssue calls the UpdateIssue method on the VCS implementation. +func (s *VCSRPCServer) UpdateIssue(args VCSIssueUpdateRequest, resp *bool) error { + var err error + *resp, err = s.Impl.UpdateIssue(args) + if err != nil { + return fmt.Errorf("VCS UpdateIssue failed: %w", err) + } + return nil +} + // VCSPlugin is the implementation of the plugin.Plugin interface for VCS. type VCSPlugin struct { Impl VCS diff --git a/plugins/bitbucket/bitbucket.go b/plugins/bitbucket/bitbucket.go index 1d055848..7283b36c 100644 --- a/plugins/bitbucket/bitbucket.go +++ b/plugins/bitbucket/bitbucket.go @@ -253,6 +253,18 @@ func (g *VCSBitbucket) CreateIssue(args shared.VCSIssueCreationRequest) (int, er return 0, fmt.Errorf("CreateIssue not implemented for Bitbucket") } +// ListIssues is not implemented for Bitbucket yet. Added to satisfy the VCS interface. +func (g *VCSBitbucket) ListIssues(args shared.VCSListIssuesRequest) ([]shared.IssueParams, error) { + g.logger.Error("ListIssues not implemented for Bitbucket", "repo", fmt.Sprintf("%s/%s", args.RepoParam.Namespace, args.RepoParam.Repository)) + return nil, fmt.Errorf("ListIssues not implemented for Bitbucket") +} + +// UpdateIssue is not implemented for Bitbucket yet. Added to satisfy the VCS interface. +func (g *VCSBitbucket) UpdateIssue(args shared.VCSIssueUpdateRequest) (bool, error) { + g.logger.Error("UpdateIssue not implemented for Bitbucket", "repo", fmt.Sprintf("%s/%s", args.RepoParam.Namespace, args.RepoParam.Repository), "number", args.Number) + return false, fmt.Errorf("UpdateIssue not implemented for Bitbucket") +} + // fetchPR handles fetching pull request changes. 
func (g *VCSBitbucket) fetchPR(args *shared.VCSFetchRequest) (string, error) { g.logger.Info("handling PR changes fetching") diff --git a/plugins/github/github.go b/plugins/github/github.go index e1a5b263..3e89a3b7 100644 --- a/plugins/github/github.go +++ b/plugins/github/github.go @@ -34,6 +34,49 @@ type VCSGithub struct { globalConfig *config.Config } +// UpdateIssue updates an existing GitHub issue's title and/or body. +func (g *VCSGithub) UpdateIssue(args shared.VCSIssueUpdateRequest) (bool, error) { + // Basic validation + if args.RepoParam.Namespace == "" || args.RepoParam.Repository == "" { + return false, fmt.Errorf("namespace and repository are required") + } + if args.Number <= 0 { + return false, fmt.Errorf("valid issue number is required") + } + + client, err := g.initializeGithubClient() + if err != nil { + return false, fmt.Errorf("failed to initialize GitHub client: %w", err) + } + + req := &github.IssueRequest{} + if strings.TrimSpace(args.Title) != "" { + req.Title = github.String(args.Title) + } + if strings.TrimSpace(args.Body) != "" { + req.Body = github.String(args.Body) + } + if s := strings.ToLower(strings.TrimSpace(args.State)); s != "" { + switch s { + case "open", "closed": + req.State = github.String(s) + default: + return false, fmt.Errorf("invalid state: %s (allowed: open, closed)", args.State) + } + } + + if req.Title == nil && req.Body == nil && req.State == nil { + return false, fmt.Errorf("nothing to update: provide title, body and/or state") + } + + _, _, err = client.Issues.Edit(context.Background(), args.RepoParam.Namespace, args.RepoParam.Repository, args.Number, req) + if err != nil { + return false, fmt.Errorf("failed to update GitHub issue: %w", err) + } + + return true, nil +} + // newVCSGithub creates a new instance of VCSGithub. 
func newVCSGithub(logger hclog.Logger) *VCSGithub { return &VCSGithub{ @@ -458,6 +501,50 @@ func (g *VCSGithub) CreateIssue(args shared.VCSIssueCreationRequest) (int, error return createdIssue.GetNumber(), nil } +// ListIssues lists issues for a repository. +// Supports optional state filter: "open", "closed", or "all" (default: "open"). +func (g *VCSGithub) ListIssues(args shared.VCSListIssuesRequest) ([]shared.IssueParams, error) { + client, err := g.initializeGithubClient() + if err != nil { + return nil, fmt.Errorf("failed to initialize GitHub client: %w", err) + } + + state := strings.ToLower(strings.TrimSpace(args.State)) + switch state { + case "", "open", "closed", "all": + if state == "" { state = "open" } + default: + return nil, fmt.Errorf("invalid state: %s (allowed: open, closed, all)", args.State) + } + + opt := &github.IssueListByRepoOptions{ + State: state, + ListOptions: github.ListOptions{PerPage: 100, Page: 1}, + } + + var all []*github.Issue + for { + issues, resp, err := client.Issues.ListByRepo(context.Background(), args.RepoParam.Namespace, args.RepoParam.Repository, opt) + if err != nil { + return nil, fmt.Errorf("failed to list issues: %w", err) + } + all = append(all, issues...) + if resp == nil || resp.NextPage == 0 { break } + opt.Page = resp.NextPage + } + + // Filter out pull requests and convert to shared type + var result []shared.IssueParams + for _, it := range all { + if it == nil || it.PullRequestLinks != nil { // skip PRs + continue + } + result = append(result, convertToIssueParams(it)) + } + + return result, nil +} + // Setup initializes the global configuration for the VCSGithub instance. 
func (g *VCSGithub) Setup(configData config.Config) (bool, error) { g.setGlobalConfig(&configData) diff --git a/plugins/github/utils.go b/plugins/github/utils.go index cb81fe16..7f1e823e 100644 --- a/plugins/github/utils.go +++ b/plugins/github/utils.go @@ -11,10 +11,10 @@ import ( // safeString safely dereferences a string pointer, returning an empty string if the pointer is nil. func safeString(s *string) string { - if s == nil { - return "" - } - return *s + if s == nil { + return "" + } + return *s } // safeInt safely dereferences an int pointer, returning 0 if the pointer is nil. @@ -27,10 +27,27 @@ func safeInt(i *int) int { // safeTime safely dereferences a time pointer, returning 0 if the pointer is nil. func safeTime(t *time.Time) int64 { - if t == nil { - return 0 - } - return t.Unix() + if t == nil { + return 0 + } + return t.Unix() +} + +// convertToIssueParams converts a GitHub Issue object to shared.IssueParams. +func convertToIssueParams(iss *github.Issue) shared.IssueParams { + if iss == nil { + return shared.IssueParams{} + } + + return shared.IssueParams{ + Number: safeInt(iss.Number), + Title: safeString(iss.Title), + State: safeString(iss.State), + Author: safeUser(iss.User), + URL: safeString(iss.HTMLURL), + CreatedDate: safeTime(iss.CreatedAt), + UpdatedDate: safeTime(iss.UpdatedAt), + } } // safeUser converts a GitHub user to a shared.User, handling nil safely. diff --git a/plugins/gitlab/gitlab.go b/plugins/gitlab/gitlab.go index 20ff4a53..de9e67d2 100644 --- a/plugins/gitlab/gitlab.go +++ b/plugins/gitlab/gitlab.go @@ -405,6 +405,18 @@ func (g *VCSGitlab) CreateIssue(args shared.VCSIssueCreationRequest) (int, error return 0, fmt.Errorf("CreateIssue not implemented for GitLab") } +// ListIssues is not implemented for GitLab yet. Added to satisfy the VCS interface. 
+func (g *VCSGitlab) ListIssues(args shared.VCSListIssuesRequest) ([]shared.IssueParams, error) { + g.logger.Error("ListIssues not implemented for GitLab", "repo", fmt.Sprintf("%s/%s", args.RepoParam.Namespace, args.RepoParam.Repository)) + return nil, fmt.Errorf("ListIssues not implemented for GitLab") +} + +// UpdateIssue is not implemented for GitLab yet. Added to satisfy the VCS interface. +func (g *VCSGitlab) UpdateIssue(args shared.VCSIssueUpdateRequest) (bool, error) { + g.logger.Error("UpdateIssue not implemented for GitLab", "repo", fmt.Sprintf("%s/%s", args.RepoParam.Namespace, args.RepoParam.Repository), "number", args.Number) + return false, fmt.Errorf("UpdateIssue not implemented for GitLab") +} + // buildCommentWithAttachments constructs the full comment text with file attachments. func (g *VCSGitlab) buildCommentWithAttachments(client *gitlab.Client, projectID int, comment string, filePaths []string) (string, error) { var attachmentsText strings.Builder From 10ff8b8df306f0c4a3eb3008965816d08f552fb0 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Fri, 22 Aug 2025 09:26:28 +0200 Subject: [PATCH 03/52] feat: introduce a cmd to create gh issues from sarif report --- .../create-issues-from-sarif.go | 296 ++++++++++++++++++ cmd/root.go | 3 + 2 files changed, 299 insertions(+) create mode 100644 cmd/create-issues-from-sarif/create-issues-from-sarif.go diff --git a/cmd/create-issues-from-sarif/create-issues-from-sarif.go b/cmd/create-issues-from-sarif/create-issues-from-sarif.go new file mode 100644 index 00000000..be74e0a3 --- /dev/null +++ b/cmd/create-issues-from-sarif/create-issues-from-sarif.go @@ -0,0 +1,296 @@ +package createissuesfromsarif + +import ( + "crypto/sha256" + "fmt" + "net/url" + "os" + "path/filepath" + "strings" + + "github.com/spf13/cobra" + + "github.com/owenrumney/go-sarif/v2/sarif" + internalsarif "github.com/scan-io-git/scan-io/internal/sarif" + "github.com/scan-io-git/scan-io/pkg/shared" + 
"github.com/scan-io-git/scan-io/pkg/shared/config" + "github.com/scan-io-git/scan-io/pkg/shared/errors" + "github.com/scan-io-git/scan-io/pkg/shared/logger" +) + +// RunOptions holds flags for the create-issues-from-sarif command. +type RunOptions struct { + Namespace string `json:"namespace,omitempty"` + Repository string `json:"repository,omitempty"` + SarifPath string `json:"sarif_path,omitempty"` + SourceFolder string `json:"source_folder,omitempty"` + Ref string `json:"ref,omitempty"` +} + +var ( + AppConfig *config.Config + opts RunOptions + + // CreateIssuesFromSarifCmd represents the command to create GitHub issues from a SARIF file. + CreateIssuesFromSarifCmd = &cobra.Command{ + Use: "create-issues-from-sarif --namespace NAMESPACE --repository REPO --sarif PATH [--source-folder PATH] [--ref REF]", + Short: "Create GitHub issues for high severity SARIF findings", + Example: "scanio create-issues-from-sarif --namespace org --repository repo --sarif /path/to/report.sarif", + SilenceUsage: true, + Hidden: true, + DisableFlagsInUseLine: true, + RunE: func(cmd *cobra.Command, args []string) error { + if len(args) == 0 && !shared.HasFlags(cmd.Flags()) { + return cmd.Help() + } + + if err := validate(&opts); err != nil { + return errors.NewCommandError(opts, nil, err, 1) + } + + lg := logger.NewLogger(AppConfig, "create-issues-from-sarif") + + report, err := internalsarif.ReadReport(opts.SarifPath, lg, opts.SourceFolder, true) + if err != nil { + lg.Error("failed to read SARIF report", "error", err) + return errors.NewCommandError(opts, nil, fmt.Errorf("failed to read SARIF report: %w", err), 2) + } + + // Enrich to ensure Levels and Titles are present + report.EnrichResultsLevelProperty() + report.EnrichResultsTitleProperty() + // No need to enrich locations here; we'll compute file path from URI directly + + created := 0 + // Iterate runs and results + for _, run := range report.Runs { + for _, res := range run.Results { + // Only high severity: map to Level == 
"error" + level, _ := res.Properties["Level"].(string) + if strings.ToLower(level) != "error" { + continue + } + + // Basic fields + ruleID := "" + if res.RuleID != nil { + ruleID = *res.RuleID + } + // Prefer human-readable rule description from the SARIF rules table + titleBase := getRuleFullDescription(run, ruleID) + if titleBase == "" { + // fallback to result provided title or message + titleBase = getStringProp(res.Properties, "Title") + } + if titleBase == "" && res.Message.Text != nil { + titleBase = *res.Message.Text + } + if titleBase == "" { + titleBase = ruleID + } + titleText := fmt.Sprintf("[SARIF][%s][%s]", ruleID, titleBase) + + fileURI := "" + line := 0 + endLine := 0 + if len(res.Locations) > 0 { + loc := res.Locations[0] + if loc.PhysicalLocation != nil && loc.PhysicalLocation.ArtifactLocation != nil && loc.PhysicalLocation.ArtifactLocation.URI != nil { + uri := *loc.PhysicalLocation.ArtifactLocation.URI + if filepath.IsAbs(uri) && opts.SourceFolder != "" { + rel := strings.TrimPrefix(uri, opts.SourceFolder) + if strings.HasPrefix(rel, string(filepath.Separator)) { + rel = rel[1:] + } + fileURI = rel + } else { + fileURI = uri + } + } + if loc.PhysicalLocation != nil && loc.PhysicalLocation.Region != nil { + if loc.PhysicalLocation.Region.StartLine != nil { + line = *loc.PhysicalLocation.Region.StartLine + } + if loc.PhysicalLocation.Region.EndLine != nil { + endLine = *loc.PhysicalLocation.Region.EndLine + } + } + } + // Normalize file path for title readability + shortPath := filepath.ToSlash(fileURI) + if shortPath == "" { + shortPath = "" + } + if line > 0 { + if endLine > line { + titleText = fmt.Sprintf("%s at %s:%d-%d", titleText, shortPath, line, endLine) + } else { + titleText = fmt.Sprintf("%s at %s:%d", titleText, shortPath, line) + } + } else { + titleText = fmt.Sprintf("%s at %s", titleText, shortPath) + } + + desc := getStringProp(res.Properties, "Description") + if desc == "" && res.Message.Text != nil { + desc = *res.Message.Text 
+ } + + // Optionally include a GitHub permalink if ref is provided + // If EndLine is present, use a range anchor: #Lstart-Lend + permalink := "" + if opts.Ref != "" && shortPath != "" && line > 0 { + encodedPath := encodePathSegments(shortPath) + if endLine > line { + permalink = fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s#L%d-L%d", opts.Namespace, opts.Repository, opts.Ref, encodedPath, line, endLine) + } else { + permalink = fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s#L%d", opts.Namespace, opts.Repository, opts.Ref, encodedPath, line) + } + } + + // Include line or line range in the body + lineInfo := fmt.Sprintf("Line: %d", line) + if endLine > line { + lineInfo = fmt.Sprintf("Lines: %d-%d", line, endLine) + } + + // Compute SHA256 over the referenced snippet (single line or range) + snippetHash := "" + if shortPath != "" && line > 0 && opts.SourceFolder != "" { + absPath := filepath.Join(opts.SourceFolder, filepath.FromSlash(shortPath)) + if data, err := os.ReadFile(absPath); err == nil { + lines := strings.Split(string(data), "\n") + start := line + end := line + if endLine > line { + end = endLine + } + // Validate bounds (1-based line numbers) + if start >= 1 && start <= len(lines) { + if end > len(lines) { + end = len(lines) + } + if end >= start { + snippet := strings.Join(lines[start-1:end], "\n") + sum := sha256.Sum256([]byte(snippet)) + snippetHash = fmt.Sprintf("%x", sum[:]) + } + } + } + } + + body := fmt.Sprintf("Severity: %s\nRule: %s\nFile: %s\n%s\n", strings.ToUpper(level), ruleID, shortPath, lineInfo) + if permalink != "" { + body += fmt.Sprintf("Permalink: %s\n", permalink) + } + if snippetHash != "" { + body += fmt.Sprintf("Snippet SHA256: %s\n", snippetHash) + } + body += "\n" + desc + + // Build request for VCS plugin + req := shared.VCSIssueCreationRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: opts.Namespace, + Repository: opts.Repository, + }, + Action: "createIssue", + }, + 
Title: titleText, + Body: body, + } + + // Call plugin + err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") + } + _, err := vcs.CreateIssue(req) + return err + }) + if err != nil { + lg.Error("failed to create issue via plugin", "error", err, "rule", ruleID, "file", shortPath, "line", line) + return errors.NewCommandError(opts, nil, fmt.Errorf("create issue failed: %w", err), 2) + } + created++ + } + } + + lg.Info("issues created from SARIF high severity findings", "count", created) + fmt.Printf("Created %d issue(s) from SARIF high severity findings\n", created) + return nil + }, + } +) + +// Init wires config into this command. +func Init(cfg *config.Config) { AppConfig = cfg } + +func init() { + CreateIssuesFromSarifCmd.Flags().StringVar(&opts.Namespace, "namespace", "", "GitHub org/user") + CreateIssuesFromSarifCmd.Flags().StringVar(&opts.Repository, "repository", "", "Repository name") + CreateIssuesFromSarifCmd.Flags().StringVar(&opts.SarifPath, "sarif", "", "Path to SARIF file") + CreateIssuesFromSarifCmd.Flags().StringVar(&opts.SourceFolder, "source-folder", "", "Optional: source folder to improve file path resolution in SARIF (used for absolute paths)") + CreateIssuesFromSarifCmd.Flags().StringVar(&opts.Ref, "ref", "", "Git ref (branch or commit SHA) to build a permalink to the vulnerable code") + CreateIssuesFromSarifCmd.Flags().BoolP("help", "h", false, "Show help for create-issues-from-sarif command.") +} + +func validate(o *RunOptions) error { + if o.Namespace == "" { + return fmt.Errorf("--namespace is required") + } + if o.Repository == "" { + return fmt.Errorf("--repository is required") + } + if strings.TrimSpace(o.SarifPath) == "" { + return fmt.Errorf("--sarif is required") + } + return nil +} + +// helper to fetch a string property safely +func getStringProp(m map[string]interface{}, key string) 
string { + if m == nil { + return "" + } + if v, ok := m[key]; ok { + if s, ok := v.(string); ok { + return s + } + } + return "" +} + +// encodePathSegments safely encodes each path segment without encoding slashes +func encodePathSegments(p string) string { + if p == "" { + return "" + } + parts := strings.Split(p, "/") + for i, seg := range parts { + parts[i] = url.PathEscape(seg) + } + return strings.Join(parts, "/") +} + +// getRuleFullDescription returns the human-readable description for a rule from the run's rules table. +// It prefers rule.FullDescription.Text, falls back to rule.ShortDescription.Text, otherwise empty string. +func getRuleFullDescription(run *sarif.Run, ruleID string) string { + if run == nil || run.Tool.Driver == nil { + return "" + } + for _, rule := range run.Tool.Driver.Rules { + if rule == nil { + continue + } + if rule.ID == ruleID { + if rule.FullDescription != nil && rule.FullDescription.Text != nil && *rule.FullDescription.Text != "" { + return *rule.FullDescription.Text + } + return "" + } + } + return "" +} diff --git a/cmd/root.go b/cmd/root.go index 16454a1a..4f73458d 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -9,6 +9,7 @@ import ( "github.com/scan-io-git/scan-io/cmd/analyse" createissue "github.com/scan-io-git/scan-io/cmd/create-issue" + createissuesfromsarif "github.com/scan-io-git/scan-io/cmd/create-issues-from-sarif" "github.com/scan-io-git/scan-io/cmd/fetch" integrationvcs "github.com/scan-io-git/scan-io/cmd/integration-vcs" "github.com/scan-io-git/scan-io/cmd/list" @@ -76,6 +77,7 @@ func initConfig() { createissue.Init(AppConfig) listissues.Init(AppConfig) updateissue.Init(AppConfig) + createissuesfromsarif.Init(AppConfig) version.Init(AppConfig) } @@ -90,6 +92,7 @@ func init() { rootCmd.AddCommand(createissue.CreateIssueCmd) rootCmd.AddCommand(listissues.ListIssuesCmd) rootCmd.AddCommand(updateissue.UpdateIssueCmd) + rootCmd.AddCommand(createissuesfromsarif.CreateIssuesFromSarifCmd) 
rootCmd.AddCommand(version.NewVersionCmd()) // rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file") } From 2a60d2d9423cc2ae462e5a57b546d120ff234698 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Mon, 1 Sep 2025 08:54:57 +0200 Subject: [PATCH 04/52] fix: better relative path handling on create-issue-from-sarif - extract SARIF location parsing to a dedicated function - simplify title --- .github/copilot-instructions.md | 10 ++ .../create-issues-from-sarif.go | 122 ++++++++++++------ 2 files changed, 92 insertions(+), 40 deletions(-) create mode 100644 .github/copilot-instructions.md diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 00000000..ae92de41 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,10 @@ +# AGENTS.md + +## Commands +- Build cli with: `make build-cli` +- Build plugins with: `make build-plugins` +- Test with: `make test` +- Use `go fmt` for formatting + +## Code style +- Use early returns when handling errors or special cases to reduce nesting and improve readability. 
diff --git a/cmd/create-issues-from-sarif/create-issues-from-sarif.go b/cmd/create-issues-from-sarif/create-issues-from-sarif.go index be74e0a3..24547e44 100644 --- a/cmd/create-issues-from-sarif/create-issues-from-sarif.go +++ b/cmd/create-issues-from-sarif/create-issues-from-sarif.go @@ -76,46 +76,12 @@ var ( if res.RuleID != nil { ruleID = *res.RuleID } - // Prefer human-readable rule description from the SARIF rules table - titleBase := getRuleFullDescription(run, ruleID) - if titleBase == "" { - // fallback to result provided title or message - titleBase = getStringProp(res.Properties, "Title") - } - if titleBase == "" && res.Message.Text != nil { - titleBase = *res.Message.Text - } - if titleBase == "" { - titleBase = ruleID - } - titleText := fmt.Sprintf("[SARIF][%s][%s]", ruleID, titleBase) - - fileURI := "" - line := 0 - endLine := 0 - if len(res.Locations) > 0 { - loc := res.Locations[0] - if loc.PhysicalLocation != nil && loc.PhysicalLocation.ArtifactLocation != nil && loc.PhysicalLocation.ArtifactLocation.URI != nil { - uri := *loc.PhysicalLocation.ArtifactLocation.URI - if filepath.IsAbs(uri) && opts.SourceFolder != "" { - rel := strings.TrimPrefix(uri, opts.SourceFolder) - if strings.HasPrefix(rel, string(filepath.Separator)) { - rel = rel[1:] - } - fileURI = rel - } else { - fileURI = uri - } - } - if loc.PhysicalLocation != nil && loc.PhysicalLocation.Region != nil { - if loc.PhysicalLocation.Region.StartLine != nil { - line = *loc.PhysicalLocation.Region.StartLine - } - if loc.PhysicalLocation.Region.EndLine != nil { - endLine = *loc.PhysicalLocation.Region.EndLine - } - } - } + + titleText := fmt.Sprintf("[SARIF][%s]", ruleID) + + // derive file path and region info from SARIF result + fileURI := extractFileURIFromResult(res, opts.SourceFolder) + line, endLine := extractRegionFromResult(res) // Normalize file path for title readability shortPath := filepath.ToSlash(fileURI) if shortPath == "" { @@ -275,6 +241,82 @@ func encodePathSegments(p 
string) string { return strings.Join(parts, "/") } +// extractLocationInfo derives a file path (relative when appropriate), start line and end line +// from a SARIF result's first location. It mirrors the previous inline logic used in the +// command handler. Returns (fileURI, startLine, endLine). +// extractFileURIFromResult returns a file path derived from the SARIF result's first location. +// If the URI is absolute and a non-empty sourceFolder is provided, the returned path will be +// made relative to sourceFolder (matching previous behaviour). +func extractFileURIFromResult(res *sarif.Result, sourceFolder string) string { + if res == nil || len(res.Locations) == 0 { + return "" + } + loc := res.Locations[0] + if loc.PhysicalLocation == nil { + return "" + } + art := loc.PhysicalLocation.ArtifactLocation + if art == nil || art.URI == nil { + return "" + } + uri := *art.URI + // If URI is not absolute or there's no sourceFolder provided, return it unchanged. + if !filepath.IsAbs(uri) || sourceFolder == "" { + return uri + } + + // Normalize sourceFolder to an absolute, cleaned path so relative inputs like + // "../scanio-test" match absolute URIs such as "/home/jekos/.../scanio-test/...". + if absSource, err := filepath.Abs(sourceFolder); err == nil { + absSource = filepath.Clean(absSource) + + // Prefer filepath.Rel which will produce a relative path when uri is under absSource. + if rel, err := filepath.Rel(absSource, uri); err == nil { + // If rel does not escape to parent directories, it's a proper subpath. + if rel != "" && !strings.HasPrefix(rel, "..") { + return rel + } + } + + // Fallback: trim the absolute source prefix explicitly when possible. + prefix := absSource + string(filepath.Separator) + if strings.HasPrefix(uri, prefix) { + return strings.TrimPrefix(uri, prefix) + } + if strings.HasPrefix(uri, absSource) { + return strings.TrimPrefix(uri, absSource) + } + } + + // Last-resort: try trimming the raw sourceFolder string provided by the user. 
+ rel := strings.TrimPrefix(uri, sourceFolder) + if strings.HasPrefix(rel, string(filepath.Separator)) { + return rel[1:] + } + return rel +} + +// extractRegionFromResult returns start and end line numbers (0 when not present) +// taken from the SARIF result's first location region. +func extractRegionFromResult(res *sarif.Result) (int, int) { + if res == nil || len(res.Locations) == 0 { + return 0, 0 + } + loc := res.Locations[0] + if loc.PhysicalLocation == nil || loc.PhysicalLocation.Region == nil { + return 0, 0 + } + start := 0 + end := 0 + if loc.PhysicalLocation.Region.StartLine != nil { + start = *loc.PhysicalLocation.Region.StartLine + } + if loc.PhysicalLocation.Region.EndLine != nil { + end = *loc.PhysicalLocation.Region.EndLine + } + return start, end +} + // getRuleFullDescription returns the human-readable description for a rule from the run's rules table. // It prefers rule.FullDescription.Text, falls back to rule.ShortDescription.Text, otherwise empty string. func getRuleFullDescription(run *sarif.Run, ruleID string) string { From 8a5f1cffc3fe550bb6a373df44835e5f80f21e0b Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Tue, 2 Sep 2025 09:01:36 +0200 Subject: [PATCH 05/52] introduce body parser for later gh issue correlation --- .github/copilot-instructions.md | 1 + .../create-issues-from-sarif.go | 394 ++++++++++++------ pkg/shared/ivcs.go | 1 + plugins/github/github.go | 91 ++-- plugins/github/utils.go | 43 +- 5 files changed, 346 insertions(+), 184 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index ae92de41..aa1118c4 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -3,6 +3,7 @@ ## Commands - Build cli with: `make build-cli` - Build plugins with: `make build-plugins` +- Build everything with: `make build` - Test with: `make test` - Use `go fmt` for formatting diff --git a/cmd/create-issues-from-sarif/create-issues-from-sarif.go 
b/cmd/create-issues-from-sarif/create-issues-from-sarif.go index 24547e44..84fcc53b 100644 --- a/cmd/create-issues-from-sarif/create-issues-from-sarif.go +++ b/cmd/create-issues-from-sarif/create-issues-from-sarif.go @@ -6,10 +6,12 @@ import ( "net/url" "os" "path/filepath" + "strconv" "strings" "github.com/spf13/cobra" + hclog "github.com/hashicorp/go-hclog" "github.com/owenrumney/go-sarif/v2/sarif" internalsarif "github.com/scan-io-git/scan-io/internal/sarif" "github.com/scan-io-git/scan-io/pkg/shared" @@ -61,127 +63,16 @@ var ( report.EnrichResultsTitleProperty() // No need to enrich locations here; we'll compute file path from URI directly - created := 0 - // Iterate runs and results - for _, run := range report.Runs { - for _, res := range run.Results { - // Only high severity: map to Level == "error" - level, _ := res.Properties["Level"].(string) - if strings.ToLower(level) != "error" { - continue - } - - // Basic fields - ruleID := "" - if res.RuleID != nil { - ruleID = *res.RuleID - } - - titleText := fmt.Sprintf("[SARIF][%s]", ruleID) - - // derive file path and region info from SARIF result - fileURI := extractFileURIFromResult(res, opts.SourceFolder) - line, endLine := extractRegionFromResult(res) - // Normalize file path for title readability - shortPath := filepath.ToSlash(fileURI) - if shortPath == "" { - shortPath = "" - } - if line > 0 { - if endLine > line { - titleText = fmt.Sprintf("%s at %s:%d-%d", titleText, shortPath, line, endLine) - } else { - titleText = fmt.Sprintf("%s at %s:%d", titleText, shortPath, line) - } - } else { - titleText = fmt.Sprintf("%s at %s", titleText, shortPath) - } - - desc := getStringProp(res.Properties, "Description") - if desc == "" && res.Message.Text != nil { - desc = *res.Message.Text - } - - // Optionally include a GitHub permalink if ref is provided - // If EndLine is present, use a range anchor: #Lstart-Lend - permalink := "" - if opts.Ref != "" && shortPath != "" && line > 0 { - encodedPath := 
encodePathSegments(shortPath) - if endLine > line { - permalink = fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s#L%d-L%d", opts.Namespace, opts.Repository, opts.Ref, encodedPath, line, endLine) - } else { - permalink = fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s#L%d", opts.Namespace, opts.Repository, opts.Ref, encodedPath, line) - } - } - - // Include line or line range in the body - lineInfo := fmt.Sprintf("Line: %d", line) - if endLine > line { - lineInfo = fmt.Sprintf("Lines: %d-%d", line, endLine) - } - - // Compute SHA256 over the referenced snippet (single line or range) - snippetHash := "" - if shortPath != "" && line > 0 && opts.SourceFolder != "" { - absPath := filepath.Join(opts.SourceFolder, filepath.FromSlash(shortPath)) - if data, err := os.ReadFile(absPath); err == nil { - lines := strings.Split(string(data), "\n") - start := line - end := line - if endLine > line { - end = endLine - } - // Validate bounds (1-based line numbers) - if start >= 1 && start <= len(lines) { - if end > len(lines) { - end = len(lines) - } - if end >= start { - snippet := strings.Join(lines[start-1:end], "\n") - sum := sha256.Sum256([]byte(snippet)) - snippetHash = fmt.Sprintf("%x", sum[:]) - } - } - } - } - - body := fmt.Sprintf("Severity: %s\nRule: %s\nFile: %s\n%s\n", strings.ToUpper(level), ruleID, shortPath, lineInfo) - if permalink != "" { - body += fmt.Sprintf("Permalink: %s\n", permalink) - } - if snippetHash != "" { - body += fmt.Sprintf("Snippet SHA256: %s\n", snippetHash) - } - body += "\n" + desc - - // Build request for VCS plugin - req := shared.VCSIssueCreationRequest{ - VCSRequestBase: shared.VCSRequestBase{ - RepoParam: shared.RepositoryParams{ - Namespace: opts.Namespace, - Repository: opts.Repository, - }, - Action: "createIssue", - }, - Title: titleText, - Body: body, - } - - // Call plugin - err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { - vcs, ok := raw.(shared.VCS) - if !ok { - 
return fmt.Errorf("invalid VCS plugin type") - } - _, err := vcs.CreateIssue(req) - return err - }) - if err != nil { - lg.Error("failed to create issue via plugin", "error", err, "rule", ruleID, "file", shortPath, "line", line) - return errors.NewCommandError(opts, nil, fmt.Errorf("create issue failed: %w", err), 2) - } - created++ - } + //TODO: get all open github issues + openIssues, err := listOpenIssues(opts) + if err != nil { + return err + } + lg.Info("fetched open issues from repository", "count", len(openIssues)) + + created, err := processSARIFReport(report, opts, lg) + if err != nil { + return err } lg.Info("issues created from SARIF high severity findings", "count", created) @@ -241,6 +132,267 @@ func encodePathSegments(p string) string { return strings.Join(parts, "/") } +// buildSARIFTitle creates a concise issue title for a SARIF result using ruleID and location info. +// It formats as "[SARIF][] at :" or with a range when endLine > line. +func buildSARIFTitle(ruleID, fileURI string, line, endLine int) string { + title := fmt.Sprintf("[SARIF][%s]", ruleID) + if line > 0 { + if endLine > line { + return fmt.Sprintf("%s at %s:%d-%d", title, fileURI, line, endLine) + } + return fmt.Sprintf("%s at %s:%d", title, fileURI, line) + } + return fmt.Sprintf("%s at %s", title, fileURI) +} + +// computeSnippetHash reads the snippet (single line or range) from sourceFolder + fileURI +// and returns its SHA256 hex string. Returns empty string on any error or if inputs are invalid. 
+func computeSnippetHash(fileURI string, line, endLine int, sourceFolder string) string { + if fileURI == "" || fileURI == "" || line <= 0 || sourceFolder == "" { + return "" + } + absPath := filepath.Join(sourceFolder, filepath.FromSlash(fileURI)) + data, err := os.ReadFile(absPath) + if err != nil { + return "" + } + lines := strings.Split(string(data), "\n") + start := line + end := line + if endLine > line { + end = endLine + } + // Validate bounds (1-based line numbers) + if start < 1 || start > len(lines) { + return "" + } + if end > len(lines) { + end = len(lines) + } + if end < start { + return "" + } + snippet := strings.Join(lines[start-1:end], "\n") + sum := sha256.Sum256([]byte(snippet)) + return fmt.Sprintf("%x", sum[:]) +} + +// getScannerName returns the tool/driver name for a SARIF run when available. +func getScannerName(run *sarif.Run) string { + if run == nil { + return "" + } + if run.Tool.Driver == nil { + return "" + } + if run.Tool.Driver.Name != "" { + return run.Tool.Driver.Name + } + return "" +} + +// OpenIssueReport represents parsed metadata from an open issue body. +type OpenIssueReport struct { + Severity string + Scanner string + FilePath string + StartLine int + EndLine int + Hash string + Description string +} + +// OpenIssueEntry combines parsed metadata from an open issue body with the +// original IssueParams returned by the VCS plugin. The map returned by +// listOpenIssues uses the issue number as key and this struct as value. +type OpenIssueEntry struct { + OpenIssueReport + Params shared.IssueParams +} + +// parseIssueBody attempts to read the body produced by this command and extract +// known metadata lines (Severity, Scanner, File, Line(s), Snippet SHA256, Description). +// Returns an OpenIssueReport with zero values when fields are missing. 
+func parseIssueBody(body string) OpenIssueReport { + rep := OpenIssueReport{} + for _, line := range strings.Split(body, "\n") { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, "Severity:") { + rep.Severity = strings.TrimSpace(strings.TrimPrefix(line, "Severity:")) + continue + } + if strings.HasPrefix(line, "Scanner:") { + rep.Scanner = strings.TrimSpace(strings.TrimPrefix(line, "Scanner:")) + continue + } + if strings.HasPrefix(line, "File:") { + rep.FilePath = strings.TrimSpace(strings.TrimPrefix(line, "File:")) + continue + } + if strings.HasPrefix(line, "Line:") { + v := strings.TrimSpace(strings.TrimPrefix(line, "Line:")) + if n, err := strconv.Atoi(v); err == nil { + rep.StartLine = n + rep.EndLine = n + } + continue + } + if strings.HasPrefix(line, "Lines:") { + v := strings.TrimSpace(strings.TrimPrefix(line, "Lines:")) + parts := strings.Split(v, "-") + if len(parts) == 2 { + if s, err := strconv.Atoi(strings.TrimSpace(parts[0])); err == nil { + rep.StartLine = s + } + if e, err := strconv.Atoi(strings.TrimSpace(parts[1])); err == nil { + rep.EndLine = e + } + } + continue + } + if strings.HasPrefix(line, "Snippet SHA256:") { + rep.Hash = strings.TrimSpace(strings.TrimPrefix(line, "Snippet SHA256:")) + continue + } + // When we hit a non-metadata line and description is empty, assume rest is description + if rep.Description == "" && line != "" { + rep.Description = line + } + } + return rep +} + +// listOpenIssues calls the VCS plugin to list open issues for the configured repo +// and parses their bodies into OpenIssueReport structures. 
+func listOpenIssues(options RunOptions) (map[int]OpenIssueEntry, error) { + req := shared.VCSListIssuesRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: options.Namespace, + Repository: options.Repository, + }, + Action: "listIssues", + }, + State: "open", + } + + var issues []shared.IssueParams + err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") + } + list, err := vcs.ListIssues(req) + if err != nil { + return err + } + issues = list + return nil + }) + if err != nil { + return nil, err + } + + reports := make(map[int]OpenIssueEntry, len(issues)) + for _, it := range issues { + rep := parseIssueBody(it.Body) + reports[it.Number] = OpenIssueEntry{ + OpenIssueReport: rep, + Params: it, + } + } + return reports, nil +} + +// processSARIFReport iterates runs/results in the SARIF report and creates VCS issues for +// high severity findings. Returns number of created issues or an error. 
+func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hclog.Logger) (int, error) { + created := 0 + for _, run := range report.Runs { + for _, res := range run.Results { + level, _ := res.Properties["Level"].(string) + if strings.ToLower(level) != "error" { + continue + } + + ruleID := "" + if res.RuleID != nil { + ruleID = *res.RuleID + } + + fileURI := filepath.ToSlash(extractFileURIFromResult(res, options.SourceFolder)) + if fileURI == "" { + fileURI = "" + } + line, endLine := extractRegionFromResult(res) + + titleText := buildSARIFTitle(ruleID, fileURI, line, endLine) + + desc := getStringProp(res.Properties, "Description") + if desc == "" && res.Message.Text != nil { + desc = *res.Message.Text + } + + permalink := "" + if options.Ref != "" && fileURI != "" && line > 0 { + encodedPath := encodePathSegments(fileURI) + if endLine > line { + permalink = fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s#L%d-L%d", options.Namespace, options.Repository, options.Ref, encodedPath, line, endLine) + } else { + permalink = fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s#L%d", options.Namespace, options.Repository, options.Ref, encodedPath, line) + } + } + + lineInfo := fmt.Sprintf("Line: %d", line) + if endLine > line { + lineInfo = fmt.Sprintf("Lines: %d-%d", line, endLine) + } + + snippetHash := computeSnippetHash(fileURI, line, endLine, options.SourceFolder) + scannerName := getScannerName(run) + + body := fmt.Sprintf("Severity: %s\nRule: %s\nFile: %s\n%s\n", strings.ToUpper(level), ruleID, fileURI, lineInfo) + if scannerName != "" { + body += fmt.Sprintf("Scanner: %s\n", scannerName) + } + if permalink != "" { + body += fmt.Sprintf("Permalink: %s\n", permalink) + } + if snippetHash != "" { + body += fmt.Sprintf("Snippet SHA256: %s\n", snippetHash) + } + body += "\n" + desc + + req := shared.VCSIssueCreationRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: options.Namespace, + Repository: 
options.Repository, + }, + Action: "createIssue", + }, + Title: titleText, + Body: body, + } + + err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") + } + _, err := vcs.CreateIssue(req) + return err + }) + if err != nil { + lg.Error("failed to create issue via plugin", "error", err, "rule", ruleID, "file", fileURI, "line", line) + return created, errors.NewCommandError(options, nil, fmt.Errorf("create issue failed: %w", err), 2) + } + created++ + } + } + return created, nil +} + // extractLocationInfo derives a file path (relative when appropriate), start line and end line // from a SARIF result's first location. It mirrors the previous inline logic used in the // command handler. Returns (fileURI, startLine, endLine). diff --git a/pkg/shared/ivcs.go b/pkg/shared/ivcs.go index b491e77d..67a25d38 100644 --- a/pkg/shared/ivcs.go +++ b/pkg/shared/ivcs.go @@ -37,6 +37,7 @@ type PRParams struct { type IssueParams struct { Number int `json:"number"` Title string `json:"title"` + Body string `json:"body,omitempty"` State string `json:"state"` Author User `json:"author"` URL string `json:"url"` diff --git a/plugins/github/github.go b/plugins/github/github.go index 3e89a3b7..169bafb8 100644 --- a/plugins/github/github.go +++ b/plugins/github/github.go @@ -36,45 +36,45 @@ type VCSGithub struct { // UpdateIssue updates an existing GitHub issue's title and/or body. 
func (g *VCSGithub) UpdateIssue(args shared.VCSIssueUpdateRequest) (bool, error) { - // Basic validation - if args.RepoParam.Namespace == "" || args.RepoParam.Repository == "" { - return false, fmt.Errorf("namespace and repository are required") - } - if args.Number <= 0 { - return false, fmt.Errorf("valid issue number is required") - } - - client, err := g.initializeGithubClient() - if err != nil { - return false, fmt.Errorf("failed to initialize GitHub client: %w", err) - } - - req := &github.IssueRequest{} - if strings.TrimSpace(args.Title) != "" { - req.Title = github.String(args.Title) - } - if strings.TrimSpace(args.Body) != "" { - req.Body = github.String(args.Body) - } - if s := strings.ToLower(strings.TrimSpace(args.State)); s != "" { - switch s { - case "open", "closed": - req.State = github.String(s) - default: - return false, fmt.Errorf("invalid state: %s (allowed: open, closed)", args.State) - } - } - - if req.Title == nil && req.Body == nil && req.State == nil { - return false, fmt.Errorf("nothing to update: provide title, body and/or state") - } - - _, _, err = client.Issues.Edit(context.Background(), args.RepoParam.Namespace, args.RepoParam.Repository, args.Number, req) - if err != nil { - return false, fmt.Errorf("failed to update GitHub issue: %w", err) - } - - return true, nil + // Basic validation + if args.RepoParam.Namespace == "" || args.RepoParam.Repository == "" { + return false, fmt.Errorf("namespace and repository are required") + } + if args.Number <= 0 { + return false, fmt.Errorf("valid issue number is required") + } + + client, err := g.initializeGithubClient() + if err != nil { + return false, fmt.Errorf("failed to initialize GitHub client: %w", err) + } + + req := &github.IssueRequest{} + if strings.TrimSpace(args.Title) != "" { + req.Title = github.String(args.Title) + } + if strings.TrimSpace(args.Body) != "" { + req.Body = github.String(args.Body) + } + if s := strings.ToLower(strings.TrimSpace(args.State)); s != "" { + switch s 
{ + case "open", "closed": + req.State = github.String(s) + default: + return false, fmt.Errorf("invalid state: %s (allowed: open, closed)", args.State) + } + } + + if req.Title == nil && req.Body == nil && req.State == nil { + return false, fmt.Errorf("nothing to update: provide title, body and/or state") + } + + _, _, err = client.Issues.Edit(context.Background(), args.RepoParam.Namespace, args.RepoParam.Repository, args.Number, req) + if err != nil { + return false, fmt.Errorf("failed to update GitHub issue: %w", err) + } + + return true, nil } // newVCSGithub creates a new instance of VCSGithub. @@ -512,13 +512,15 @@ func (g *VCSGithub) ListIssues(args shared.VCSListIssuesRequest) ([]shared.Issue state := strings.ToLower(strings.TrimSpace(args.State)) switch state { case "", "open", "closed", "all": - if state == "" { state = "open" } + if state == "" { + state = "open" + } default: return nil, fmt.Errorf("invalid state: %s (allowed: open, closed, all)", args.State) } opt := &github.IssueListByRepoOptions{ - State: state, + State: state, ListOptions: github.ListOptions{PerPage: 100, Page: 1}, } @@ -528,8 +530,13 @@ func (g *VCSGithub) ListIssues(args shared.VCSListIssuesRequest) ([]shared.Issue if err != nil { return nil, fmt.Errorf("failed to list issues: %w", err) } + if len(issues) > 0 { + g.logger.Debug("first issue", "number", issues[0].GetNumber(), "title", issues[0].GetTitle(), "body", issues[0].GetBody()) + } all = append(all, issues...) - if resp == nil || resp.NextPage == 0 { break } + if resp == nil || resp.NextPage == 0 { + break + } opt.Page = resp.NextPage } diff --git a/plugins/github/utils.go b/plugins/github/utils.go index 7f1e823e..47926dd5 100644 --- a/plugins/github/utils.go +++ b/plugins/github/utils.go @@ -11,10 +11,10 @@ import ( // safeString safely dereferences a string pointer, returning an empty string if the pointer is nil. 
func safeString(s *string) string { - if s == nil { - return "" - } - return *s + if s == nil { + return "" + } + return *s } // safeInt safely dereferences an int pointer, returning 0 if the pointer is nil. @@ -27,27 +27,28 @@ func safeInt(i *int) int { // safeTime safely dereferences a time pointer, returning 0 if the pointer is nil. func safeTime(t *time.Time) int64 { - if t == nil { - return 0 - } - return t.Unix() + if t == nil { + return 0 + } + return t.Unix() } // convertToIssueParams converts a GitHub Issue object to shared.IssueParams. func convertToIssueParams(iss *github.Issue) shared.IssueParams { - if iss == nil { - return shared.IssueParams{} - } - - return shared.IssueParams{ - Number: safeInt(iss.Number), - Title: safeString(iss.Title), - State: safeString(iss.State), - Author: safeUser(iss.User), - URL: safeString(iss.HTMLURL), - CreatedDate: safeTime(iss.CreatedAt), - UpdatedDate: safeTime(iss.UpdatedAt), - } + if iss == nil { + return shared.IssueParams{} + } + + return shared.IssueParams{ + Number: safeInt(iss.Number), + Title: safeString(iss.Title), + Body: safeString(iss.Body), + State: safeString(iss.State), + Author: safeUser(iss.User), + URL: safeString(iss.HTMLURL), + CreatedDate: safeTime(iss.CreatedAt), + UpdatedDate: safeTime(iss.UpdatedAt), + } } // safeUser converts a GitHub user to a shared.User, handling nil safely. 
From a7a8afd1312a13b0e54d6f20daf95fc6daa7703e Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Wed, 3 Sep 2025 08:45:02 +0200 Subject: [PATCH 06/52] feat: introduce a Correlator struct to match known and new issues --- pkg/issuecorrelation/correlator.go | 225 ++++++++++++++++++++++++ pkg/issuecorrelation/correlator_test.go | 141 +++++++++++++++ 2 files changed, 366 insertions(+) create mode 100644 pkg/issuecorrelation/correlator.go create mode 100644 pkg/issuecorrelation/correlator_test.go diff --git a/pkg/issuecorrelation/correlator.go b/pkg/issuecorrelation/correlator.go new file mode 100644 index 00000000..07647829 --- /dev/null +++ b/pkg/issuecorrelation/correlator.go @@ -0,0 +1,225 @@ +package issuecorrelation + +// IssueMetadata describes the minimal metadata required to correlate issues. +// Fields: +// - IssueID: optional external identifier, not used by correlation logic. +// - Scanner, RuleID: identify the rule that produced the finding. +// - Filename, StartLine, EndLine: location information inside a file. +// - SnippetHash: optional code snippet/content fingerprint used for stronger matching. +type IssueMetadata struct { + IssueID string // issue id in external system or sequence number in report, just to know what issue it is outside of this module. Not used in correlation processing. + Scanner string + RuleID string + Severity string + Filename string + StartLine int + EndLine int + SnippetHash string +} + +// Match groups a known issue with the new issues that correlate to it. +// Match groups a single known issue with the list of new issues that were +// correlated to it. A new issue may appear in multiple Match.New slices if it +// correlates to multiple known issues. +type Match struct { + Known IssueMetadata + New []IssueMetadata +} + +// Correlator accepts slices of new and known issues and can compute correlations +// between them. Every known issue may match multiple new issues and vice versa. 
+// Correlator accepts slices of new and known issues and computes correlations +// between them. Use NewCorrelator to create an instance and call Process() to +// compute matches. After processing, use Matches(), UnmatchedNew() and +// UnmatchedKnown() to inspect results. The correlator preserves many-to-many +// relationships: a known issue may match multiple new issues and vice versa. +type Correlator struct { + NewIssues []IssueMetadata + KnownIssues []IssueMetadata + + // internal indexes populated by Process() + knownToNew map[int][]int // known index -> list of new indices + newToKnown map[int][]int // new index -> list of known indices + + processed bool +} + +// NewCorrelator creates a Correlator with the provided issues. +// NewCorrelator constructs a Correlator with the provided slices of new and +// known issues. The correlator is inert until Process() is called. +func NewCorrelator(newIssues, knownIssues []IssueMetadata) *Correlator { + return &Correlator{ + NewIssues: newIssues, + KnownIssues: knownIssues, + } +} + +// Process computes correlations between every known and every new issue. +// Correlation strategy (in order): +// 1) If both issues have a non-empty SnippetHash and they are equal => match. +// 2) If Scanner, RuleID, Filename, StartLine and EndLine are all equal => match. +// 3) Fallback: Scanner, RuleID, Filename and StartLine equal => match. +// Process computes correlations between every known and every new issue using +// four ordered stages. Once a known or new issue has been matched in an +// earlier stage it is excluded from later stages. The stages are: +// 1) scanner+ruleid+filename+startline+endline+snippethash +// 2) scanner+ruleid+filename+snippethash +// 3) scanner+ruleid+filename+startline+endline +// 4) scanner+ruleid+filename+startline +// The results are stored internally and can be retrieved via Matches(), +// UnmatchedNew() and UnmatchedKnown(). Process is idempotent. 
+func (c *Correlator) Process() { + if c.processed { + return + } + c.knownToNew = make(map[int][]int) + c.newToKnown = make(map[int][]int) + + // matchedBefore tracks indices already matched in earlier stages and + // therefore excluded from later stages. matchedThisStage collects items + // matched during the current stage so that multiple matches within the + // same stage are allowed. + matchedKnown := make(map[int]bool) + matchedNew := make(map[int]bool) + + stages := []int{1, 2, 3, 4} + for _, stage := range stages { + matchedKnownThis := make(map[int]bool) + matchedNewThis := make(map[int]bool) + + for ki, k := range c.KnownIssues { + if matchedKnown[ki] { + continue + } + for ni, n := range c.NewIssues { + if matchedNew[ni] { + continue + } + + if matchStage(k, n, stage) { + c.knownToNew[ki] = append(c.knownToNew[ki], ni) + c.newToKnown[ni] = append(c.newToKnown[ni], ki) + matchedKnownThis[ki] = true + matchedNewThis[ni] = true + } + } + } + + // promote this stage's matches to the global matched sets so they are + // excluded from subsequent stages. + for ki := range matchedKnownThis { + matchedKnown[ki] = true + } + for ni := range matchedNewThis { + matchedNew[ni] = true + } + } + + c.processed = true +} + +// matchStage applies the specified stage matching rules. It returns true when +// the two IssueMetadata values should be considered a match for the given +// stage. The function enforces that Scanner and RuleID are present for all +// stages. 
+// +// Stage details: +// 1: scanner + ruleid + filename + startline + endline + snippethash +// 2: scanner + ruleid + filename + snippethash +// 3: scanner + ruleid + filename + startline + endline +// 4: scanner + ruleid + filename + startline +func matchStage(a, b IssueMetadata, stage int) bool { + // require scanner and ruleid for all stages + if a.Scanner == "" || b.Scanner == "" || a.RuleID == "" || b.RuleID == "" { + return false + } + + if a.Scanner != b.Scanner { + return false + } + + if a.RuleID != b.RuleID { + return false + } + + if a.Filename != b.Filename { + return false + } + + switch stage { + case 1: + return a.StartLine == b.StartLine && a.EndLine == b.EndLine && a.SnippetHash == b.SnippetHash + case 2: + return a.SnippetHash == b.SnippetHash + case 3: + return a.StartLine == b.StartLine && a.EndLine == b.EndLine + case 4: + return a.StartLine == b.StartLine + default: + return false + } +} + +// UnmatchedNew returns new issues that do not have any correlation to known issues. +// UnmatchedNew returns the subset of new issues that were not correlated to +// any known issue after Process() has been executed. If Process() has not +// yet been run it will be invoked. +func (c *Correlator) UnmatchedNew() []IssueMetadata { + if !c.processed { + c.Process() + } + + var out []IssueMetadata + for ni, n := range c.NewIssues { + if len(c.newToKnown[ni]) == 0 { + out = append(out, n) + } + } + return out +} + +// UnmatchedKnown returns known issues that do not have any correlation to new issues. +// UnmatchedKnown returns the subset of known issues that were not correlated +// to any new issue after Process() has been executed. If Process() has not +// yet been run it will be invoked. 
+func (c *Correlator) UnmatchedKnown() []IssueMetadata { + if !c.processed { + c.Process() + } + + var out []IssueMetadata + for ki, k := range c.KnownIssues { + if len(c.knownToNew[ki]) == 0 { + out = append(out, k) + } + } + return out +} + +// Matches returns a slice of Match entries. Each Match contains one known issue +// and the list of new issues that were correlated to it. A new issue that +// matches multiple known issues will appear under each matching known issue. +// Matches returns a slice of Match entries describing each known issue that +// had at least one correlated new issue. Each Match contains the known issue +// and the list of new issues correlated to it. If Process() has not been run +// it will be invoked. +func (c *Correlator) Matches() []Match { + if !c.processed { + c.Process() + } + + var out []Match + for ki, newIdxs := range c.knownToNew { + if len(newIdxs) == 0 { + continue + } + m := Match{Known: c.KnownIssues[ki], New: make([]IssueMetadata, 0, len(newIdxs))} + for _, ni := range newIdxs { + if ni >= 0 && ni < len(c.NewIssues) { + m.New = append(m.New, c.NewIssues[ni]) + } + } + out = append(out, m) + } + return out +} diff --git a/pkg/issuecorrelation/correlator_test.go b/pkg/issuecorrelation/correlator_test.go new file mode 100644 index 00000000..e0cc018d --- /dev/null +++ b/pkg/issuecorrelation/correlator_test.go @@ -0,0 +1,141 @@ +package issuecorrelation + +import "testing" + +func TestCorrelator_SnippetHashMatch(t *testing.T) { + known := []IssueMetadata{{Scanner: "s1", RuleID: "R1", SnippetHash: "h1"}} + new := []IssueMetadata{{Scanner: "s1", RuleID: "R1", SnippetHash: "h1"}} + + c := NewCorrelator(new, known) + c.Process() + + matches := c.Matches() + if len(matches) != 1 { + t.Fatalf("expected 1 match got %d", len(matches)) + } + if len(matches[0].New) != 1 { + t.Fatalf("expected 1 new in match got %d", len(matches[0].New)) + } + + if got := len(c.UnmatchedNew()); got != 0 { + t.Fatalf("expected 0 unmatched new, got %d", 
got) + } + if got := len(c.UnmatchedKnown()); got != 0 { + t.Fatalf("expected 0 unmatched known, got %d", got) + } +} + +func TestCorrelator_LineAndRuleMatch(t *testing.T) { + known := []IssueMetadata{{Scanner: "s2", RuleID: "R2", Filename: "f.go", StartLine: 10, EndLine: 12}} + new := []IssueMetadata{{Scanner: "s2", RuleID: "R2", Filename: "f.go", StartLine: 10, EndLine: 12}} + + c := NewCorrelator(new, known) + c.Process() + if len(c.Matches()) != 1 { + t.Fatalf("expected match by lines/rule") + } +} + +func TestCorrelator_Unmatched(t *testing.T) { + known := []IssueMetadata{{Scanner: "s3", RuleID: "R3", Filename: "x.go", StartLine: 1}} + new := []IssueMetadata{{Scanner: "s4", RuleID: "R4", Filename: "y.go", StartLine: 2}} + + c := NewCorrelator(new, known) + c.Process() + + if len(c.UnmatchedNew()) != 1 { + t.Fatalf("expected 1 unmatched new") + } + if len(c.UnmatchedKnown()) != 1 { + t.Fatalf("expected 1 unmatched known") + } + if len(c.Matches()) != 0 { + t.Fatalf("expected 0 matches") + } +} + +func TestCorrelator_SameExceptLines(t *testing.T) { + // known and new have identical scanner, ruleid, filename and snippethash + // but different start and end lines -> should match at stage 2 + known := []IssueMetadata{{Scanner: "s5", RuleID: "R5", Filename: "g.go", StartLine: 10, EndLine: 12, SnippetHash: "sh5"}} + new := []IssueMetadata{{Scanner: "s5", RuleID: "R5", Filename: "g.go", StartLine: 20, EndLine: 22, SnippetHash: "sh5"}} + + c := NewCorrelator(new, known) + c.Process() + + matches := c.Matches() + if len(matches) != 1 { + t.Fatalf("expected 1 match for same metadata except lines, got %d", len(matches)) + } + if len(matches[0].New) != 1 { + t.Fatalf("expected 1 new in match got %d", len(matches[0].New)) + } + if got := len(c.UnmatchedNew()); got != 0 { + t.Fatalf("expected 0 unmatched new, got %d", got) + } + if got := len(c.UnmatchedKnown()); got != 0 { + t.Fatalf("expected 0 unmatched known, got %d", got) + } +} + +func 
TestCorrelator_KnownPlusSimilarNews(t *testing.T) { + // One known issue. New issues include: + // - an exact issue (same scanner, ruleid, filename, start/end and snippethash) + // - a similar issue (same scanner, ruleid, filename, snippethash but different lines) + // Because stage 1 runs before stage 2 and matches are excluded from later + // stages, the known issue should match only the exact new issue and the + // similar one should remain unmatched. + known := []IssueMetadata{{Scanner: "sx", RuleID: "Rx", Filename: "h.go", StartLine: 5, EndLine: 7, SnippetHash: "shx"}} + new := []IssueMetadata{ + {Scanner: "sx", RuleID: "Rx", Filename: "h.go", StartLine: 5, EndLine: 7, SnippetHash: "shx"}, + {Scanner: "sx", RuleID: "Rx", Filename: "h.go", StartLine: 50, EndLine: 52, SnippetHash: "shx"}, + } + + c := NewCorrelator(new, known) + c.Process() + + matches := c.Matches() + if len(matches) != 1 { + t.Fatalf("expected 1 match for known issue, got %d", len(matches)) + } + if len(matches[0].New) != 1 { + t.Fatalf("expected known to match only the exact new issue, got %d", len(matches[0].New)) + } + + // the similar new issue should be unmatched + unmatchedNew := c.UnmatchedNew() + if len(unmatchedNew) != 1 { + t.Fatalf("expected 1 unmatched new issue (the similar one), got %d", len(unmatchedNew)) + } + if len(c.UnmatchedKnown()) != 0 { + t.Fatalf("expected 0 unmatched known issues, got %d", len(c.UnmatchedKnown())) + } +} + +func TestCorrelator_TwoPairsShiftedLines(t *testing.T) { + // Two known issues. New issues are the same two but with start/end lines + // shifted by +10 and identical SnippetHash. They should match via stage 2. 
+ known := []IssueMetadata{ + {Scanner: "sa", RuleID: "Ra", Filename: "p.go", StartLine: 1, EndLine: 3, SnippetHash: "sha"}, + {Scanner: "sb", RuleID: "Rb", Filename: "q.go", StartLine: 5, EndLine: 8, SnippetHash: "shb"}, + } + new := []IssueMetadata{ + {Scanner: "sa", RuleID: "Ra", Filename: "p.go", StartLine: 11, EndLine: 13, SnippetHash: "sha"}, + {Scanner: "sb", RuleID: "Rb", Filename: "q.go", StartLine: 15, EndLine: 18, SnippetHash: "shb"}, + } + + c := NewCorrelator(new, known) + c.Process() + + matches := c.Matches() + if len(matches) != 2 { + t.Fatalf("expected 2 matches for the two pairs, got %d", len(matches)) + } + + if len(c.UnmatchedNew()) != 0 { + t.Fatalf("expected 0 unmatched new issues, got %d", len(c.UnmatchedNew())) + } + if len(c.UnmatchedKnown()) != 0 { + t.Fatalf("expected 0 unmatched known issues, got %d", len(c.UnmatchedKnown())) + } +} From 8a8483d8319a882139f1667e251d05925393c3b3 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Thu, 4 Sep 2025 08:42:59 +0200 Subject: [PATCH 07/52] feat(create-issues-from-sarif): managed opening, correlation and closure of github issues --- .../create-issues-from-sarif.go | 293 ++++++++++++------ 1 file changed, 205 insertions(+), 88 deletions(-) diff --git a/cmd/create-issues-from-sarif/create-issues-from-sarif.go b/cmd/create-issues-from-sarif/create-issues-from-sarif.go index 84fcc53b..badbcca5 100644 --- a/cmd/create-issues-from-sarif/create-issues-from-sarif.go +++ b/cmd/create-issues-from-sarif/create-issues-from-sarif.go @@ -3,7 +3,6 @@ package createissuesfromsarif import ( "crypto/sha256" "fmt" - "net/url" "os" "path/filepath" "strconv" @@ -14,12 +13,19 @@ import ( hclog "github.com/hashicorp/go-hclog" "github.com/owenrumney/go-sarif/v2/sarif" internalsarif "github.com/scan-io-git/scan-io/internal/sarif" + "github.com/scan-io-git/scan-io/internal/git" + issuecorrelation "github.com/scan-io-git/scan-io/pkg/issuecorrelation" "github.com/scan-io-git/scan-io/pkg/shared" 
"github.com/scan-io-git/scan-io/pkg/shared/config" "github.com/scan-io-git/scan-io/pkg/shared/errors" "github.com/scan-io-git/scan-io/pkg/shared/logger" ) +// scanioManagedAnnotation is appended to issue bodies created by this command +// and is required for correlation/auto-closure to consider an issue +// managed by automation. +const scanioManagedAnnotation = "> This issue was created and will be managed by scanio automation. Don't change body manually for proper processing, unless you know what you do" + // RunOptions holds flags for the create-issues-from-sarif command. type RunOptions struct { Namespace string `json:"namespace,omitempty"` @@ -63,7 +69,7 @@ var ( report.EnrichResultsTitleProperty() // No need to enrich locations here; we'll compute file path from URI directly - //TODO: get all open github issues + // get all open github issues openIssues, err := listOpenIssues(opts) if err != nil { return err @@ -120,29 +126,22 @@ func getStringProp(m map[string]interface{}, key string) string { return "" } -// encodePathSegments safely encodes each path segment without encoding slashes -func encodePathSegments(p string) string { - if p == "" { - return "" - } - parts := strings.Split(p, "/") - for i, seg := range parts { - parts[i] = url.PathEscape(seg) - } - return strings.Join(parts, "/") -} - -// buildSARIFTitle creates a concise issue title for a SARIF result using ruleID and location info. -// It formats as "[SARIF][] at :" or with a range when endLine > line. -func buildSARIFTitle(ruleID, fileURI string, line, endLine int) string { - title := fmt.Sprintf("[SARIF][%s]", ruleID) - if line > 0 { - if endLine > line { - return fmt.Sprintf("%s at %s:%d-%d", title, fileURI, line, endLine) - } - return fmt.Sprintf("%s at %s:%d", title, fileURI, line) - } - return fmt.Sprintf("%s at %s", title, fileURI) +// buildIssueTitle creates a concise issue title using scanner name (fallback to SARIF), +// ruleID and location info. 
It formats as "[][] at :" +// or with a range when endLine > line. +func buildIssueTitle(scannerName, ruleID, fileURI string, line, endLine int) string { + label := strings.TrimSpace(scannerName) + if label == "" { + label = "SARIF" + } + title := fmt.Sprintf("[%s][%s]", label, ruleID) + if line > 0 { + if endLine > line { + return fmt.Sprintf("%s at %s:%d-%d", title, fileURI, line, endLine) + } + return fmt.Sprintf("%s at %s:%d", title, fileURI, line) + } + return fmt.Sprintf("%s at %s", title, fileURI) } // computeSnippetHash reads the snippet (single line or range) from sourceFolder + fileURI @@ -191,10 +190,41 @@ func getScannerName(run *sarif.Run) string { return "" } +// buildGitHubPermalink builds a permalink to a file and region in GitHub. +// It prefers the CLI --ref when provided; otherwise attempts to read the +// current commit hash from --source-folder using git metadata. When neither +// is available, returns an empty string. +func buildGitHubPermalink(options RunOptions, fileURI string, start, end int) string { + base := fmt.Sprintf("https://github.com/%s/%s", options.Namespace, options.Repository) + ref := strings.TrimSpace(options.Ref) + + if ref == "" && options.SourceFolder != "" { + if md, err := git.CollectRepositoryMetadata(options.SourceFolder); err == nil && md.CommitHash != nil && *md.CommitHash != "" { + ref = *md.CommitHash + } + } + + if ref == "" || fileURI == "" || fileURI == "" { + return "" + } + + path := filepath.ToSlash(fileURI) + anchor := "" + if start > 0 { + anchor = fmt.Sprintf("#L%d", start) + if end > start { + anchor = fmt.Sprintf("%s-L%d", anchor, end) + } + } + + return fmt.Sprintf("%s/blob/%s/%s%s", base, ref, path, anchor) +} + // OpenIssueReport represents parsed metadata from an open issue body. 
type OpenIssueReport struct { Severity string Scanner string + RuleID string FilePath string StartLine int EndLine int @@ -225,6 +255,10 @@ func parseIssueBody(body string) OpenIssueReport { rep.Scanner = strings.TrimSpace(strings.TrimPrefix(line, "Scanner:")) continue } + if strings.HasPrefix(line, "Rule:") { + rep.RuleID = strings.TrimSpace(strings.TrimPrefix(line, "Rule:")) + continue + } if strings.HasPrefix(line, "File:") { rep.FilePath = strings.TrimSpace(strings.TrimPrefix(line, "File:")) continue @@ -307,7 +341,12 @@ func listOpenIssues(options RunOptions) (map[int]OpenIssueEntry, error) { // processSARIFReport iterates runs/results in the SARIF report and creates VCS issues for // high severity findings. Returns number of created issues or an error. func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hclog.Logger) (int, error) { - created := 0 + // Build list of new issues from SARIF (only high severity -> "error"). + newIssues := make([]issuecorrelation.IssueMetadata, 0) + // Also keep parallel arrays of the text bodies and titles so we can create issues later. 
+ newBodies := make([]string, 0) + newTitles := make([]string, 0) + for _, run := range report.Runs { for _, res := range run.Results { level, _ := res.Properties["Level"].(string) @@ -326,70 +365,168 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl } line, endLine := extractRegionFromResult(res) - titleText := buildSARIFTitle(ruleID, fileURI, line, endLine) - desc := getStringProp(res.Properties, "Description") if desc == "" && res.Message.Text != nil { desc = *res.Message.Text } - permalink := "" - if options.Ref != "" && fileURI != "" && line > 0 { - encodedPath := encodePathSegments(fileURI) - if endLine > line { - permalink = fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s#L%d-L%d", options.Namespace, options.Repository, options.Ref, encodedPath, line, endLine) - } else { - permalink = fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s#L%d", options.Namespace, options.Repository, options.Ref, encodedPath, line) - } - } + snippetHash := computeSnippetHash(fileURI, line, endLine, options.SourceFolder) + scannerName := getScannerName(run) + // build body and title with scanner name label + titleText := buildIssueTitle(scannerName, ruleID, fileURI, line, endLine) lineInfo := fmt.Sprintf("Line: %d", line) if endLine > line { lineInfo = fmt.Sprintf("Lines: %d-%d", line, endLine) } - snippetHash := computeSnippetHash(fileURI, line, endLine, options.SourceFolder) - scannerName := getScannerName(run) - body := fmt.Sprintf("Severity: %s\nRule: %s\nFile: %s\n%s\n", strings.ToUpper(level), ruleID, fileURI, lineInfo) if scannerName != "" { body += fmt.Sprintf("Scanner: %s\n", scannerName) } - if permalink != "" { - body += fmt.Sprintf("Permalink: %s\n", permalink) - } - if snippetHash != "" { - body += fmt.Sprintf("Snippet SHA256: %s\n", snippetHash) + if snippetHash != "" { + body += fmt.Sprintf("Snippet SHA256: %s\n", snippetHash) + } + if link := buildGitHubPermalink(options, fileURI, line, endLine); link != "" { + body += 
fmt.Sprintf("Permalink: %s\n", link) + } + body += "\n" + desc + "\n\n" + scanioManagedAnnotation + + newIssues = append(newIssues, issuecorrelation.IssueMetadata{ + IssueID: "", + Scanner: scannerName, + RuleID: ruleID, + Severity: level, + Filename: fileURI, + StartLine: line, + EndLine: endLine, + SnippetHash: snippetHash, + }) + newBodies = append(newBodies, body) + newTitles = append(newTitles, titleText) + } + } + + // Build list of known issues (open issues fetched previously by caller via listOpenIssues) + openIssues, err := listOpenIssues(options) + if err != nil { + return 0, err + } + + knownIssues := make([]issuecorrelation.IssueMetadata, 0, len(openIssues)) + for num, entry := range openIssues { + rep := entry.OpenIssueReport + // Only include well-structured issues for automatic closure. + // If an open issue doesn't include basic metadata we skip it so + // we don't accidentally close unrelated or free-form issues. + if rep.Scanner == "" || rep.RuleID == "" || rep.FilePath == "" { + lg.Debug("skipping malformed open issue (won't be auto-closed)", "number", num) + continue + } + + // Only consider issues that contain the scanio-managed annotation. + // If the annotation is absent, treat the issue as manually managed and + // exclude it from correlation/auto-closure logic. 
+ if !strings.Contains(entry.Params.Body, scanioManagedAnnotation) { + lg.Debug("skipping non-scanio-managed issue (won't be auto-closed)", "number", num) + continue + } + knownIssues = append(knownIssues, issuecorrelation.IssueMetadata{ + IssueID: fmt.Sprintf("%d", num), + Scanner: rep.Scanner, + RuleID: rep.RuleID, + Severity: rep.Severity, + Filename: rep.FilePath, + StartLine: rep.StartLine, + EndLine: rep.EndLine, + SnippetHash: rep.Hash, + }) + } + + // correlate + corr := issuecorrelation.NewCorrelator(newIssues, knownIssues) + corr.Process() + + // Create only unmatched new issues + unmatchedNew := corr.UnmatchedNew() + created := 0 + for _, u := range unmatchedNew { + // find corresponding index in newIssues to retrieve body/title + var idx int = -1 + for ni, n := range newIssues { + if n == u { + idx = ni + break } - body += "\n" + desc - - req := shared.VCSIssueCreationRequest{ - VCSRequestBase: shared.VCSRequestBase{ - RepoParam: shared.RepositoryParams{ - Namespace: options.Namespace, - Repository: options.Repository, - }, - Action: "createIssue", + } + if idx == -1 { + // shouldn't happen + continue + } + + req := shared.VCSIssueCreationRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: options.Namespace, + Repository: options.Repository, }, - Title: titleText, - Body: body, + Action: "createIssue", + }, + Title: newTitles[idx], + Body: newBodies[idx], + } + + err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") } + _, err := vcs.CreateIssue(req) + return err + }) + if err != nil { + lg.Error("failed to create issue via plugin", "error", err, "file", u.Filename, "line", u.StartLine) + return created, errors.NewCommandError(options, nil, fmt.Errorf("create issue failed: %w", err), 2) + } + created++ + } - err := shared.WithPlugin(AppConfig, "plugin-vcs", 
shared.PluginTypeVCS, "github", func(raw interface{}) error { - vcs, ok := raw.(shared.VCS) - if !ok { - return fmt.Errorf("invalid VCS plugin type") - } - _, err := vcs.CreateIssue(req) - return err - }) - if err != nil { - lg.Error("failed to create issue via plugin", "error", err, "rule", ruleID, "file", fileURI, "line", line) - return created, errors.NewCommandError(options, nil, fmt.Errorf("create issue failed: %w", err), 2) + // Close unmatched known issues (open issues that did not correlate) + unmatchedKnown := corr.UnmatchedKnown() + for _, k := range unmatchedKnown { + // known IssueID contains the number as string + num, err := strconv.Atoi(k.IssueID) + if err != nil { + // skip if we can't parse number + continue + } + upd := shared.VCSIssueUpdateRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: options.Namespace, + Repository: options.Repository, + }, + Action: "updateIssue", + }, + Number: num, + State: "closed", + } + + err = shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") } - created++ + _, err := vcs.UpdateIssue(upd) + return err + }) + if err != nil { + lg.Error("failed to close issue via plugin", "error", err, "number", num) + // continue closing others but report an error at end + return created, errors.NewCommandError(options, nil, fmt.Errorf("close issue failed: %w", err), 2) } } + return created, nil } @@ -468,23 +605,3 @@ func extractRegionFromResult(res *sarif.Result) (int, int) { } return start, end } - -// getRuleFullDescription returns the human-readable description for a rule from the run's rules table. -// It prefers rule.FullDescription.Text, falls back to rule.ShortDescription.Text, otherwise empty string. 
-func getRuleFullDescription(run *sarif.Run, ruleID string) string { - if run == nil || run.Tool.Driver == nil { - return "" - } - for _, rule := range run.Tool.Driver.Rules { - if rule == nil { - continue - } - if rule.ID == ruleID { - if rule.FullDescription != nil && rule.FullDescription.Text != nil && *rule.FullDescription.Text != "" { - return *rule.FullDescription.Text - } - return "" - } - } - return "" -} From 5c32c0950e610e3dc2289554e2d3421ba467e786 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Thu, 4 Sep 2025 08:51:49 +0200 Subject: [PATCH 08/52] chore: remove github copilot instructions --- .github/copilot-instructions.md | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 .github/copilot-instructions.md diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md deleted file mode 100644 index aa1118c4..00000000 --- a/.github/copilot-instructions.md +++ /dev/null @@ -1,11 +0,0 @@ -# AGENTS.md - -## Commands -- Build cli with: `make build-cli` -- Build plugins with: `make build-plugins` -- Build everything with: `make build` -- Test with: `make test` -- Use `go fmt` for formatting - -## Code style -- Use early returns when handling errors or special cases to reduce nesting and improve readability. 
From cf1076f46d39f2b05d525485a5286b96c5071d54 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Fri, 5 Sep 2025 07:52:53 +0200 Subject: [PATCH 09/52] add git to dockerfile runtime stage --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 06da677c..de48b003 100644 --- a/Dockerfile +++ b/Dockerfile @@ -61,6 +61,7 @@ RUN set -euxo pipefail && \ openssl \ ca-certificates \ curl \ + git \ musl-dev && \ PLUGIN_VENVS_DIR="/opt/venvs" && \ mkdir -p "$PLUGIN_VENVS_DIR" && \ From 9baa3e6e6f031f2dee17b6ef7d96ddb36458af59 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Fri, 5 Sep 2025 08:03:47 +0200 Subject: [PATCH 10/52] move git from .build-deps --- Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index de48b003..3c9cc487 100644 --- a/Dockerfile +++ b/Dockerfile @@ -53,7 +53,7 @@ RUN set -euxo pipefail && \ echo "Building dependencies for '$TARGETOS/$TARGETARCH'" && \ apk update && \ apk upgrade && \ - apk add --no-cache bash python3 py3-pip openssh && \ + apk add --no-cache bash python3 py3-pip openssh git && \ apk add --no-cache --virtual .build-deps \ jq \ libc6-compat \ @@ -61,7 +61,6 @@ RUN set -euxo pipefail && \ openssl \ ca-certificates \ curl \ - git \ musl-dev && \ PLUGIN_VENVS_DIR="/opt/venvs" && \ mkdir -p "$PLUGIN_VENVS_DIR" && \ From 109c97594e916fa6f4b4cd47e658c953bc2a2347 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Fri, 5 Sep 2025 08:47:23 +0200 Subject: [PATCH 11/52] feat: crerate-issues-from-sarif takes fallback namepsace, repository and ref values from env vars --- .../create-issues-from-sarif.go | 126 +++++++++++------- 1 file changed, 76 insertions(+), 50 deletions(-) diff --git a/cmd/create-issues-from-sarif/create-issues-from-sarif.go b/cmd/create-issues-from-sarif/create-issues-from-sarif.go index badbcca5..d3c8b260 100644 --- a/cmd/create-issues-from-sarif/create-issues-from-sarif.go +++ 
b/cmd/create-issues-from-sarif/create-issues-from-sarif.go @@ -12,8 +12,8 @@ import ( hclog "github.com/hashicorp/go-hclog" "github.com/owenrumney/go-sarif/v2/sarif" - internalsarif "github.com/scan-io-git/scan-io/internal/sarif" "github.com/scan-io-git/scan-io/internal/git" + internalsarif "github.com/scan-io-git/scan-io/internal/sarif" issuecorrelation "github.com/scan-io-git/scan-io/pkg/issuecorrelation" "github.com/scan-io-git/scan-io/pkg/shared" "github.com/scan-io-git/scan-io/pkg/shared/config" @@ -41,9 +41,9 @@ var ( // CreateIssuesFromSarifCmd represents the command to create GitHub issues from a SARIF file. CreateIssuesFromSarifCmd = &cobra.Command{ - Use: "create-issues-from-sarif --namespace NAMESPACE --repository REPO --sarif PATH [--source-folder PATH] [--ref REF]", + Use: "create-issues-from-sarif --sarif PATH [--namespace NAMESPACE] [--repository REPO] [--source-folder PATH] [--ref REF]", Short: "Create GitHub issues for high severity SARIF findings", - Example: "scanio create-issues-from-sarif --namespace org --repository repo --sarif /path/to/report.sarif", + Example: "scanio create-issues-from-sarif --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif", SilenceUsage: true, Hidden: true, DisableFlagsInUseLine: true, @@ -52,6 +52,32 @@ var ( return cmd.Help() } + // Fallback: if --namespace not provided, try $GITHUB_REPOSITORY_OWNER + if strings.TrimSpace(opts.Namespace) == "" { + if ns := strings.TrimSpace(os.Getenv("GITHUB_REPOSITORY_OWNER")); ns != "" { + opts.Namespace = ns + } + } + + // Fallback: if --repository not provided, try ${GITHUB_REPOSITORY#*/} + if strings.TrimSpace(opts.Repository) == "" { + if gr := strings.TrimSpace(os.Getenv("GITHUB_REPOSITORY")); gr != "" { + if idx := strings.Index(gr, "/"); idx >= 0 && idx < len(gr)-1 { + opts.Repository = gr[idx+1:] + } else { + // No slash present; fall back to the whole value + opts.Repository = gr + } + } + } + + // Fallback: if --ref not provided, try $GITHUB_SHA + 
if strings.TrimSpace(opts.Ref) == "" { + if sha := strings.TrimSpace(os.Getenv("GITHUB_SHA")); sha != "" { + opts.Ref = sha + } + } + if err := validate(&opts); err != nil { return errors.NewCommandError(opts, nil, err, 1) } @@ -92,11 +118,11 @@ var ( func Init(cfg *config.Config) { AppConfig = cfg } func init() { - CreateIssuesFromSarifCmd.Flags().StringVar(&opts.Namespace, "namespace", "", "GitHub org/user") - CreateIssuesFromSarifCmd.Flags().StringVar(&opts.Repository, "repository", "", "Repository name") + CreateIssuesFromSarifCmd.Flags().StringVar(&opts.Namespace, "namespace", "", "GitHub org/user (defaults to $GITHUB_REPOSITORY_OWNER when unset)") + CreateIssuesFromSarifCmd.Flags().StringVar(&opts.Repository, "repository", "", "Repository name (defaults to ${GITHUB_REPOSITORY#*/} when unset)") CreateIssuesFromSarifCmd.Flags().StringVar(&opts.SarifPath, "sarif", "", "Path to SARIF file") CreateIssuesFromSarifCmd.Flags().StringVar(&opts.SourceFolder, "source-folder", "", "Optional: source folder to improve file path resolution in SARIF (used for absolute paths)") - CreateIssuesFromSarifCmd.Flags().StringVar(&opts.Ref, "ref", "", "Git ref (branch or commit SHA) to build a permalink to the vulnerable code") + CreateIssuesFromSarifCmd.Flags().StringVar(&opts.Ref, "ref", "", "Git ref (branch or commit SHA) to build a permalink to the vulnerable code (defaults to $GITHUB_SHA when unset)") CreateIssuesFromSarifCmd.Flags().BoolP("help", "h", false, "Show help for create-issues-from-sarif command.") } @@ -130,18 +156,18 @@ func getStringProp(m map[string]interface{}, key string) string { // ruleID and location info. It formats as "[][] at :" // or with a range when endLine > line. 
func buildIssueTitle(scannerName, ruleID, fileURI string, line, endLine int) string { - label := strings.TrimSpace(scannerName) - if label == "" { - label = "SARIF" - } - title := fmt.Sprintf("[%s][%s]", label, ruleID) - if line > 0 { - if endLine > line { - return fmt.Sprintf("%s at %s:%d-%d", title, fileURI, line, endLine) - } - return fmt.Sprintf("%s at %s:%d", title, fileURI, line) - } - return fmt.Sprintf("%s at %s", title, fileURI) + label := strings.TrimSpace(scannerName) + if label == "" { + label = "SARIF" + } + title := fmt.Sprintf("[%s][%s]", label, ruleID) + if line > 0 { + if endLine > line { + return fmt.Sprintf("%s at %s:%d-%d", title, fileURI, line, endLine) + } + return fmt.Sprintf("%s at %s:%d", title, fileURI, line) + } + return fmt.Sprintf("%s at %s", title, fileURI) } // computeSnippetHash reads the snippet (single line or range) from sourceFolder + fileURI @@ -195,29 +221,29 @@ func getScannerName(run *sarif.Run) string { // current commit hash from --source-folder using git metadata. When neither // is available, returns an empty string. 
func buildGitHubPermalink(options RunOptions, fileURI string, start, end int) string { - base := fmt.Sprintf("https://github.com/%s/%s", options.Namespace, options.Repository) - ref := strings.TrimSpace(options.Ref) - - if ref == "" && options.SourceFolder != "" { - if md, err := git.CollectRepositoryMetadata(options.SourceFolder); err == nil && md.CommitHash != nil && *md.CommitHash != "" { - ref = *md.CommitHash - } - } - - if ref == "" || fileURI == "" || fileURI == "" { - return "" - } - - path := filepath.ToSlash(fileURI) - anchor := "" - if start > 0 { - anchor = fmt.Sprintf("#L%d", start) - if end > start { - anchor = fmt.Sprintf("%s-L%d", anchor, end) - } - } - - return fmt.Sprintf("%s/blob/%s/%s%s", base, ref, path, anchor) + base := fmt.Sprintf("https://github.com/%s/%s", options.Namespace, options.Repository) + ref := strings.TrimSpace(options.Ref) + + if ref == "" && options.SourceFolder != "" { + if md, err := git.CollectRepositoryMetadata(options.SourceFolder); err == nil && md.CommitHash != nil && *md.CommitHash != "" { + ref = *md.CommitHash + } + } + + if ref == "" || fileURI == "" || fileURI == "" { + return "" + } + + path := filepath.ToSlash(fileURI) + anchor := "" + if start > 0 { + anchor = fmt.Sprintf("#L%d", start) + if end > start { + anchor = fmt.Sprintf("%s-L%d", anchor, end) + } + } + + return fmt.Sprintf("%s/blob/%s/%s%s", base, ref, path, anchor) } // OpenIssueReport represents parsed metadata from an open issue body. 
@@ -373,8 +399,8 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl snippetHash := computeSnippetHash(fileURI, line, endLine, options.SourceFolder) scannerName := getScannerName(run) - // build body and title with scanner name label - titleText := buildIssueTitle(scannerName, ruleID, fileURI, line, endLine) + // build body and title with scanner name label + titleText := buildIssueTitle(scannerName, ruleID, fileURI, line, endLine) lineInfo := fmt.Sprintf("Line: %d", line) if endLine > line { lineInfo = fmt.Sprintf("Lines: %d-%d", line, endLine) @@ -384,13 +410,13 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl if scannerName != "" { body += fmt.Sprintf("Scanner: %s\n", scannerName) } - if snippetHash != "" { - body += fmt.Sprintf("Snippet SHA256: %s\n", snippetHash) - } - if link := buildGitHubPermalink(options, fileURI, line, endLine); link != "" { - body += fmt.Sprintf("Permalink: %s\n", link) - } - body += "\n" + desc + "\n\n" + scanioManagedAnnotation + if snippetHash != "" { + body += fmt.Sprintf("Snippet SHA256: %s\n", snippetHash) + } + if link := buildGitHubPermalink(options, fileURI, line, endLine); link != "" { + body += fmt.Sprintf("Permalink: %s\n", link) + } + body += "\n" + desc + "\n\n" + scanioManagedAnnotation newIssues = append(newIssues, issuecorrelation.IssueMetadata{ IssueID: "", From 5d547d3bf9a7119e1e4d6554aff375acc0540848 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Tue, 16 Sep 2025 20:56:12 +0200 Subject: [PATCH 12/52] feat: add labels and assignees support --- .../create-issues-from-sarif.go | 28 ++++++++++++------- pkg/shared/ivcs.go | 6 ++++ plugins/github/github.go | 13 +++++++-- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/cmd/create-issues-from-sarif/create-issues-from-sarif.go b/cmd/create-issues-from-sarif/create-issues-from-sarif.go index d3c8b260..9d4e810e 100644 --- a/cmd/create-issues-from-sarif/create-issues-from-sarif.go +++ 
b/cmd/create-issues-from-sarif/create-issues-from-sarif.go @@ -24,15 +24,17 @@ import ( // scanioManagedAnnotation is appended to issue bodies created by this command // and is required for correlation/auto-closure to consider an issue // managed by automation. -const scanioManagedAnnotation = "> This issue was created and will be managed by scanio automation. Don't change body manually for proper processing, unless you know what you do" +const scanioManagedAnnotation = "> [!NOTE]\n> This issue was created and will be managed by scanio automation. Don't change body manually for proper processing, unless you know what you do" // RunOptions holds flags for the create-issues-from-sarif command. type RunOptions struct { - Namespace string `json:"namespace,omitempty"` - Repository string `json:"repository,omitempty"` - SarifPath string `json:"sarif_path,omitempty"` - SourceFolder string `json:"source_folder,omitempty"` - Ref string `json:"ref,omitempty"` + Namespace string `json:"namespace,omitempty"` + Repository string `json:"repository,omitempty"` + SarifPath string `json:"sarif_path,omitempty"` + SourceFolder string `json:"source_folder,omitempty"` + Ref string `json:"ref,omitempty"` + Labels []string `json:"labels,omitempty"` + Assignees []string `json:"assignees,omitempty"` } var ( @@ -41,9 +43,9 @@ var ( // CreateIssuesFromSarifCmd represents the command to create GitHub issues from a SARIF file. 
CreateIssuesFromSarifCmd = &cobra.Command{ - Use: "create-issues-from-sarif --sarif PATH [--namespace NAMESPACE] [--repository REPO] [--source-folder PATH] [--ref REF]", + Use: "create-issues-from-sarif --sarif PATH [--namespace NAMESPACE] [--repository REPO] [--source-folder PATH] [--ref REF] [--labels label[,label...]] [--assignees user[,user...]]", Short: "Create GitHub issues for high severity SARIF findings", - Example: "scanio create-issues-from-sarif --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif", + Example: "scanio create-issues-from-sarif --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --labels bug,security --assignees alice,bob", SilenceUsage: true, Hidden: true, DisableFlagsInUseLine: true, @@ -123,6 +125,10 @@ func init() { CreateIssuesFromSarifCmd.Flags().StringVar(&opts.SarifPath, "sarif", "", "Path to SARIF file") CreateIssuesFromSarifCmd.Flags().StringVar(&opts.SourceFolder, "source-folder", "", "Optional: source folder to improve file path resolution in SARIF (used for absolute paths)") CreateIssuesFromSarifCmd.Flags().StringVar(&opts.Ref, "ref", "", "Git ref (branch or commit SHA) to build a permalink to the vulnerable code (defaults to $GITHUB_SHA when unset)") + // --labels supports multiple usages (e.g., --labels bug --labels security) or comma-separated values + CreateIssuesFromSarifCmd.Flags().StringSliceVar(&opts.Labels, "labels", nil, "Optional: labels to assign to created GitHub issues (repeat flag or use comma-separated values)") + // --assignees supports multiple usages or comma-separated values + CreateIssuesFromSarifCmd.Flags().StringSliceVar(&opts.Assignees, "assignees", nil, "Optional: assignees (GitHub logins) to assign to created issues (repeat flag or use comma-separated values)") CreateIssuesFromSarifCmd.Flags().BoolP("help", "h", false, "Show help for create-issues-from-sarif command.") } @@ -498,8 +504,10 @@ func processSARIFReport(report *internalsarif.Report, options 
RunOptions, lg hcl }, Action: "createIssue", }, - Title: newTitles[idx], - Body: newBodies[idx], + Title: newTitles[idx], + Body: newBodies[idx], + Labels: opts.Labels, + Assignees: opts.Assignees, } err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { diff --git a/pkg/shared/ivcs.go b/pkg/shared/ivcs.go index 67a25d38..f8c15cb1 100644 --- a/pkg/shared/ivcs.go +++ b/pkg/shared/ivcs.go @@ -106,6 +106,12 @@ type VCSIssueCreationRequest struct { VCSRequestBase Title string `json:"title"` Body string `json:"body"` + // Logins for Users to assign to this issue. + // github supports multiple assignees + Assignees []string `json:"assignees,omitempty"` + // Labels is an optional list of label names to attach to the created issue. + // Not all VCS providers support labels; providers that don't will ignore this field. + Labels []string `json:"labels,omitempty"` } // VCSIssueUpdateRequest represents a request to update an existing issue. diff --git a/plugins/github/github.go b/plugins/github/github.go index 169bafb8..ce8bb16b 100644 --- a/plugins/github/github.go +++ b/plugins/github/github.go @@ -492,6 +492,16 @@ func (g *VCSGithub) CreateIssue(args shared.VCSIssueCreationRequest) (int, error Body: github.String(args.Body), } + // If labels are provided, attach them to the issue request + if len(args.Labels) > 0 { + issue.Labels = &args.Labels + } + + // If assignees are provided, attach them to the issue request + if len(args.Assignees) > 0 { + issue.Assignees = &args.Assignees + } + ctx := context.Background() createdIssue, _, err := client.Issues.Create(ctx, args.RepoParam.Namespace, args.RepoParam.Repository, issue) if err != nil { @@ -530,9 +540,6 @@ func (g *VCSGithub) ListIssues(args shared.VCSListIssuesRequest) ([]shared.Issue if err != nil { return nil, fmt.Errorf("failed to list issues: %w", err) } - if len(issues) > 0 { - g.logger.Debug("first issue", "number", issues[0].GetNumber(), "title", 
issues[0].GetTitle(), "body", issues[0].GetBody()) - } all = append(all, issues...) if resp == nil || resp.NextPage == 0 { break From f8a7a61be823810774569d9e064ed60df0facd3b Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Wed, 17 Sep 2025 09:06:37 +0200 Subject: [PATCH 13/52] feat: add reference information to github issue --- .../create-issues-from-sarif.go | 93 ++++++++++++++++++- 1 file changed, 88 insertions(+), 5 deletions(-) diff --git a/cmd/create-issues-from-sarif/create-issues-from-sarif.go b/cmd/create-issues-from-sarif/create-issues-from-sarif.go index 9d4e810e..597e2983 100644 --- a/cmd/create-issues-from-sarif/create-issues-from-sarif.go +++ b/cmd/create-issues-from-sarif/create-issues-from-sarif.go @@ -3,6 +3,7 @@ package createissuesfromsarif import ( "crypto/sha256" "fmt" + "regexp" "os" "path/filepath" "strconv" @@ -380,6 +381,22 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl newTitles := make([]string, 0) for _, run := range report.Runs { + + // Build a map of rules keyed by rule ID for quick lookups + rulesByID := map[string]*sarif.ReportingDescriptor{} + if run.Tool.Driver != nil { + for _, r := range run.Tool.Driver.Rules { + if r == nil { + continue + } + id := strings.TrimSpace(r.ID) + if id == "" { + continue + } + rulesByID[id] = r + } + } + for _, res := range run.Results { level, _ := res.Properties["Level"].(string) if strings.ToLower(level) != "error" { @@ -397,10 +414,10 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl } line, endLine := extractRegionFromResult(res) - desc := getStringProp(res.Properties, "Description") - if desc == "" && res.Message.Text != nil { - desc = *res.Message.Text - } + // desc := getStringProp(res.Properties, "Description") + // if desc == "" && res.Message.Text != nil { + // desc = *res.Message.Text + // } snippetHash := computeSnippetHash(fileURI, line, endLine, options.SourceFolder) scannerName := getScannerName(run) @@ -422,7 
+439,73 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl if link := buildGitHubPermalink(options, fileURI, line, endLine); link != "" { body += fmt.Sprintf("Permalink: %s\n", link) } - body += "\n" + desc + "\n\n" + scanioManagedAnnotation + // Append rule help markdown if available + if r, ok := rulesByID[ruleID]; ok && r != nil && r.Help != nil && r.Help.Markdown != nil { + if hm := strings.TrimSpace(*r.Help.Markdown); hm != "" { + // Remove specific promotional footer from help markdown if present + hm = strings.ReplaceAll(hm, "#### 💎 Enable cross-file analysis and Pro rules for free at sg.run/pro\n\n", "") + if cleaned := strings.TrimSpace(hm); cleaned != "" { + body += "\n\n" + cleaned + } + } + } + + // Append security identifier tags (CWE, OWASP) with links if available in rule properties + if r, ok := rulesByID[ruleID]; ok && r != nil && r.Properties != nil { + var tags []string + if v, ok := r.Properties["tags"]; ok && v != nil { + switch tv := v.(type) { + case []string: + tags = tv + case []interface{}: + for _, it := range tv { + if s, ok := it.(string); ok { + tags = append(tags, s) + } + } + } + } + + if len(tags) > 0 { + cweRe := regexp.MustCompile(`^CWE-(\d+)\b`) + owaspRe := regexp.MustCompile(`^OWASP[- ]?A(\d{2}):(\d{4})\s*-\s*(.+)$`) + var lines []string + for _, tag := range tags { + t := strings.TrimSpace(tag) + if t == "" { + continue + } + if m := cweRe.FindStringSubmatch(t); len(m) == 2 { + num := m[1] + url := fmt.Sprintf("https://cwe.mitre.org/data/definitions/%s.html", num) + lines = append(lines, fmt.Sprintf("- [%s](%s)", t, url)) + continue + } + if m := owaspRe.FindStringSubmatch(t); len(m) == 4 { + rank := m[1] + year := m[2] + title := m[3] + slug := strings.ReplaceAll(strings.TrimSpace(title), " ", "_") + // Remove characters that are not letters, numbers, underscore, or hyphen + clean := make([]rune, 0, len(slug)) + for _, r := range slug { + if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || 
(r >= '0' && r <= '9') || r == '_' || r == '-' { + clean = append(clean, r) + } + } + slug = string(clean) + url := fmt.Sprintf("https://owasp.org/Top10/A%s_%s-%s/", rank, year, slug) + lines = append(lines, fmt.Sprintf("- [%s](%s)", t, url)) + continue + } + } + if len(lines) > 0 { + body += "\n" + strings.Join(lines, "\n") + } + } + } + + body += "\n\n" + scanioManagedAnnotation newIssues = append(newIssues, issuecorrelation.IssueMetadata{ IssueID: "", From 3a309cc02a9b34cbba13ef5449c26b829aa80dbb Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Thu, 18 Sep 2025 20:25:02 +0200 Subject: [PATCH 14/52] feat: create an issue comment on issue closure - create a vcs interface to create an issue comment - github plugin implements create an issue comment method - gitlab and bitbucket implement stubs - create-issues-from-sarif command writes a comment to an issue before closure when a vulnerability is resolved --- .../create-issues-from-sarif.go | 26 +++++++++++++++++ pkg/shared/ivcs.go | 28 +++++++++++++++++++ plugins/bitbucket/bitbucket.go | 6 ++++ plugins/github/github.go | 27 ++++++++++++++++++ plugins/gitlab/gitlab.go | 6 ++++ 5 files changed, 93 insertions(+) diff --git a/cmd/create-issues-from-sarif/create-issues-from-sarif.go b/cmd/create-issues-from-sarif/create-issues-from-sarif.go index 597e2983..b9d7358c 100644 --- a/cmd/create-issues-from-sarif/create-issues-from-sarif.go +++ b/cmd/create-issues-from-sarif/create-issues-from-sarif.go @@ -617,6 +617,32 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl // skip if we can't parse number continue } + // Leave a comment before closing the issue to explain why it is being closed + commentReq := shared.VCSCreateIssueCommentRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: options.Namespace, + Repository: options.Repository, + }, + Action: "createIssueComment", + }, + Number: num, + Body: "Recent scan didn't see the issue; 
closing this as resolved.", + } + + err = shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") + } + _, err := vcs.CreateIssueComment(commentReq) + return err + }) + if err != nil { + lg.Error("failed to add comment before closing issue", "error", err, "number", num) + // continue to attempt closing even if commenting failed + } + upd := shared.VCSIssueUpdateRequest{ VCSRequestBase: shared.VCSRequestBase{ RepoParam: shared.RepositoryParams{ diff --git a/pkg/shared/ivcs.go b/pkg/shared/ivcs.go index f8c15cb1..a413680f 100644 --- a/pkg/shared/ivcs.go +++ b/pkg/shared/ivcs.go @@ -123,6 +123,13 @@ type VCSIssueUpdateRequest struct { State string `json:"state"` // optional: "open" or "closed" } +// VCSCreateIssueCommentRequest represents a request to create a comment on an issue. +type VCSCreateIssueCommentRequest struct { + VCSRequestBase + Number int `json:"number"` + Body string `json:"body"` +} + // VCSAddCommentToPRRequest represents a request to add a comment to a PR. type VCSAddCommentToPRRequest struct { VCSRequestBase @@ -176,6 +183,7 @@ type VCS interface { CreateIssue(req VCSIssueCreationRequest) (int, error) ListIssues(req VCSListIssuesRequest) ([]IssueParams, error) UpdateIssue(req VCSIssueUpdateRequest) (bool, error) + CreateIssueComment(req VCSCreateIssueCommentRequest) (bool, error) } // VCSRPCClient implements the VCS interface for RPC clients. @@ -283,6 +291,16 @@ func (c *VCSRPCClient) UpdateIssue(req VCSIssueUpdateRequest) (bool, error) { return resp, nil } +// CreateIssueComment calls the CreateIssueComment method on the RPC client. 
+func (c *VCSRPCClient) CreateIssueComment(req VCSCreateIssueCommentRequest) (bool, error) { + var resp bool + err := c.client.Call("Plugin.CreateIssueComment", req, &resp) + if err != nil { + return false, fmt.Errorf("RPC client CreateIssueComment call failed: %w", err) + } + return resp, nil +} + // VCSRPCServer wraps a VCS implementation to provide an RPC server. type VCSRPCServer struct { Impl VCS @@ -388,6 +406,16 @@ func (s *VCSRPCServer) UpdateIssue(args VCSIssueUpdateRequest, resp *bool) error return nil } +// CreateIssueComment calls the CreateIssueComment method on the VCS implementation. +func (s *VCSRPCServer) CreateIssueComment(args VCSCreateIssueCommentRequest, resp *bool) error { + var err error + *resp, err = s.Impl.CreateIssueComment(args) + if err != nil { + return fmt.Errorf("VCS CreateIssueComment failed: %w", err) + } + return nil +} + // VCSPlugin is the implementation of the plugin.Plugin interface for VCS. type VCSPlugin struct { Impl VCS diff --git a/plugins/bitbucket/bitbucket.go b/plugins/bitbucket/bitbucket.go index 7283b36c..9b42daa2 100644 --- a/plugins/bitbucket/bitbucket.go +++ b/plugins/bitbucket/bitbucket.go @@ -265,6 +265,12 @@ func (g *VCSBitbucket) UpdateIssue(args shared.VCSIssueUpdateRequest) (bool, err return false, fmt.Errorf("UpdateIssue not implemented for Bitbucket") } +// CreateIssueComment is not implemented for Bitbucket yet. Added to satisfy the VCS interface. +func (g *VCSBitbucket) CreateIssueComment(args shared.VCSCreateIssueCommentRequest) (bool, error) { + g.logger.Error("CreateIssueComment not implemented for Bitbucket", "repo", fmt.Sprintf("%s/%s", args.RepoParam.Namespace, args.RepoParam.Repository), "number", args.Number) + return false, fmt.Errorf("CreateIssueComment not implemented for Bitbucket") +} + // fetchPR handles fetching pull request changes. 
func (g *VCSBitbucket) fetchPR(args *shared.VCSFetchRequest) (string, error) { g.logger.Info("handling PR changes fetching") diff --git a/plugins/github/github.go b/plugins/github/github.go index ce8bb16b..85abc0b6 100644 --- a/plugins/github/github.go +++ b/plugins/github/github.go @@ -34,6 +34,33 @@ type VCSGithub struct { globalConfig *config.Config } +// CreateIssueComment creates a new comment on an existing GitHub issue. +func (g *VCSGithub) CreateIssueComment(args shared.VCSCreateIssueCommentRequest) (bool, error) { + // Basic validation + if strings.TrimSpace(args.RepoParam.Namespace) == "" || strings.TrimSpace(args.RepoParam.Repository) == "" { + return false, fmt.Errorf("namespace and repository are required") + } + if args.Number <= 0 { + return false, fmt.Errorf("valid issue number is required") + } + if strings.TrimSpace(args.Body) == "" { + return false, fmt.Errorf("comment body is required") + } + + client, err := g.initializeGithubClient() + if err != nil { + return false, fmt.Errorf("failed to initialize GitHub client: %w", err) + } + + comment := &github.IssueComment{Body: github.String(args.Body)} + _, _, err = client.Issues.CreateComment(context.Background(), args.RepoParam.Namespace, args.RepoParam.Repository, args.Number, comment) + if err != nil { + return false, fmt.Errorf("failed to create issue comment: %w", err) + } + + return true, nil +} + // UpdateIssue updates an existing GitHub issue's title and/or body. func (g *VCSGithub) UpdateIssue(args shared.VCSIssueUpdateRequest) (bool, error) { // Basic validation diff --git a/plugins/gitlab/gitlab.go b/plugins/gitlab/gitlab.go index de9e67d2..2258d1e9 100644 --- a/plugins/gitlab/gitlab.go +++ b/plugins/gitlab/gitlab.go @@ -417,6 +417,12 @@ func (g *VCSGitlab) UpdateIssue(args shared.VCSIssueUpdateRequest) (bool, error) return false, fmt.Errorf("UpdateIssue not implemented for GitLab") } +// CreateIssueComment is not implemented for GitLab yet. Added to satisfy the VCS interface. 
+func (g *VCSGitlab) CreateIssueComment(args shared.VCSCreateIssueCommentRequest) (bool, error) { + g.logger.Error("CreateIssueComment not implemented for GitLab", "repo", fmt.Sprintf("%s/%s", args.RepoParam.Namespace, args.RepoParam.Repository), "number", args.Number) + return false, fmt.Errorf("CreateIssueComment not implemented for GitLab") +} + // buildCommentWithAttachments constructs the full comment text with file attachments. func (g *VCSGitlab) buildCommentWithAttachments(client *gitlab.Client, projectID int, comment string, filePaths []string) (string, error) { var attachmentsText strings.Builder From 2aa16d78ff98281d03778a6eb3b6abd7fc2429a5 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Mon, 22 Sep 2025 21:25:18 +0200 Subject: [PATCH 15/52] feat: update issue header --- .../create-issues-from-sarif.go | 111 ++++++++++++++++-- 1 file changed, 102 insertions(+), 9 deletions(-) diff --git a/cmd/create-issues-from-sarif/create-issues-from-sarif.go b/cmd/create-issues-from-sarif/create-issues-from-sarif.go index b9d7358c..d2d5088d 100644 --- a/cmd/create-issues-from-sarif/create-issues-from-sarif.go +++ b/cmd/create-issues-from-sarif/create-issues-from-sarif.go @@ -3,12 +3,15 @@ package createissuesfromsarif import ( "crypto/sha256" "fmt" - "regexp" "os" "path/filepath" + "regexp" "strconv" "strings" + "golang.org/x/text/cases" + "golang.org/x/text/language" + "github.com/spf13/cobra" hclog "github.com/hashicorp/go-hclog" @@ -278,8 +281,63 @@ type OpenIssueEntry struct { // Returns an OpenIssueReport with zero values when fields are missing. func parseIssueBody(body string) OpenIssueReport { rep := OpenIssueReport{} + // Prefer new-style rule ID header first; fallback to legacy "Rule:" line if absent. 
+ if rid := extractRuleIDFromBody(body); rid != "" { + rep.RuleID = rid + } for _, line := range strings.Split(body, "\n") { line = strings.TrimSpace(line) + // Support new blockquote compact metadata lines + // "> **Severity**: Error, **Scanner**: Semgrep OSS" + // "> **File**: app.py, **Lines**: 11-29" + if strings.HasPrefix(line, "> ") { + // Remove "> " prefix for easier parsing + l := strings.TrimSpace(strings.TrimPrefix(line, "> ")) + // Normalize bold markers to plain keys + l = strings.ReplaceAll(l, "**", "") + // Split into comma-separated parts first + parts := strings.Split(l, ",") + for _, p := range parts { + seg := strings.TrimSpace(p) + if strings.HasPrefix(seg, "Severity:") { + rep.Severity = strings.TrimSpace(strings.TrimPrefix(seg, "Severity:")) + continue + } + if strings.HasPrefix(seg, "Scanner:") { + rep.Scanner = strings.TrimSpace(strings.TrimPrefix(seg, "Scanner:")) + continue + } + if strings.HasPrefix(seg, "File:") { + // If File appears on the first line with comma, capture + v := strings.TrimSpace(strings.TrimPrefix(seg, "File:")) + if v != "" { + rep.FilePath = v + } + continue + } + if strings.HasPrefix(seg, "Lines:") { + v := strings.TrimSpace(strings.TrimPrefix(seg, "Lines:")) + if strings.Contains(v, "-") { + lr := strings.SplitN(v, "-", 2) + if len(lr) == 2 { + if s, err := strconv.Atoi(strings.TrimSpace(lr[0])); err == nil { + rep.StartLine = s + } + if e, err := strconv.Atoi(strings.TrimSpace(lr[1])); err == nil { + rep.EndLine = e + } + } + } else { + if n, err := strconv.Atoi(v); err == nil { + rep.StartLine = n + rep.EndLine = n + } + } + continue + } + } + continue + } if strings.HasPrefix(line, "Severity:") { rep.Severity = strings.TrimSpace(strings.TrimPrefix(line, "Severity:")) continue @@ -289,7 +347,10 @@ func parseIssueBody(body string) OpenIssueReport { continue } if strings.HasPrefix(line, "Rule:") { - rep.RuleID = strings.TrimSpace(strings.TrimPrefix(line, "Rule:")) + // Legacy fallback only if not already populated 
by new header format + if rep.RuleID == "" { + rep.RuleID = strings.TrimSpace(strings.TrimPrefix(line, "Rule:")) + } continue } if strings.HasPrefix(line, "File:") { @@ -329,6 +390,24 @@ func parseIssueBody(body string) OpenIssueReport { return rep } +// extractRuleIDFromBody attempts to parse a rule ID from the new body format header line: +// "## " where can be any single or combined emoji/symbol token. +// Returns empty string if not found. +func extractRuleIDFromBody(body string) string { + // Compile regex once per call; trivial cost compared to network IO. If needed, lift to package scope. + re := regexp.MustCompile(`^##\s+[^\w\s]+\s+(.+)$`) + for _, line := range strings.Split(body, "\n") { + l := strings.TrimSpace(line) + if !strings.HasPrefix(l, "##") { + continue + } + if m := re.FindStringSubmatch(l); len(m) == 2 { + return strings.TrimSpace(m[1]) + } + } + return "" +} + // listOpenIssues calls the VCS plugin to list open issues for the configured repo // and parses their bodies into OpenIssueReport structures. 
func listOpenIssues(options RunOptions) (map[int]OpenIssueEntry, error) { @@ -424,15 +503,29 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl // build body and title with scanner name label titleText := buildIssueTitle(scannerName, ruleID, fileURI, line, endLine) - lineInfo := fmt.Sprintf("Line: %d", line) - if endLine > line { - lineInfo = fmt.Sprintf("Lines: %d-%d", line, endLine) - } - body := fmt.Sprintf("Severity: %s\nRule: %s\nFile: %s\n%s\n", strings.ToUpper(level), ruleID, fileURI, lineInfo) - if scannerName != "" { - body += fmt.Sprintf("Scanner: %s\n", scannerName) + // New body header and compact metadata blockquote + header := "" + if strings.TrimSpace(ruleID) != "" { + header = fmt.Sprintf("## 🐞 %s\n\n", ruleID) + } + sev := cases.Title(language.Und).String(strings.ToLower(level)) + scannerDisp := scannerName + if scannerDisp == "" { + scannerDisp = "SARIF" + } + fileDisp := fileURI + linesDisp := fmt.Sprintf("%d", line) + if endLine > line { + linesDisp = fmt.Sprintf("%d-%d", line, endLine) } + meta := fmt.Sprintf( + "> **Severity**: %s, **Scanner**: %s\n> **File**: %s, **Lines**: %s\n", + sev, scannerDisp, fileDisp, linesDisp, + ) + // Only use the new header and blockquote metadata + body := header + meta + "\n" + // Do not append legacy Scanner line; scanner is already present in blockquote if snippetHash != "" { body += fmt.Sprintf("Snippet SHA256: %s\n", snippetHash) } From fc6487e83db3717e88a1c76057d57e80f1b34d57 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Wed, 24 Sep 2025 21:20:19 +0200 Subject: [PATCH 16/52] misc: better markdown property parsing --- .../create-issues-from-sarif.go | 104 +++++++++++++++--- 1 file changed, 86 insertions(+), 18 deletions(-) diff --git a/cmd/create-issues-from-sarif/create-issues-from-sarif.go b/cmd/create-issues-from-sarif/create-issues-from-sarif.go index d2d5088d..bc5e72aa 100644 --- a/cmd/create-issues-from-sarif/create-issues-from-sarif.go +++ 
b/cmd/create-issues-from-sarif/create-issues-from-sarif.go @@ -28,7 +28,10 @@ import ( // scanioManagedAnnotation is appended to issue bodies created by this command // and is required for correlation/auto-closure to consider an issue // managed by automation. -const scanioManagedAnnotation = "> [!NOTE]\n> This issue was created and will be managed by scanio automation. Don't change body manually for proper processing, unless you know what you do" +const ( + scanioManagedAnnotation = "> [!NOTE]\n> This issue was created and will be managed by scanio automation. Don't change body manually for proper processing, unless you know what you do" + semgrepPromoFooter = "#### 💎 Enable cross-file analysis and Pro rules for free at sg.run/pro\n\n" +) // RunOptions holds flags for the create-issues-from-sarif command. type RunOptions struct { @@ -212,6 +215,46 @@ func computeSnippetHash(fileURI string, line, endLine int, sourceFolder string) return fmt.Sprintf("%x", sum[:]) } +// parseRuleHelpMarkdown removes promotional content from help markdown and splits +// it into the descriptive details and a list of reference bullet points. +func parseRuleHelpMarkdown(markdown string) (string, []string) { + cleaned := strings.ReplaceAll(markdown, semgrepPromoFooter, "") + cleaned = strings.TrimSpace(cleaned) + if cleaned == "" { + return "", nil + } + + lines := strings.Split(cleaned, "\n") + referencesStart := -1 + for idx, raw := range lines { + if strings.TrimSpace(raw) == "References:" { + referencesStart = idx + break + } + } + + if referencesStart == -1 { + return cleaned, nil + } + + detail := strings.TrimSpace(strings.Join(lines[:referencesStart], "\n")) + var references []string + for _, raw := range lines[referencesStart+1:] { + trimmed := strings.TrimSpace(raw) + if trimmed == "" { + continue + } + // Normalise to Markdown bullet points regardless of the original marker. 
+ trimmed = strings.TrimLeft(trimmed, "-* \t") + if trimmed == "" { + continue + } + references = append(references, "- "+trimmed) + } + + return detail, references +} + // getScannerName returns the tool/driver name for a SARIF run when available. func getScannerName(run *sarif.Run) string { if run == nil { @@ -265,6 +308,7 @@ type OpenIssueReport struct { StartLine int EndLine int Hash string + Permalink string Description string } @@ -335,6 +379,11 @@ func parseIssueBody(body string) OpenIssueReport { } continue } + // Support snippet hash in blockquoted metadata line at end of issue + if strings.HasPrefix(seg, "Snippet SHA256:") { + rep.Hash = strings.TrimSpace(strings.TrimPrefix(seg, "Snippet SHA256:")) + continue + } } continue } @@ -382,6 +431,15 @@ func parseIssueBody(body string) OpenIssueReport { rep.Hash = strings.TrimSpace(strings.TrimPrefix(line, "Snippet SHA256:")) continue } + if strings.HasPrefix(line, "Permalink:") { + rep.Permalink = strings.TrimSpace(strings.TrimPrefix(line, "Permalink:")) + continue + } + // Check if line is a URL (for new format without "Permalink:" prefix) + if strings.HasPrefix(line, "https://github.com/") && strings.Contains(line, "/blob/") { + rep.Permalink = strings.TrimSpace(line) + continue + } // When we hit a non-metadata line and description is empty, assume rest is description if rep.Description == "" && line != "" { rep.Description = line @@ -525,24 +583,26 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl ) // Only use the new header and blockquote metadata body := header + meta + "\n" - // Do not append legacy Scanner line; scanner is already present in blockquote - if snippetHash != "" { - body += fmt.Sprintf("Snippet SHA256: %s\n", snippetHash) - } - if link := buildGitHubPermalink(options, fileURI, line, endLine); link != "" { - body += fmt.Sprintf("Permalink: %s\n", link) - } + var references []string + // Append rule help markdown if available if r, ok := rulesByID[ruleID]; 
ok && r != nil && r.Help != nil && r.Help.Markdown != nil { if hm := strings.TrimSpace(*r.Help.Markdown); hm != "" { - // Remove specific promotional footer from help markdown if present - hm = strings.ReplaceAll(hm, "#### 💎 Enable cross-file analysis and Pro rules for free at sg.run/pro\n\n", "") - if cleaned := strings.TrimSpace(hm); cleaned != "" { - body += "\n\n" + cleaned + detail, helpRefs := parseRuleHelpMarkdown(hm) + if detail != "" { + body += "\n\n" + detail + } + if len(helpRefs) > 0 { + references = append(references, helpRefs...) } } } + // Append permalink if available + if link := buildGitHubPermalink(options, fileURI, line, endLine); link != "" { + body += fmt.Sprintf("\n%s\n", link) + } + // Append security identifier tags (CWE, OWASP) with links if available in rule properties if r, ok := rulesByID[ruleID]; ok && r != nil && r.Properties != nil { var tags []string @@ -562,7 +622,7 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl if len(tags) > 0 { cweRe := regexp.MustCompile(`^CWE-(\d+)\b`) owaspRe := regexp.MustCompile(`^OWASP[- ]?A(\d{2}):(\d{4})\s*-\s*(.+)$`) - var lines []string + var tagRefs []string for _, tag := range tags { t := strings.TrimSpace(tag) if t == "" { @@ -571,7 +631,7 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl if m := cweRe.FindStringSubmatch(t); len(m) == 2 { num := m[1] url := fmt.Sprintf("https://cwe.mitre.org/data/definitions/%s.html", num) - lines = append(lines, fmt.Sprintf("- [%s](%s)", t, url)) + tagRefs = append(tagRefs, fmt.Sprintf("- [%s](%s)", t, url)) continue } if m := owaspRe.FindStringSubmatch(t); len(m) == 4 { @@ -588,17 +648,25 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl } slug = string(clean) url := fmt.Sprintf("https://owasp.org/Top10/A%s_%s-%s/", rank, year, slug) - lines = append(lines, fmt.Sprintf("- [%s](%s)", t, url)) + tagRefs = append(tagRefs, fmt.Sprintf("- [%s](%s)", t, url)) 
continue } } - if len(lines) > 0 { - body += "\n" + strings.Join(lines, "\n") + if len(tagRefs) > 0 { + references = append(references, tagRefs...) } } } - body += "\n\n" + scanioManagedAnnotation + if len(references) > 0 { + body += "\n\nReferences:\n" + strings.Join(references, "\n") + } + + // Add a second snippet hash right before the scanio-managed note, as a blockquote + if snippetHash != "" { + body += fmt.Sprintf("\n\n> **Snippet SHA256**: %s\n", snippetHash) + } + body += "\n" + scanioManagedAnnotation newIssues = append(newIssues, issuecorrelation.IssueMetadata{ IssueID: "", From b0b0f083f1d2443535b444dc6787ffc453e35ff8 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Wed, 24 Sep 2025 21:27:33 +0200 Subject: [PATCH 17/52] feat: write display severity instead of raw sarif level value --- .../create-issues-from-sarif.go | 38 ++++++++++++++++--- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/cmd/create-issues-from-sarif/create-issues-from-sarif.go b/cmd/create-issues-from-sarif/create-issues-from-sarif.go index bc5e72aa..c179f55e 100644 --- a/cmd/create-issues-from-sarif/create-issues-from-sarif.go +++ b/cmd/create-issues-from-sarif/create-issues-from-sarif.go @@ -152,6 +152,26 @@ func validate(o *RunOptions) error { return nil } +// displaySeverity normalizes SARIF severity levels to more descriptive labels. 
+func displaySeverity(level string) string { + normalized := strings.ToLower(strings.TrimSpace(level)) + switch normalized { + case "error": + return "High" + case "warning": + return "Medium" + case "note": + return "Low" + case "none": + return "Info" + default: + if normalized == "" { + return "" + } + return cases.Title(language.Und).String(normalized) + } +} + // helper to fetch a string property safely func getStringProp(m map[string]interface{}, key string) string { if m == nil { @@ -166,14 +186,20 @@ func getStringProp(m map[string]interface{}, key string) string { } // buildIssueTitle creates a concise issue title using scanner name (fallback to SARIF), -// ruleID and location info. It formats as "[][] at :" -// or with a range when endLine > line. -func buildIssueTitle(scannerName, ruleID, fileURI string, line, endLine int) string { +// severity, ruleID and location info. It formats as "[][][] at" +// and includes a range when endLine > line. +func buildIssueTitle(scannerName, severity, ruleID, fileURI string, line, endLine int) string { label := strings.TrimSpace(scannerName) if label == "" { label = "SARIF" } - title := fmt.Sprintf("[%s][%s]", label, ruleID) + sev := strings.TrimSpace(severity) + parts := []string{label} + if sev != "" { + parts = append(parts, sev) + } + parts = append(parts, ruleID) + title := fmt.Sprintf("[%s]", strings.Join(parts, "][")) if line > 0 { if endLine > line { return fmt.Sprintf("%s at %s:%d-%d", title, fileURI, line, endLine) @@ -558,16 +584,16 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl snippetHash := computeSnippetHash(fileURI, line, endLine, options.SourceFolder) scannerName := getScannerName(run) + sev := displaySeverity(level) // build body and title with scanner name label - titleText := buildIssueTitle(scannerName, ruleID, fileURI, line, endLine) + titleText := buildIssueTitle(scannerName, sev, ruleID, fileURI, line, endLine) // New body header and compact metadata 
blockquote header := "" if strings.TrimSpace(ruleID) != "" { header = fmt.Sprintf("## 🐞 %s\n\n", ruleID) } - sev := cases.Title(language.Und).String(strings.ToLower(level)) scannerDisp := scannerName if scannerDisp == "" { scannerDisp = "SARIF" From 6e56599c552964b4875572aa72bf4c28c342fd1f Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Thu, 25 Sep 2025 21:30:37 +0200 Subject: [PATCH 18/52] chore: rename cmd --- cmd/root.go | 6 ++-- .../sarif-issues.go} | 30 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) rename cmd/{create-issues-from-sarif/create-issues-from-sarif.go => sarif-issues/sarif-issues.go} (93%) diff --git a/cmd/root.go b/cmd/root.go index 4f73458d..432fe998 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -9,11 +9,11 @@ import ( "github.com/scan-io-git/scan-io/cmd/analyse" createissue "github.com/scan-io-git/scan-io/cmd/create-issue" - createissuesfromsarif "github.com/scan-io-git/scan-io/cmd/create-issues-from-sarif" "github.com/scan-io-git/scan-io/cmd/fetch" integrationvcs "github.com/scan-io-git/scan-io/cmd/integration-vcs" "github.com/scan-io-git/scan-io/cmd/list" listissues "github.com/scan-io-git/scan-io/cmd/list-issues" + sarifissues "github.com/scan-io-git/scan-io/cmd/sarif-issues" updateissue "github.com/scan-io-git/scan-io/cmd/update-issue" "github.com/scan-io-git/scan-io/cmd/version" "github.com/scan-io-git/scan-io/pkg/shared" @@ -77,7 +77,7 @@ func initConfig() { createissue.Init(AppConfig) listissues.Init(AppConfig) updateissue.Init(AppConfig) - createissuesfromsarif.Init(AppConfig) + sarifissues.Init(AppConfig) version.Init(AppConfig) } @@ -92,7 +92,7 @@ func init() { rootCmd.AddCommand(createissue.CreateIssueCmd) rootCmd.AddCommand(listissues.ListIssuesCmd) rootCmd.AddCommand(updateissue.UpdateIssueCmd) - rootCmd.AddCommand(createissuesfromsarif.CreateIssuesFromSarifCmd) + rootCmd.AddCommand(sarifissues.SarifIssuesCmd) rootCmd.AddCommand(version.NewVersionCmd()) // rootCmd.PersistentFlags().StringVar(&cfgFile, 
"config", "", "config file") } diff --git a/cmd/create-issues-from-sarif/create-issues-from-sarif.go b/cmd/sarif-issues/sarif-issues.go similarity index 93% rename from cmd/create-issues-from-sarif/create-issues-from-sarif.go rename to cmd/sarif-issues/sarif-issues.go index c179f55e..088ee94c 100644 --- a/cmd/create-issues-from-sarif/create-issues-from-sarif.go +++ b/cmd/sarif-issues/sarif-issues.go @@ -1,4 +1,4 @@ -package createissuesfromsarif +package sarifissues import ( "crypto/sha256" @@ -48,13 +48,13 @@ var ( AppConfig *config.Config opts RunOptions - // CreateIssuesFromSarifCmd represents the command to create GitHub issues from a SARIF file. - CreateIssuesFromSarifCmd = &cobra.Command{ - Use: "create-issues-from-sarif --sarif PATH [--namespace NAMESPACE] [--repository REPO] [--source-folder PATH] [--ref REF] [--labels label[,label...]] [--assignees user[,user...]]", + // SarifIssuesCmd represents the command to create GitHub issues from a SARIF file. + SarifIssuesCmd = &cobra.Command{ + Use: "sarif-issues --sarif PATH [--namespace NAMESPACE] [--repository REPO] [--source-folder PATH] [--ref REF] [--labels label[,label...]] [--assignees user[,user...]]", Short: "Create GitHub issues for high severity SARIF findings", - Example: "scanio create-issues-from-sarif --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --labels bug,security --assignees alice,bob", - SilenceUsage: true, - Hidden: true, + Example: "scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --labels bug,security --assignees alice,bob", + SilenceUsage: false, + Hidden: false, DisableFlagsInUseLine: true, RunE: func(cmd *cobra.Command, args []string) error { if len(args) == 0 && !shared.HasFlags(cmd.Flags()) { @@ -127,16 +127,16 @@ var ( func Init(cfg *config.Config) { AppConfig = cfg } func init() { - CreateIssuesFromSarifCmd.Flags().StringVar(&opts.Namespace, "namespace", "", "GitHub org/user (defaults to 
$GITHUB_REPOSITORY_OWNER when unset)") - CreateIssuesFromSarifCmd.Flags().StringVar(&opts.Repository, "repository", "", "Repository name (defaults to ${GITHUB_REPOSITORY#*/} when unset)") - CreateIssuesFromSarifCmd.Flags().StringVar(&opts.SarifPath, "sarif", "", "Path to SARIF file") - CreateIssuesFromSarifCmd.Flags().StringVar(&opts.SourceFolder, "source-folder", "", "Optional: source folder to improve file path resolution in SARIF (used for absolute paths)") - CreateIssuesFromSarifCmd.Flags().StringVar(&opts.Ref, "ref", "", "Git ref (branch or commit SHA) to build a permalink to the vulnerable code (defaults to $GITHUB_SHA when unset)") + SarifIssuesCmd.Flags().StringVar(&opts.Namespace, "namespace", "", "GitHub org/user (defaults to $GITHUB_REPOSITORY_OWNER when unset)") + SarifIssuesCmd.Flags().StringVar(&opts.Repository, "repository", "", "Repository name (defaults to ${GITHUB_REPOSITORY#*/} when unset)") + SarifIssuesCmd.Flags().StringVar(&opts.SarifPath, "sarif", "", "Path to SARIF file") + SarifIssuesCmd.Flags().StringVar(&opts.SourceFolder, "source-folder", "", "Optional: source folder to improve file path resolution in SARIF (used for absolute paths)") + SarifIssuesCmd.Flags().StringVar(&opts.Ref, "ref", "", "Git ref (branch or commit SHA) to build a permalink to the vulnerable code (defaults to $GITHUB_SHA when unset)") // --labels supports multiple usages (e.g., --labels bug --labels security) or comma-separated values - CreateIssuesFromSarifCmd.Flags().StringSliceVar(&opts.Labels, "labels", nil, "Optional: labels to assign to created GitHub issues (repeat flag or use comma-separated values)") + SarifIssuesCmd.Flags().StringSliceVar(&opts.Labels, "labels", nil, "Optional: labels to assign to created GitHub issues (repeat flag or use comma-separated values)") // --assignees supports multiple usages or comma-separated values - CreateIssuesFromSarifCmd.Flags().StringSliceVar(&opts.Assignees, "assignees", nil, "Optional: assignees (GitHub logins) to assign 
to created issues (repeat flag or use comma-separated values)")
-	CreateIssuesFromSarifCmd.Flags().BoolP("help", "h", false, "Show help for create-issues-from-sarif command.")
+	SarifIssuesCmd.Flags().StringSliceVar(&opts.Assignees, "assignees", nil, "Optional: assignees (GitHub logins) to assign to created issues (repeat flag or use comma-separated values)")
+	SarifIssuesCmd.Flags().BoolP("help", "h", false, "Show help for sarif-issues command.")
 }
 
 func validate(o *RunOptions) error {

From 1467fce58c45937b2dd503516b9f3854653b0bd2 Mon Sep 17 00:00:00 2001
From: Evgenii Protsenko
Date: Thu, 25 Sep 2025 22:09:15 +0200
Subject: [PATCH 19/52] docs: add sarif-issues command reference documentation

---
 docs/reference/README.md           |   1 +
 docs/reference/cmd-sarif-issues.md | 233 +++++++++++++++++++++++++++++
 2 files changed, 234 insertions(+)
 create mode 100644 docs/reference/cmd-sarif-issues.md

diff --git a/docs/reference/README.md b/docs/reference/README.md
index 52dc68d0..95bb9a19 100644
--- a/docs/reference/README.md
+++ b/docs/reference/README.md
@@ -11,6 +11,7 @@ This section provides detailed technical documentation for Scanio’s commands,
 - [List Command](cmd-list.md): Describes repository discovery functionality across supported VCS platforms, available filtering options, and command output structure.
 - [Fetch Command](cmd-fetch.md): Explains repository fetching logic, supported authentication types, URL formats, and command output structure.
 - [Analyse Command](cmd-analyse.md): Provides details on running security scanners, handling input data, configuring output formats, and command output structure.
+- [SARIF Issues Command](cmd-sarif-issues.md): Explains how to create GitHub issues from high severity SARIF findings, with automated lifecycle management.
 - [To-HTML Command](cmd-to-html.md): Explains conversion of SARIF reports to human-friendly HTML format, code snippet inclusion, and template customization options.
- [Report Patch Command](cmd-report-patch.md): Details how to make structured modifications to SARIF reports, including different filtering capabilities and actions. diff --git a/docs/reference/cmd-sarif-issues.md b/docs/reference/cmd-sarif-issues.md new file mode 100644 index 00000000..84798bc4 --- /dev/null +++ b/docs/reference/cmd-sarif-issues.md @@ -0,0 +1,233 @@ +# SARIF Issues Command +The `sarif-issues` command creates GitHub issues from high severity findings in SARIF reports. It implements intelligent issue correlation to avoid duplicates and automatically closes issues that are no longer present in recent scans. + +This command is designed and recommended for CI/CD integration and automated security issue management, enabling teams to track and manage security findings directly in their GitHub repositories. + +## Table of Contents + +- [Key Features](#key-features) +- [Syntax](#syntax) +- [Options](#options) +- [Core Validation](#core-validation) +- [GitHub Authentication Setup](#github-authentication-setup) +- [Usage Examples](#usage-examples) +- [Command Output Format](#command-output-format) +- [Issue Correlation Logic](#issue-correlation-logic) +- [Issue Format](#issue-format) + +## Key Features + +| Feature | Description | +|-------------------------------------------|----------------------------------------------------------| +| Create issues from high severity findings | Automatically creates GitHub issues for SARIF findings with "error" level | +| Correlate with existing issues | Matches new findings against open issues to prevent duplicates | +| Auto-close resolved issues | Closes open issues that are no longer present in current scan results | +| Add metadata and permalinks | Enriches issues with file links, severity, scanner info, and code snippets | + +## Syntax +```bash +scanio sarif-issues --sarif PATH [--namespace NAMESPACE] [--repository REPO] [--source-folder PATH] [--ref REF] [--labels label[,label...]] [--assignees user[,user...]] +``` 
+ +## Options + +| Option | Type | Required | Default Value | Description | +|---------------------|----------|-------------|----------------------------------|-----------------------------------------------------------------------------| +| `--sarif` | string | Yes | `none` | Path to SARIF report file containing security findings. | +| `--namespace` | string | Conditional | `$GITHUB_REPOSITORY_OWNER` | GitHub organization or user name. Required if environment variable not set. | +| `--repository` | string | Conditional | `${GITHUB_REPOSITORY#*/}` | Repository name. Required if environment variable not set. | +| `--source-folder` | string | No | `none` | Path to source code folder for improved file path resolution and snippets. | +| `--ref` | string | No | `$GITHUB_SHA` | Git ref (branch or commit SHA) for building permalinks to vulnerable code. | +| `--labels` | strings | No | `none` | Labels to assign to created GitHub issues (comma-separated or repeat flag). | +| `--assignees` | strings | No | `none` | GitHub usernames to assign to created issues (comma-separated or repeat flag). | +| `--help`, `-h` | flag | No | `false` | Displays help for the `sarif-issues` command. | + +**Environment Variable Fallbacks**
+The command automatically uses GitHub Actions environment variables when flags are not provided: +- `GITHUB_REPOSITORY_OWNER` → `--namespace` +- `GITHUB_REPOSITORY` → `--repository` (extracts repo name after `/`) +- `GITHUB_SHA` → `--ref` + +This enables seamless integration with GitHub Actions workflows without explicit configuration. + +## Core Validation +The `sarif-issues` command includes several validation layers to ensure robust execution: +- **Required Parameters**: Validates that `--sarif`, `--namespace`, and `--repository` are provided either via flags or environment variables. +- **SARIF File Validation**: Ensures the SARIF file exists and can be parsed successfully. +- **GitHub Authentication**: Requires valid GitHub credentials configured through the GitHub plugin. +- **High Severity Filtering**: Only processes SARIF results with `Level: "error"` to focus on critical findings. + +## GitHub Authentication Setup + +The `sarif-issues` command requires GitHub authentication to create and manage issues. Configure authentication using one of the following methods: + +### Environment Variables (Recommended for CI/CD) +```bash +export SCANIO_GITHUB_TOKEN="your-github-token" +export SCANIO_GITHUB_USERNAME="your-github-username" # Optional for HTTP auth +``` + +### Configuration File +Add to your `config.yml`: +```yaml +github_plugin: + token: "your-github-token" + username: "your-github-username" # Optional for HTTP auth +``` + +### Required Token Permissions +The GitHub token must have the following scopes: +- **`repo`** - Required for creating, updating, and listing issues +- **`read:org`** - Required for organizational repositories (optional for personal repos) + +For detailed GitHub plugin configuration, refer to [GitHub Plugin Documentation](plugin-github.md#configuration-references). 
+ +## Usage Examples + +### Basic Usage in GitHub Actions +Create issues from SARIF report using environment variables: +```bash +scanio sarif-issues --sarif results/semgrep.sarif +``` + +### Manual Usage with Explicit Parameters +Create issues with custom namespace and repository: +```bash +scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif +``` + +### Enhanced Issue Creation +Create issues with source code snippets, labels, and assignees: +```bash +scanio sarif-issues --sarif results/semgrep.sarif --source-folder . --labels bug,security --assignees alice,bob +``` + +### With Custom Git Reference +Create issues with specific commit reference for permalinks: +```bash +scanio sarif-issues --sarif results/semgrep.sarif --source-folder . --ref feature-branch +``` + +## Command Output Format + +### User Mode Output +``` +Created 3 issue(s) from SARIF high severity findings +``` + +### Logging Information +The command provides some logging information including: +- Number of open issues fetched from the repository +- Issue correlation results (matched/unmatched) +- Created and closed issue counts +- Error details for failed operations + +## Issue Correlation Logic + +The command implements intelligent issue correlation to manage the lifecycle of security findings: + +### New Issue Creation +- **High Severity Only**: Only creates issues for SARIF findings with `Level: "error"` +- **Duplicate Prevention**: Uses hierarchical correlation to match new findings against existing open issues +- **Unmatched Findings**: Creates GitHub issues only for findings that don't match existing open issues through any correlation stage + +### Automatic Issue Closure +- **Resolved Findings**: Automatically closes open issues that don't correlate with current scan results +- **Comment Before Closure**: Adds brief explanatory comment. 
+- **Managed Issues Only**: Only closes issues containing the scanio-managed annotation to avoid affecting manually created issues
+
+### Correlation Criteria
+The correlation logic uses a **4-stage hierarchical matching system** that processes stages in order, with earlier stages being more precise. Once an issue is matched in any stage, it's excluded from subsequent stages.
+
+**Required for all stages**: Scanner name and Rule ID must match exactly.
+
+**Stage 1 (Most Precise)**: Scanner + RuleID + Filename + StartLine + EndLine + SnippetHash
+- All fields must match exactly
+- Used when both issues have snippet hashes available
+
+**Stage 2**: Scanner + RuleID + Filename + SnippetHash
+- Matches based on code content fingerprint
+- Used when snippet hashes are available but line numbers differ
+
+**Stage 3**: Scanner + RuleID + Filename + StartLine + EndLine
+- Matches based on exact line range
+- Used when snippet hashes are not available
+
+**Stage 4 (Least Precise)**: Scanner + RuleID + Filename + StartLine
+- Matches based on file and starting line only
+- Fallback when end line information is missing
+
+### Issue Filtering for Correlation
+Only specific types of open issues are considered for correlation:
+- **Well-structured issues**: Must have Scanner, RuleID, and FilePath metadata
+- **Scanio-managed issues**: Must contain the scanio-managed annotation
+- **Malformed issues are skipped**: Issues without proper metadata are ignored to prevent accidental closure of manually created issues
+
+## Issue Format
+
+### Issue Title Format
+```
+[<scanner>][<severity>][<rule-id>] at <file>:<start-line>[-<end-line>]
+```
+**Example**: `[Semgrep OSS][High][javascript.express.security.audit.express-check-csurf-middleware-usage.express-check-csurf-middleware-usage] at app.js:42-45`
+
+### Issue Body Structure
+
+**Header**
+```markdown
+## 🐞 <rule-id>
+```
+
+**Compact Metadata (Blockquote)**
+```markdown
+> **Severity**: High, **Scanner**: Semgrep OSS
+> **File**: app.js, **Lines**: 42-45
+```
+
+**Rule Description**
+- Includes
help text from SARIF rule definitions +- Parses and formats reference links + +**GitHub Permalink** +- Direct link to vulnerable code in repository +- Uses commit SHA for permanent links +- Includes line number anchors: `#L42-L45` + +**Security References** +- Automatically generates links for CWE identifiers: `[CWE-79](https://cwe.mitre.org/data/definitions/79.html)` +- Creates OWASP Top 10 links when applicable +- Extracts from SARIF rule tags and properties + +**Snippet Hash** +```markdown +> **Snippet SHA256**: abc123... +``` + +**Management Annotation** +```markdown +> [!NOTE] +> This issue was created and will be managed by scanio automation. Don't change body manually for proper processing, unless you know what you do +``` + +### Example Complete Issue Body +```markdown +## 🐞 javascript.express.security.audit.express-check-csurf-middleware-usage.express-check-csurf-middleware-usage + +> **Severity**: High, **Scanner**: Semgrep OSS +> **File**: app.js, **Lines**: 42-45 + +This Express.js application appears to be missing CSRF protection middleware. CSRF attacks can force authenticated users to perform unintended actions. + +https://github.com/scan-io-git/scan-io/blob/abc123def456/app.js#L42-L45 + +References: +- [CWE-352](https://cwe.mitre.org/data/definitions/352.html) +- [OWASP A01:2021 - Broken Access Control](https://owasp.org/Top10/A01_2021-Broken_Access_Control/) + +> **Snippet SHA256**: abc123def456789... + +> [!NOTE] +> This issue was created and will be managed by scanio automation. Don't change body manually for proper processing, unless you know what you do +``` + +This format provides comprehensive information while maintaining machine readability for correlation and automated management. 
From c426323c68930f28b821274280cbee7522cc7b0e Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Thu, 25 Sep 2025 22:51:15 +0200 Subject: [PATCH 20/52] refactor: remove redundant code --- cmd/sarif-issues/sarif-issues.go | 285 ++++++++++++++----------------- 1 file changed, 126 insertions(+), 159 deletions(-) diff --git a/cmd/sarif-issues/sarif-issues.go b/cmd/sarif-issues/sarif-issues.go index 088ee94c..4faec8e0 100644 --- a/cmd/sarif-issues/sarif-issues.go +++ b/cmd/sarif-issues/sarif-issues.go @@ -48,6 +48,10 @@ var ( AppConfig *config.Config opts RunOptions + // Compiled regex patterns for security tag parsing + cweRegex = regexp.MustCompile(`^CWE-(\d+)\b`) + owaspRegex = regexp.MustCompile(`^OWASP[- ]?A(\d{2}):(\d{4})\s*-\s*(.+)$`) + // SarifIssuesCmd represents the command to create GitHub issues from a SARIF file. SarifIssuesCmd = &cobra.Command{ Use: "sarif-issues --sarif PATH [--namespace NAMESPACE] [--repository REPO] [--source-folder PATH] [--ref REF] [--labels label[,label...]] [--assignees user[,user...]]", @@ -152,6 +156,72 @@ func validate(o *RunOptions) error { return nil } +// generateOWASPSlug creates a URL-safe slug from OWASP title text. +// Converts spaces to underscores and removes non-alphanumeric characters except hyphens and underscores. +func generateOWASPSlug(title string) string { + slug := strings.ReplaceAll(strings.TrimSpace(title), " ", "_") + clean := make([]rune, 0, len(slug)) + for _, r := range slug { + if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '_' || r == '-' { + clean = append(clean, r) + } + } + return string(clean) +} + +// processSecurityTags converts security tags (CWE, OWASP) into reference links. +// Returns a slice of markdown reference links for recognized security identifiers. 
+func processSecurityTags(tags []string) []string { + var tagRefs []string + for _, tag := range tags { + t := strings.TrimSpace(tag) + if t == "" { + continue + } + + // Process CWE tags + if m := cweRegex.FindStringSubmatch(t); len(m) == 2 { + num := m[1] + url := fmt.Sprintf("https://cwe.mitre.org/data/definitions/%s.html", num) + tagRefs = append(tagRefs, fmt.Sprintf("- [%s](%s)", t, url)) + continue + } + + // Process OWASP tags + if m := owaspRegex.FindStringSubmatch(t); len(m) == 4 { + rank := m[1] + year := m[2] + title := m[3] + slug := generateOWASPSlug(title) + url := fmt.Sprintf("https://owasp.org/Top10/A%s_%s-%s/", rank, year, slug) + tagRefs = append(tagRefs, fmt.Sprintf("- [%s](%s)", t, url)) + continue + } + } + return tagRefs +} + +// parseLineRange parses line range from strings like "123" or "123-456". +// Returns (start, end) where end equals start for single line numbers. +func parseLineRange(value string) (int, int) { + value = strings.TrimSpace(value) + if strings.Contains(value, "-") { + parts := strings.SplitN(value, "-", 2) + if len(parts) == 2 { + start, err1 := strconv.Atoi(strings.TrimSpace(parts[0])) + end, err2 := strconv.Atoi(strings.TrimSpace(parts[1])) + if err1 == nil && err2 == nil { + return start, end + } + } + } else { + if line, err := strconv.Atoi(value); err == nil { + return line, line + } + } + return 0, 0 +} + // displaySeverity normalizes SARIF severity levels to more descriptive labels. func displaySeverity(level string) string { normalized := strings.ToLower(strings.TrimSpace(level)) @@ -172,19 +242,6 @@ func displaySeverity(level string) string { } } -// helper to fetch a string property safely -func getStringProp(m map[string]interface{}, key string) string { - if m == nil { - return "" - } - if v, ok := m[key]; ok { - if s, ok := v.(string); ok { - return s - } - } - return "" -} - // buildIssueTitle creates a concise issue title using scanner name (fallback to SARIF), // severity, ruleID and location info. 
It formats as "[][][] at" // and includes a range when endLine > line. @@ -347,128 +404,55 @@ type OpenIssueEntry struct { } // parseIssueBody attempts to read the body produced by this command and extract -// known metadata lines (Severity, Scanner, File, Line(s), Snippet SHA256, Description). +// known metadata from blockquote format lines. Only supports the new format: +// "> **Severity**: Error, **Scanner**: Semgrep OSS" +// "> **File**: app.py, **Lines**: 11-29" // Returns an OpenIssueReport with zero values when fields are missing. func parseIssueBody(body string) OpenIssueReport { rep := OpenIssueReport{} - // Prefer new-style rule ID header first; fallback to legacy "Rule:" line if absent. + + // Extract rule ID from header format: "## 🐞 " if rid := extractRuleIDFromBody(body); rid != "" { rep.RuleID = rid } + for _, line := range strings.Split(body, "\n") { line = strings.TrimSpace(line) - // Support new blockquote compact metadata lines - // "> **Severity**: Error, **Scanner**: Semgrep OSS" - // "> **File**: app.py, **Lines**: 11-29" - if strings.HasPrefix(line, "> ") { - // Remove "> " prefix for easier parsing - l := strings.TrimSpace(strings.TrimPrefix(line, "> ")) - // Normalize bold markers to plain keys - l = strings.ReplaceAll(l, "**", "") - // Split into comma-separated parts first - parts := strings.Split(l, ",") - for _, p := range parts { - seg := strings.TrimSpace(p) - if strings.HasPrefix(seg, "Severity:") { - rep.Severity = strings.TrimSpace(strings.TrimPrefix(seg, "Severity:")) - continue - } - if strings.HasPrefix(seg, "Scanner:") { - rep.Scanner = strings.TrimSpace(strings.TrimPrefix(seg, "Scanner:")) - continue - } - if strings.HasPrefix(seg, "File:") { - // If File appears on the first line with comma, capture - v := strings.TrimSpace(strings.TrimPrefix(seg, "File:")) - if v != "" { - rep.FilePath = v - } - continue - } - if strings.HasPrefix(seg, "Lines:") { - v := strings.TrimSpace(strings.TrimPrefix(seg, "Lines:")) - if 
strings.Contains(v, "-") { - lr := strings.SplitN(v, "-", 2) - if len(lr) == 2 { - if s, err := strconv.Atoi(strings.TrimSpace(lr[0])); err == nil { - rep.StartLine = s - } - if e, err := strconv.Atoi(strings.TrimSpace(lr[1])); err == nil { - rep.EndLine = e - } - } - } else { - if n, err := strconv.Atoi(v); err == nil { - rep.StartLine = n - rep.EndLine = n - } - } - continue - } - // Support snippet hash in blockquoted metadata line at end of issue - if strings.HasPrefix(seg, "Snippet SHA256:") { - rep.Hash = strings.TrimSpace(strings.TrimPrefix(seg, "Snippet SHA256:")) - continue - } - } - continue - } - if strings.HasPrefix(line, "Severity:") { - rep.Severity = strings.TrimSpace(strings.TrimPrefix(line, "Severity:")) - continue - } - if strings.HasPrefix(line, "Scanner:") { - rep.Scanner = strings.TrimSpace(strings.TrimPrefix(line, "Scanner:")) - continue - } - if strings.HasPrefix(line, "Rule:") { - // Legacy fallback only if not already populated by new header format - if rep.RuleID == "" { - rep.RuleID = strings.TrimSpace(strings.TrimPrefix(line, "Rule:")) + + // Only process blockquote metadata lines + if !strings.HasPrefix(line, "> ") { + // Check for GitHub permalink URLs + if rep.Permalink == "" && strings.HasPrefix(line, "https://github.com/") && strings.Contains(line, "/blob/") { + rep.Permalink = line } - continue - } - if strings.HasPrefix(line, "File:") { - rep.FilePath = strings.TrimSpace(strings.TrimPrefix(line, "File:")) - continue - } - if strings.HasPrefix(line, "Line:") { - v := strings.TrimSpace(strings.TrimPrefix(line, "Line:")) - if n, err := strconv.Atoi(v); err == nil { - rep.StartLine = n - rep.EndLine = n + // Capture first non-metadata line as description if empty + if rep.Description == "" && line != "" && !strings.HasPrefix(line, "##") && !strings.HasPrefix(line, "") { + rep.Description = line } continue } - if strings.HasPrefix(line, "Lines:") { - v := strings.TrimSpace(strings.TrimPrefix(line, "Lines:")) - parts := strings.Split(v, 
"-") - if len(parts) == 2 { - if s, err := strconv.Atoi(strings.TrimSpace(parts[0])); err == nil { - rep.StartLine = s - } - if e, err := strconv.Atoi(strings.TrimSpace(parts[1])); err == nil { - rep.EndLine = e - } + + // Remove "> " prefix and normalize bold markers + content := strings.TrimSpace(strings.TrimPrefix(line, "> ")) + content = strings.ReplaceAll(content, "**", "") + + // Parse comma-separated metadata fields + parts := strings.Split(content, ",") + for _, part := range parts { + segment := strings.TrimSpace(part) + + if strings.HasPrefix(segment, "Severity:") { + rep.Severity = strings.TrimSpace(strings.TrimPrefix(segment, "Severity:")) + } else if strings.HasPrefix(segment, "Scanner:") { + rep.Scanner = strings.TrimSpace(strings.TrimPrefix(segment, "Scanner:")) + } else if strings.HasPrefix(segment, "File:") { + rep.FilePath = strings.TrimSpace(strings.TrimPrefix(segment, "File:")) + } else if strings.HasPrefix(segment, "Lines:") { + value := strings.TrimSpace(strings.TrimPrefix(segment, "Lines:")) + rep.StartLine, rep.EndLine = parseLineRange(value) + } else if strings.HasPrefix(segment, "Snippet SHA256:") { + rep.Hash = strings.TrimSpace(strings.TrimPrefix(segment, "Snippet SHA256:")) } - continue - } - if strings.HasPrefix(line, "Snippet SHA256:") { - rep.Hash = strings.TrimSpace(strings.TrimPrefix(line, "Snippet SHA256:")) - continue - } - if strings.HasPrefix(line, "Permalink:") { - rep.Permalink = strings.TrimSpace(strings.TrimPrefix(line, "Permalink:")) - continue - } - // Check if line is a URL (for new format without "Permalink:" prefix) - if strings.HasPrefix(line, "https://github.com/") && strings.Contains(line, "/blob/") { - rep.Permalink = strings.TrimSpace(line) - continue - } - // When we hit a non-metadata line and description is empty, assume rest is description - if rep.Description == "" && line != "" { - rep.Description = line } } return rep @@ -571,19 +555,34 @@ func processSARIFReport(report *internalsarif.Report, options 
RunOptions, lg hcl ruleID = *res.RuleID } + // Warn about missing rule ID + if strings.TrimSpace(ruleID) == "" { + lg.Warn("SARIF result missing rule ID, skipping", "result_index", len(newIssues)) + continue + } + fileURI := filepath.ToSlash(extractFileURIFromResult(res, options.SourceFolder)) if fileURI == "" { fileURI = "" + lg.Warn("SARIF result missing file URI, using placeholder", "rule_id", ruleID) } line, endLine := extractRegionFromResult(res) - // desc := getStringProp(res.Properties, "Description") - // if desc == "" && res.Message.Text != nil { - // desc = *res.Message.Text - // } + // Warn about missing location information + if line <= 0 { + lg.Warn("SARIF result missing line information", "rule_id", ruleID, "file", fileURI) + } snippetHash := computeSnippetHash(fileURI, line, endLine, options.SourceFolder) + if snippetHash == "" && fileURI != "" && line > 0 { + lg.Warn("failed to compute snippet hash", "rule_id", ruleID, "file", fileURI, "line", line) + } + scannerName := getScannerName(run) + if scannerName == "" { + lg.Warn("SARIF run missing scanner/tool name, using fallback", "rule_id", ruleID) + } + sev := displaySeverity(level) // build body and title with scanner name label @@ -646,39 +645,7 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl } if len(tags) > 0 { - cweRe := regexp.MustCompile(`^CWE-(\d+)\b`) - owaspRe := regexp.MustCompile(`^OWASP[- ]?A(\d{2}):(\d{4})\s*-\s*(.+)$`) - var tagRefs []string - for _, tag := range tags { - t := strings.TrimSpace(tag) - if t == "" { - continue - } - if m := cweRe.FindStringSubmatch(t); len(m) == 2 { - num := m[1] - url := fmt.Sprintf("https://cwe.mitre.org/data/definitions/%s.html", num) - tagRefs = append(tagRefs, fmt.Sprintf("- [%s](%s)", t, url)) - continue - } - if m := owaspRe.FindStringSubmatch(t); len(m) == 4 { - rank := m[1] - year := m[2] - title := m[3] - slug := strings.ReplaceAll(strings.TrimSpace(title), " ", "_") - // Remove characters that are not 
letters, numbers, underscore, or hyphen - clean := make([]rune, 0, len(slug)) - for _, r := range slug { - if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '_' || r == '-' { - clean = append(clean, r) - } - } - slug = string(clean) - url := fmt.Sprintf("https://owasp.org/Top10/A%s_%s-%s/", rank, year, slug) - tagRefs = append(tagRefs, fmt.Sprintf("- [%s](%s)", t, url)) - continue - } - } - if len(tagRefs) > 0 { + if tagRefs := processSecurityTags(tags); len(tagRefs) > 0 { references = append(references, tagRefs...) } } From 9921efd940fb5587f7a60eaea0059d88d1ce889a Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Thu, 25 Sep 2025 22:54:27 +0200 Subject: [PATCH 21/52] refactor: remove duplicate API calls --- cmd/sarif-issues/sarif-issues.go | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/cmd/sarif-issues/sarif-issues.go b/cmd/sarif-issues/sarif-issues.go index 4faec8e0..af3bf9bf 100644 --- a/cmd/sarif-issues/sarif-issues.go +++ b/cmd/sarif-issues/sarif-issues.go @@ -115,7 +115,7 @@ var ( } lg.Info("fetched open issues from repository", "count", len(openIssues)) - created, err := processSARIFReport(report, opts, lg) + created, err := processSARIFReport(report, opts, lg, openIssues) if err != nil { return err } @@ -520,7 +520,7 @@ func listOpenIssues(options RunOptions) (map[int]OpenIssueEntry, error) { // processSARIFReport iterates runs/results in the SARIF report and creates VCS issues for // high severity findings. Returns number of created issues or an error. -func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hclog.Logger) (int, error) { +func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hclog.Logger, openIssues map[int]OpenIssueEntry) (int, error) { // Build list of new issues from SARIF (only high severity -> "error"). 
newIssues := make([]issuecorrelation.IssueMetadata, 0) // Also keep parallel arrays of the text bodies and titles so we can create issues later. @@ -676,12 +676,7 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl } } - // Build list of known issues (open issues fetched previously by caller via listOpenIssues) - openIssues, err := listOpenIssues(options) - if err != nil { - return 0, err - } - + // Build list of known issues from the provided open issues data knownIssues := make([]issuecorrelation.IssueMetadata, 0, len(openIssues)) for num, entry := range openIssues { rep := entry.OpenIssueReport From 3107fa35e720ee940b3969ae6cdbe3cfc16384de Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Thu, 25 Sep 2025 23:02:58 +0200 Subject: [PATCH 22/52] refactor: large processSARIFReport function decomposition --- cmd/sarif-issues/sarif-issues.go | 112 ++++++++++++++++++++++--------- 1 file changed, 81 insertions(+), 31 deletions(-) diff --git a/cmd/sarif-issues/sarif-issues.go b/cmd/sarif-issues/sarif-issues.go index af3bf9bf..03d1e2f1 100644 --- a/cmd/sarif-issues/sarif-issues.go +++ b/cmd/sarif-issues/sarif-issues.go @@ -518,17 +518,19 @@ func listOpenIssues(options RunOptions) (map[int]OpenIssueEntry, error) { return reports, nil } -// processSARIFReport iterates runs/results in the SARIF report and creates VCS issues for -// high severity findings. Returns number of created issues or an error. -func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hclog.Logger, openIssues map[int]OpenIssueEntry) (int, error) { - // Build list of new issues from SARIF (only high severity -> "error"). - newIssues := make([]issuecorrelation.IssueMetadata, 0) - // Also keep parallel arrays of the text bodies and titles so we can create issues later. - newBodies := make([]string, 0) - newTitles := make([]string, 0) +// NewIssueData holds the data needed to create a new issue from SARIF results. 
+type NewIssueData struct { + Metadata issuecorrelation.IssueMetadata + Body string + Title string +} - for _, run := range report.Runs { +// buildNewIssuesFromSARIF processes SARIF report and extracts high severity findings, +// returning structured data for creating new issues. +func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, lg hclog.Logger) []NewIssueData { + var newIssueData []NewIssueData + for _, run := range report.Runs { // Build a map of rules keyed by rule ID for quick lookups rulesByID := map[string]*sarif.ReportingDescriptor{} if run.Tool.Driver != nil { @@ -557,7 +559,7 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl // Warn about missing rule ID if strings.TrimSpace(ruleID) == "" { - lg.Warn("SARIF result missing rule ID, skipping", "result_index", len(newIssues)) + lg.Warn("SARIF result missing rule ID, skipping", "result_index", len(newIssueData)) continue } @@ -661,22 +663,29 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl } body += "\n" + scanioManagedAnnotation - newIssues = append(newIssues, issuecorrelation.IssueMetadata{ - IssueID: "", - Scanner: scannerName, - RuleID: ruleID, - Severity: level, - Filename: fileURI, - StartLine: line, - EndLine: endLine, - SnippetHash: snippetHash, + newIssueData = append(newIssueData, NewIssueData{ + Metadata: issuecorrelation.IssueMetadata{ + IssueID: "", + Scanner: scannerName, + RuleID: ruleID, + Severity: level, + Filename: fileURI, + StartLine: line, + EndLine: endLine, + SnippetHash: snippetHash, + }, + Body: body, + Title: titleText, }) - newBodies = append(newBodies, body) - newTitles = append(newTitles, titleText) } } - // Build list of known issues from the provided open issues data + return newIssueData +} + +// buildKnownIssuesFromOpen converts open GitHub issues into correlation metadata, +// filtering for well-structured scanio-managed issues only. 
+func buildKnownIssuesFromOpen(openIssues map[int]OpenIssueEntry, lg hclog.Logger) []issuecorrelation.IssueMetadata { knownIssues := make([]issuecorrelation.IssueMetadata, 0, len(openIssues)) for num, entry := range openIssues { rep := entry.OpenIssueReport @@ -706,13 +715,12 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl SnippetHash: rep.Hash, }) } + return knownIssues +} - // correlate - corr := issuecorrelation.NewCorrelator(newIssues, knownIssues) - corr.Process() - - // Create only unmatched new issues - unmatchedNew := corr.UnmatchedNew() +// createUnmatchedIssues creates GitHub issues for new findings that don't correlate with existing issues. +// Returns the number of successfully created issues. +func createUnmatchedIssues(unmatchedNew []issuecorrelation.IssueMetadata, newIssues []issuecorrelation.IssueMetadata, newBodies, newTitles []string, options RunOptions, lg hclog.Logger) (int, error) { created := 0 for _, u := range unmatchedNew { // find corresponding index in newIssues to retrieve body/title @@ -756,9 +764,12 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl } created++ } + return created, nil +} - // Close unmatched known issues (open issues that did not correlate) - unmatchedKnown := corr.UnmatchedKnown() +// closeUnmatchedIssues closes GitHub issues for known findings that don't correlate with current scan results. +// Returns an error if any issue closure fails. 
+func closeUnmatchedIssues(unmatchedKnown []issuecorrelation.IssueMetadata, options RunOptions, lg hclog.Logger) error { for _, k := range unmatchedKnown { // known IssueID contains the number as string num, err := strconv.Atoi(k.IssueID) @@ -815,9 +826,48 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hcl if err != nil { lg.Error("failed to close issue via plugin", "error", err, "number", num) // continue closing others but report an error at end - return created, errors.NewCommandError(options, nil, fmt.Errorf("close issue failed: %w", err), 2) + return errors.NewCommandError(options, nil, fmt.Errorf("close issue failed: %w", err), 2) } } + return nil +} + +// processSARIFReport iterates runs/results in the SARIF report and creates VCS issues for +// high severity findings. Returns number of created issues or an error. +func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hclog.Logger, openIssues map[int]OpenIssueEntry) (int, error) { + // Build list of new issues from SARIF using extracted function + newIssueData := buildNewIssuesFromSARIF(report, options, lg) + + // Extract metadata, bodies, and titles for correlation and issue creation + newIssues := make([]issuecorrelation.IssueMetadata, len(newIssueData)) + newBodies := make([]string, len(newIssueData)) + newTitles := make([]string, len(newIssueData)) + + for i, data := range newIssueData { + newIssues[i] = data.Metadata + newBodies[i] = data.Body + newTitles[i] = data.Title + } + + // Build list of known issues from the provided open issues data + knownIssues := buildKnownIssuesFromOpen(openIssues, lg) + + // correlate + corr := issuecorrelation.NewCorrelator(newIssues, knownIssues) + corr.Process() + + // Create only unmatched new issues + unmatchedNew := corr.UnmatchedNew() + created, err := createUnmatchedIssues(unmatchedNew, newIssues, newBodies, newTitles, options, lg) + if err != nil { + return created, err + } + + // Close unmatched known 
issues (open issues that did not correlate) + unmatchedKnown := corr.UnmatchedKnown() + if err := closeUnmatchedIssues(unmatchedKnown, options, lg); err != nil { + return created, err + } return created, nil } From b8c7787f1344f6131cebd65158bb60fc3356bf4c Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Thu, 25 Sep 2025 23:16:26 +0200 Subject: [PATCH 23/52] refactor: split sarif-issues cmd to multi-file structure --- cmd/sarif-issues/issue_processing.go | 512 +++++++++++++++ cmd/sarif-issues/sarif-issues.go | 947 +++------------------------ cmd/sarif-issues/utils.go | 315 +++++++++ cmd/sarif-issues/validation.go | 20 + 4 files changed, 926 insertions(+), 868 deletions(-) create mode 100644 cmd/sarif-issues/issue_processing.go create mode 100644 cmd/sarif-issues/utils.go create mode 100644 cmd/sarif-issues/validation.go diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go new file mode 100644 index 00000000..9fa8a1f2 --- /dev/null +++ b/cmd/sarif-issues/issue_processing.go @@ -0,0 +1,512 @@ +package sarifissues + +import ( + "fmt" + "path/filepath" + "regexp" + "strconv" + "strings" + + hclog "github.com/hashicorp/go-hclog" + "github.com/owenrumney/go-sarif/v2/sarif" + internalsarif "github.com/scan-io-git/scan-io/internal/sarif" + issuecorrelation "github.com/scan-io-git/scan-io/pkg/issuecorrelation" + "github.com/scan-io-git/scan-io/pkg/shared" + "github.com/scan-io-git/scan-io/pkg/shared/errors" +) + +// Compiled regex patterns for security tag parsing +var ( + cweRegex = regexp.MustCompile(`^CWE-(\d+)\b`) + owaspRegex = regexp.MustCompile(`^OWASP[- ]?A(\d{2}):(\d{4})\s*-\s*(.+)$`) +) + +// OpenIssueReport represents parsed metadata from an open issue body. 
+type OpenIssueReport struct { + Severity string + Scanner string + RuleID string + FilePath string + StartLine int + EndLine int + Hash string + Permalink string + Description string +} + +// OpenIssueEntry combines parsed metadata from an open issue body with the +// original IssueParams returned by the VCS plugin. The map returned by +// listOpenIssues uses the issue number as key and this struct as value. +type OpenIssueEntry struct { + OpenIssueReport + Params shared.IssueParams +} + +// NewIssueData holds the data needed to create a new issue from SARIF results. +type NewIssueData struct { + Metadata issuecorrelation.IssueMetadata + Body string + Title string +} + +// parseIssueBody attempts to read the body produced by this command and extract +// known metadata from blockquote format lines. Only supports the new format: +// "> **Severity**: Error, **Scanner**: Semgrep OSS" +// "> **File**: app.py, **Lines**: 11-29" +// Returns an OpenIssueReport with zero values when fields are missing. 
+func parseIssueBody(body string) OpenIssueReport { + rep := OpenIssueReport{} + + // Extract rule ID from header format: "## 🐞 <ruleID>" + if rid := extractRuleIDFromBody(body); rid != "" { + rep.RuleID = rid + } + + for _, line := range strings.Split(body, "\n") { + line = strings.TrimSpace(line) + + // Only process blockquote metadata lines + if !strings.HasPrefix(line, "> ") { + // Check for GitHub permalink URLs + if rep.Permalink == "" && strings.HasPrefix(line, "https://github.com/") && strings.Contains(line, "/blob/") { + rep.Permalink = line + } + // Capture first non-metadata line as description if empty + if rep.Description == "" && line != "" && !strings.HasPrefix(line, "##") && !strings.HasPrefix(line, "```") { + rep.Description = line + } + continue + } + + // Remove "> " prefix and normalize bold markers + content := strings.TrimSpace(strings.TrimPrefix(line, "> ")) + content = strings.ReplaceAll(content, "**", "") + + // Parse comma-separated metadata fields + parts := strings.Split(content, ",") + for _, part := range parts { + segment := strings.TrimSpace(part) + + if strings.HasPrefix(segment, "Severity:") { + rep.Severity = strings.TrimSpace(strings.TrimPrefix(segment, "Severity:")) + } else if strings.HasPrefix(segment, "Scanner:") { + rep.Scanner = strings.TrimSpace(strings.TrimPrefix(segment, "Scanner:")) + } else if strings.HasPrefix(segment, "File:") { + rep.FilePath = strings.TrimSpace(strings.TrimPrefix(segment, "File:")) + } else if strings.HasPrefix(segment, "Lines:") { + value := strings.TrimSpace(strings.TrimPrefix(segment, "Lines:")) + rep.StartLine, rep.EndLine = parseLineRange(value) + } else if strings.HasPrefix(segment, "Snippet SHA256:") { + rep.Hash = strings.TrimSpace(strings.TrimPrefix(segment, "Snippet SHA256:")) + } + } + } + return rep +} + +// extractRuleIDFromBody attempts to parse a rule ID from the new body format header line: +// "## <emoji> <ruleID>" where <emoji> can be any single or combined emoji/symbol token. +// Returns empty string if not found. 
+func extractRuleIDFromBody(body string) string { + // Compile regex once per call; trivial cost compared to network IO. If needed, lift to package scope. + re := regexp.MustCompile(`^##\s+[^\w\s]+\s+(.+)$`) + for _, line := range strings.Split(body, "\n") { + l := strings.TrimSpace(line) + if !strings.HasPrefix(l, "##") { + continue + } + if m := re.FindStringSubmatch(l); len(m) == 2 { + return strings.TrimSpace(m[1]) + } + } + return "" +} + +// listOpenIssues calls the VCS plugin to list open issues for the configured repo +// and parses their bodies into OpenIssueReport structures. +func listOpenIssues(options RunOptions) (map[int]OpenIssueEntry, error) { + req := shared.VCSListIssuesRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: options.Namespace, + Repository: options.Repository, + }, + Action: "listIssues", + }, + State: "open", + } + + var issues []shared.IssueParams + err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") + } + list, err := vcs.ListIssues(req) + if err != nil { + return err + } + issues = list + return nil + }) + if err != nil { + return nil, err + } + + reports := make(map[int]OpenIssueEntry, len(issues)) + for _, it := range issues { + rep := parseIssueBody(it.Body) + reports[it.Number] = OpenIssueEntry{ + OpenIssueReport: rep, + Params: it, + } + } + return reports, nil +} + +// buildNewIssuesFromSARIF processes SARIF report and extracts high severity findings, +// returning structured data for creating new issues. 
+func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, lg hclog.Logger) []NewIssueData { + var newIssueData []NewIssueData + + for _, run := range report.Runs { + // Build a map of rules keyed by rule ID for quick lookups + rulesByID := map[string]*sarif.ReportingDescriptor{} + if run.Tool.Driver != nil { + for _, r := range run.Tool.Driver.Rules { + if r == nil { + continue + } + id := strings.TrimSpace(r.ID) + if id == "" { + continue + } + rulesByID[id] = r + } + } + + for _, res := range run.Results { + level, _ := res.Properties["Level"].(string) + if strings.ToLower(level) != "error" { + continue + } + + ruleID := "" + if res.RuleID != nil { + ruleID = *res.RuleID + } + + // Warn about missing rule ID + if strings.TrimSpace(ruleID) == "" { + lg.Warn("SARIF result missing rule ID, skipping", "result_index", len(newIssueData)) + continue + } + + fileURI := filepath.ToSlash(extractFileURIFromResult(res, options.SourceFolder)) + if fileURI == "" { + fileURI = "" + lg.Warn("SARIF result missing file URI, using placeholder", "rule_id", ruleID) + } + line, endLine := extractRegionFromResult(res) + + // Warn about missing location information + if line <= 0 { + lg.Warn("SARIF result missing line information", "rule_id", ruleID, "file", fileURI) + } + + snippetHash := computeSnippetHash(fileURI, line, endLine, options.SourceFolder) + if snippetHash == "" && fileURI != "" && line > 0 { + lg.Warn("failed to compute snippet hash", "rule_id", ruleID, "file", fileURI, "line", line) + } + + scannerName := getScannerName(run) + if scannerName == "" { + lg.Warn("SARIF run missing scanner/tool name, using fallback", "rule_id", ruleID) + } + + sev := displaySeverity(level) + + // build body and title with scanner name label + titleText := buildIssueTitle(scannerName, sev, ruleID, fileURI, line, endLine) + + // New body header and compact metadata blockquote + header := "" + if strings.TrimSpace(ruleID) != "" { + header = fmt.Sprintf("## 🐞 %s\n\n", 
ruleID) + } + scannerDisp := scannerName + if scannerDisp == "" { + scannerDisp = "SARIF" + } + fileDisp := fileURI + linesDisp := fmt.Sprintf("%d", line) + if endLine > line { + linesDisp = fmt.Sprintf("%d-%d", line, endLine) + } + meta := fmt.Sprintf( + "> **Severity**: %s, **Scanner**: %s\n> **File**: %s, **Lines**: %s\n", + sev, scannerDisp, fileDisp, linesDisp, + ) + // Only use the new header and blockquote metadata + body := header + meta + "\n" + var references []string + + // Append rule help markdown if available + if r, ok := rulesByID[ruleID]; ok && r != nil && r.Help != nil && r.Help.Markdown != nil { + if hm := strings.TrimSpace(*r.Help.Markdown); hm != "" { + detail, helpRefs := parseRuleHelpMarkdown(hm) + if detail != "" { + body += "\n\n" + detail + } + if len(helpRefs) > 0 { + references = append(references, helpRefs...) + } + } + } + + // Append permalink if available + if link := buildGitHubPermalink(options, fileURI, line, endLine); link != "" { + body += fmt.Sprintf("\n%s\n", link) + } + + // Append security identifier tags (CWE, OWASP) with links if available in rule properties + if r, ok := rulesByID[ruleID]; ok && r != nil && r.Properties != nil { + var tags []string + if v, ok := r.Properties["tags"]; ok && v != nil { + switch tv := v.(type) { + case []string: + tags = tv + case []interface{}: + for _, it := range tv { + if s, ok := it.(string); ok { + tags = append(tags, s) + } + } + } + } + + if len(tags) > 0 { + if tagRefs := processSecurityTags(tags); len(tagRefs) > 0 { + references = append(references, tagRefs...) 
+ } + } + } + + if len(references) > 0 { + body += "\n\nReferences:\n" + strings.Join(references, "\n") + } + + // Add a second snippet hash right before the scanio-managed note, as a blockquote + if snippetHash != "" { + body += fmt.Sprintf("\n\n> **Snippet SHA256**: %s\n", snippetHash) + } + body += "\n" + scanioManagedAnnotation + + newIssueData = append(newIssueData, NewIssueData{ + Metadata: issuecorrelation.IssueMetadata{ + IssueID: "", + Scanner: scannerName, + RuleID: ruleID, + Severity: level, + Filename: fileURI, + StartLine: line, + EndLine: endLine, + SnippetHash: snippetHash, + }, + Body: body, + Title: titleText, + }) + } + } + + return newIssueData +} + +// buildKnownIssuesFromOpen converts open GitHub issues into correlation metadata, +// filtering for well-structured scanio-managed issues only. +func buildKnownIssuesFromOpen(openIssues map[int]OpenIssueEntry, lg hclog.Logger) []issuecorrelation.IssueMetadata { + knownIssues := make([]issuecorrelation.IssueMetadata, 0, len(openIssues)) + for num, entry := range openIssues { + rep := entry.OpenIssueReport + // Only include well-structured issues for automatic closure. + // If an open issue doesn't include basic metadata we skip it so + // we don't accidentally close unrelated or free-form issues. + if rep.Scanner == "" || rep.RuleID == "" || rep.FilePath == "" { + lg.Debug("skipping malformed open issue (won't be auto-closed)", "number", num) + continue + } + + // Only consider issues that contain the scanio-managed annotation. + // If the annotation is absent, treat the issue as manually managed and + // exclude it from correlation/auto-closure logic. 
+ if !strings.Contains(entry.Params.Body, scanioManagedAnnotation) { + lg.Debug("skipping non-scanio-managed issue (won't be auto-closed)", "number", num) + continue + } + knownIssues = append(knownIssues, issuecorrelation.IssueMetadata{ + IssueID: fmt.Sprintf("%d", num), + Scanner: rep.Scanner, + RuleID: rep.RuleID, + Severity: rep.Severity, + Filename: rep.FilePath, + StartLine: rep.StartLine, + EndLine: rep.EndLine, + SnippetHash: rep.Hash, + }) + } + return knownIssues +} + +// createUnmatchedIssues creates GitHub issues for new findings that don't correlate with existing issues. +// Returns the number of successfully created issues. +func createUnmatchedIssues(unmatchedNew []issuecorrelation.IssueMetadata, newIssues []issuecorrelation.IssueMetadata, newBodies, newTitles []string, options RunOptions, lg hclog.Logger) (int, error) { + created := 0 + for _, u := range unmatchedNew { + // find corresponding index in newIssues to retrieve body/title + var idx int = -1 + for ni, n := range newIssues { + if n == u { + idx = ni + break + } + } + if idx == -1 { + // shouldn't happen + continue + } + + req := shared.VCSIssueCreationRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: options.Namespace, + Repository: options.Repository, + }, + Action: "createIssue", + }, + Title: newTitles[idx], + Body: newBodies[idx], + Labels: opts.Labels, + Assignees: opts.Assignees, + } + + err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") + } + _, err := vcs.CreateIssue(req) + return err + }) + if err != nil { + lg.Error("failed to create issue via plugin", "error", err, "file", u.Filename, "line", u.StartLine) + return created, errors.NewCommandError(options, nil, fmt.Errorf("create issue failed: %w", err), 2) + } + created++ + } + return created, nil +} + +// closeUnmatchedIssues closes 
GitHub issues for known findings that don't correlate with current scan results. +// Returns an error if any issue closure fails. +func closeUnmatchedIssues(unmatchedKnown []issuecorrelation.IssueMetadata, options RunOptions, lg hclog.Logger) error { + for _, k := range unmatchedKnown { + // known IssueID contains the number as string + num, err := strconv.Atoi(k.IssueID) + if err != nil { + // skip if we can't parse number + continue + } + // Leave a comment before closing the issue to explain why it is being closed + commentReq := shared.VCSCreateIssueCommentRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: options.Namespace, + Repository: options.Repository, + }, + Action: "createIssueComment", + }, + Number: num, + Body: "Recent scan didn't see the issue; closing this as resolved.", + } + + err = shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") + } + _, err := vcs.CreateIssueComment(commentReq) + return err + }) + if err != nil { + lg.Error("failed to add comment before closing issue", "error", err, "number", num) + // continue to attempt closing even if commenting failed + } + + upd := shared.VCSIssueUpdateRequest{ + VCSRequestBase: shared.VCSRequestBase{ + RepoParam: shared.RepositoryParams{ + Namespace: options.Namespace, + Repository: options.Repository, + }, + Action: "updateIssue", + }, + Number: num, + State: "closed", + } + + err = shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { + vcs, ok := raw.(shared.VCS) + if !ok { + return fmt.Errorf("invalid VCS plugin type") + } + _, err := vcs.UpdateIssue(upd) + return err + }) + if err != nil { + lg.Error("failed to close issue via plugin", "error", err, "number", num) + // continue closing others but report an error at end + return errors.NewCommandError(options, 
nil, fmt.Errorf("close issue failed: %w", err), 2) + } + } + return nil +} + +// processSARIFReport iterates runs/results in the SARIF report and creates VCS issues for +// high severity findings. Returns number of created issues or an error. +func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hclog.Logger, openIssues map[int]OpenIssueEntry) (int, error) { + // Build list of new issues from SARIF using extracted function + newIssueData := buildNewIssuesFromSARIF(report, options, lg) + + // Extract metadata, bodies, and titles for correlation and issue creation + newIssues := make([]issuecorrelation.IssueMetadata, len(newIssueData)) + newBodies := make([]string, len(newIssueData)) + newTitles := make([]string, len(newIssueData)) + + for i, data := range newIssueData { + newIssues[i] = data.Metadata + newBodies[i] = data.Body + newTitles[i] = data.Title + } + + // Build list of known issues from the provided open issues data + knownIssues := buildKnownIssuesFromOpen(openIssues, lg) + + // correlate + corr := issuecorrelation.NewCorrelator(newIssues, knownIssues) + corr.Process() + + // Create only unmatched new issues + unmatchedNew := corr.UnmatchedNew() + created, err := createUnmatchedIssues(unmatchedNew, newIssues, newBodies, newTitles, options, lg) + if err != nil { + return created, err + } + + // Close unmatched known issues (open issues that did not correlate) + unmatchedKnown := corr.UnmatchedKnown() + if err := closeUnmatchedIssues(unmatchedKnown, options, lg); err != nil { + return created, err + } + + return created, nil +} diff --git a/cmd/sarif-issues/sarif-issues.go b/cmd/sarif-issues/sarif-issues.go index 03d1e2f1..a0540d8e 100644 --- a/cmd/sarif-issues/sarif-issues.go +++ b/cmd/sarif-issues/sarif-issues.go @@ -1,24 +1,13 @@ package sarifissues import ( - "crypto/sha256" "fmt" "os" - "path/filepath" - "regexp" - "strconv" "strings" - "golang.org/x/text/cases" - "golang.org/x/text/language" - "github.com/spf13/cobra" - hclog 
"github.com/hashicorp/go-hclog" - "github.com/owenrumney/go-sarif/v2/sarif" - "github.com/scan-io-git/scan-io/internal/git" internalsarif "github.com/scan-io-git/scan-io/internal/sarif" - issuecorrelation "github.com/scan-io-git/scan-io/pkg/issuecorrelation" "github.com/scan-io-git/scan-io/pkg/shared" "github.com/scan-io-git/scan-io/pkg/shared/config" "github.com/scan-io-git/scan-io/pkg/shared/errors" @@ -33,7 +22,7 @@ const ( semgrepPromoFooter = "#### 💎 Enable cross-file analysis and Pro rules for free at sg.run/pro\n\n" ) -// RunOptions holds flags for the create-issues-from-sarif command. +// RunOptions holds flags for the sarif-issues command. type RunOptions struct { Namespace string `json:"namespace,omitempty"` Repository string `json:"repository,omitempty"` @@ -48,902 +37,124 @@ var ( AppConfig *config.Config opts RunOptions - // Compiled regex patterns for security tag parsing - cweRegex = regexp.MustCompile(`^CWE-(\d+)\b`) - owaspRegex = regexp.MustCompile(`^OWASP[- ]?A(\d{2}):(\d{4})\s*-\s*(.+)$`) + // Example usage for the sarif-issues command + exampleSarifIssuesUsage = ` # Create issues from SARIF report with basic configuration + scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif + + # Create issues with labels and assignees + scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --labels bug,security --assignees alice,bob + + # Create issues with source folder for better file path resolution + scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --source-folder /path/to/source + + # Create issues with specific git reference for permalinks + scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --ref feature-branch + + # Using environment variables (GitHub Actions) + GITHUB_REPOSITORY_OWNER=scan-io-git GITHUB_REPOSITORY=scan-io-git/scan-io GITHUB_SHA=abc123 scanio sarif-issues --sarif 
/path/to/report.sarif` // SarifIssuesCmd represents the command to create GitHub issues from a SARIF file. SarifIssuesCmd = &cobra.Command{ Use: "sarif-issues --sarif PATH [--namespace NAMESPACE] [--repository REPO] [--source-folder PATH] [--ref REF] [--labels label[,label...]] [--assignees user[,user...]]", Short: "Create GitHub issues for high severity SARIF findings", - Example: "scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --labels bug,security --assignees alice,bob", + Example: exampleSarifIssuesUsage, SilenceUsage: false, Hidden: false, DisableFlagsInUseLine: true, - RunE: func(cmd *cobra.Command, args []string) error { - if len(args) == 0 && !shared.HasFlags(cmd.Flags()) { - return cmd.Help() - } - - // Fallback: if --namespace not provided, try $GITHUB_REPOSITORY_OWNER - if strings.TrimSpace(opts.Namespace) == "" { - if ns := strings.TrimSpace(os.Getenv("GITHUB_REPOSITORY_OWNER")); ns != "" { - opts.Namespace = ns - } - } - - // Fallback: if --repository not provided, try ${GITHUB_REPOSITORY#*/} - if strings.TrimSpace(opts.Repository) == "" { - if gr := strings.TrimSpace(os.Getenv("GITHUB_REPOSITORY")); gr != "" { - if idx := strings.Index(gr, "/"); idx >= 0 && idx < len(gr)-1 { - opts.Repository = gr[idx+1:] - } else { - // No slash present; fall back to the whole value - opts.Repository = gr - } - } - } - - // Fallback: if --ref not provided, try $GITHUB_SHA - if strings.TrimSpace(opts.Ref) == "" { - if sha := strings.TrimSpace(os.Getenv("GITHUB_SHA")); sha != "" { - opts.Ref = sha - } - } - - if err := validate(&opts); err != nil { - return errors.NewCommandError(opts, nil, err, 1) - } - - lg := logger.NewLogger(AppConfig, "create-issues-from-sarif") - - report, err := internalsarif.ReadReport(opts.SarifPath, lg, opts.SourceFolder, true) - if err != nil { - lg.Error("failed to read SARIF report", "error", err) - return errors.NewCommandError(opts, nil, fmt.Errorf("failed to read SARIF report: %w", err), 
2) - } - - // Enrich to ensure Levels and Titles are present - report.EnrichResultsLevelProperty() - report.EnrichResultsTitleProperty() - // No need to enrich locations here; we'll compute file path from URI directly - - // get all open github issues - openIssues, err := listOpenIssues(opts) - if err != nil { - return err - } - lg.Info("fetched open issues from repository", "count", len(openIssues)) - - created, err := processSARIFReport(report, opts, lg, openIssues) - if err != nil { - return err - } - - lg.Info("issues created from SARIF high severity findings", "count", created) - fmt.Printf("Created %d issue(s) from SARIF high severity findings\n", created) - return nil - }, + RunE: runSarifIssues, } ) // Init wires config into this command. -func Init(cfg *config.Config) { AppConfig = cfg } - -func init() { - SarifIssuesCmd.Flags().StringVar(&opts.Namespace, "namespace", "", "GitHub org/user (defaults to $GITHUB_REPOSITORY_OWNER when unset)") - SarifIssuesCmd.Flags().StringVar(&opts.Repository, "repository", "", "Repository name (defaults to ${GITHUB_REPOSITORY#*/} when unset)") - SarifIssuesCmd.Flags().StringVar(&opts.SarifPath, "sarif", "", "Path to SARIF file") - SarifIssuesCmd.Flags().StringVar(&opts.SourceFolder, "source-folder", "", "Optional: source folder to improve file path resolution in SARIF (used for absolute paths)") - SarifIssuesCmd.Flags().StringVar(&opts.Ref, "ref", "", "Git ref (branch or commit SHA) to build a permalink to the vulnerable code (defaults to $GITHUB_SHA when unset)") - // --labels supports multiple usages (e.g., --labels bug --labels security) or comma-separated values - SarifIssuesCmd.Flags().StringSliceVar(&opts.Labels, "labels", nil, "Optional: labels to assign to created GitHub issues (repeat flag or use comma-separated values)") - // --assignees supports multiple usages or comma-separated values - SarifIssuesCmd.Flags().StringSliceVar(&opts.Assignees, "assignees", nil, "Optional: assignees (GitHub logins) to assign to 
created issues (repeat flag or use comma-separated values)") - SarifIssuesCmd.Flags().BoolP("help", "h", false, "Show help for create-issues-from-sarif command.") +func Init(cfg *config.Config) { + AppConfig = cfg } -func validate(o *RunOptions) error { - if o.Namespace == "" { - return fmt.Errorf("--namespace is required") - } - if o.Repository == "" { - return fmt.Errorf("--repository is required") +// runSarifIssues is the main execution function for the sarif-issues command. +func runSarifIssues(cmd *cobra.Command, args []string) error { + // 1. Check for help request + if len(args) == 0 && !shared.HasFlags(cmd.Flags()) { + return cmd.Help() } - if strings.TrimSpace(o.SarifPath) == "" { - return fmt.Errorf("--sarif is required") - } - return nil -} -// generateOWASPSlug creates a URL-safe slug from OWASP title text. -// Converts spaces to underscores and removes non-alphanumeric characters except hyphens and underscores. -func generateOWASPSlug(title string) string { - slug := strings.ReplaceAll(strings.TrimSpace(title), " ", "_") - clean := make([]rune, 0, len(slug)) - for _, r := range slug { - if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '_' || r == '-' { - clean = append(clean, r) - } - } - return string(clean) -} + // 2. Initialize logger + lg := logger.NewLogger(AppConfig, "sarif-issues") -// processSecurityTags converts security tags (CWE, OWASP) into reference links. -// Returns a slice of markdown reference links for recognized security identifiers. 
-func processSecurityTags(tags []string) []string { - var tagRefs []string - for _, tag := range tags { - t := strings.TrimSpace(tag) - if t == "" { - continue - } - - // Process CWE tags - if m := cweRegex.FindStringSubmatch(t); len(m) == 2 { - num := m[1] - url := fmt.Sprintf("https://cwe.mitre.org/data/definitions/%s.html", num) - tagRefs = append(tagRefs, fmt.Sprintf("- [%s](%s)", t, url)) - continue - } - - // Process OWASP tags - if m := owaspRegex.FindStringSubmatch(t); len(m) == 4 { - rank := m[1] - year := m[2] - title := m[3] - slug := generateOWASPSlug(title) - url := fmt.Sprintf("https://owasp.org/Top10/A%s_%s-%s/", rank, year, slug) - tagRefs = append(tagRefs, fmt.Sprintf("- [%s](%s)", t, url)) - continue + // 3. Handle environment variable fallbacks + // Fallback: if --namespace not provided, try $GITHUB_REPOSITORY_OWNER + if strings.TrimSpace(opts.Namespace) == "" { + if ns := strings.TrimSpace(os.Getenv("GITHUB_REPOSITORY_OWNER")); ns != "" { + opts.Namespace = ns } } - return tagRefs -} -// parseLineRange parses line range from strings like "123" or "123-456". -// Returns (start, end) where end equals start for single line numbers. 
-func parseLineRange(value string) (int, int) { - value = strings.TrimSpace(value) - if strings.Contains(value, "-") { - parts := strings.SplitN(value, "-", 2) - if len(parts) == 2 { - start, err1 := strconv.Atoi(strings.TrimSpace(parts[0])) - end, err2 := strconv.Atoi(strings.TrimSpace(parts[1])) - if err1 == nil && err2 == nil { - return start, end + // Fallback: if --repository not provided, try ${GITHUB_REPOSITORY#*/} + if strings.TrimSpace(opts.Repository) == "" { + if gr := strings.TrimSpace(os.Getenv("GITHUB_REPOSITORY")); gr != "" { + if idx := strings.Index(gr, "/"); idx >= 0 && idx < len(gr)-1 { + opts.Repository = gr[idx+1:] + } else { + // No slash present; fall back to the whole value + opts.Repository = gr } } - } else { - if line, err := strconv.Atoi(value); err == nil { - return line, line - } } - return 0, 0 -} -// displaySeverity normalizes SARIF severity levels to more descriptive labels. -func displaySeverity(level string) string { - normalized := strings.ToLower(strings.TrimSpace(level)) - switch normalized { - case "error": - return "High" - case "warning": - return "Medium" - case "note": - return "Low" - case "none": - return "Info" - default: - if normalized == "" { - return "" + // Fallback: if --ref not provided, try $GITHUB_SHA + if strings.TrimSpace(opts.Ref) == "" { + if sha := strings.TrimSpace(os.Getenv("GITHUB_SHA")); sha != "" { + opts.Ref = sha } - return cases.Title(language.Und).String(normalized) } -} -// buildIssueTitle creates a concise issue title using scanner name (fallback to SARIF), -// severity, ruleID and location info. It formats as "[][][] at" -// and includes a range when endLine > line. 
-func buildIssueTitle(scannerName, severity, ruleID, fileURI string, line, endLine int) string { - label := strings.TrimSpace(scannerName) - if label == "" { - label = "SARIF" - } - sev := strings.TrimSpace(severity) - parts := []string{label} - if sev != "" { - parts = append(parts, sev) - } - parts = append(parts, ruleID) - title := fmt.Sprintf("[%s]", strings.Join(parts, "][")) - if line > 0 { - if endLine > line { - return fmt.Sprintf("%s at %s:%d-%d", title, fileURI, line, endLine) - } - return fmt.Sprintf("%s at %s:%d", title, fileURI, line) + // 4. Validate arguments + if err := validate(&opts); err != nil { + lg.Error("invalid arguments", "error", err) + return errors.NewCommandError(opts, nil, fmt.Errorf("invalid arguments: %w", err), 1) } - return fmt.Sprintf("%s at %s", title, fileURI) -} -// computeSnippetHash reads the snippet (single line or range) from sourceFolder + fileURI -// and returns its SHA256 hex string. Returns empty string on any error or if inputs are invalid. -func computeSnippetHash(fileURI string, line, endLine int, sourceFolder string) string { - if fileURI == "" || fileURI == "" || line <= 0 || sourceFolder == "" { - return "" - } - absPath := filepath.Join(sourceFolder, filepath.FromSlash(fileURI)) - data, err := os.ReadFile(absPath) + // 5. 
Read and process SARIF report + report, err := internalsarif.ReadReport(opts.SarifPath, lg, opts.SourceFolder, true) if err != nil { - return "" - } - lines := strings.Split(string(data), "\n") - start := line - end := line - if endLine > line { - end = endLine - } - // Validate bounds (1-based line numbers) - if start < 1 || start > len(lines) { - return "" - } - if end > len(lines) { - end = len(lines) - } - if end < start { - return "" - } - snippet := strings.Join(lines[start-1:end], "\n") - sum := sha256.Sum256([]byte(snippet)) - return fmt.Sprintf("%x", sum[:]) -} - -// parseRuleHelpMarkdown removes promotional content from help markdown and splits -// it into the descriptive details and a list of reference bullet points. -func parseRuleHelpMarkdown(markdown string) (string, []string) { - cleaned := strings.ReplaceAll(markdown, semgrepPromoFooter, "") - cleaned = strings.TrimSpace(cleaned) - if cleaned == "" { - return "", nil - } - - lines := strings.Split(cleaned, "\n") - referencesStart := -1 - for idx, raw := range lines { - if strings.TrimSpace(raw) == "References:" { - referencesStart = idx - break - } - } - - if referencesStart == -1 { - return cleaned, nil + lg.Error("failed to read SARIF report", "error", err) + return errors.NewCommandError(opts, nil, fmt.Errorf("failed to read SARIF report: %w", err), 2) } - detail := strings.TrimSpace(strings.Join(lines[:referencesStart], "\n")) - var references []string - for _, raw := range lines[referencesStart+1:] { - trimmed := strings.TrimSpace(raw) - if trimmed == "" { - continue - } - // Normalise to Markdown bullet points regardless of the original marker. - trimmed = strings.TrimLeft(trimmed, "-* \t") - if trimmed == "" { - continue - } - references = append(references, "- "+trimmed) - } - - return detail, references -} - -// getScannerName returns the tool/driver name for a SARIF run when available. 
-func getScannerName(run *sarif.Run) string { - if run == nil { - return "" - } - if run.Tool.Driver == nil { - return "" - } - if run.Tool.Driver.Name != "" { - return run.Tool.Driver.Name - } - return "" -} - -// buildGitHubPermalink builds a permalink to a file and region in GitHub. -// It prefers the CLI --ref when provided; otherwise attempts to read the -// current commit hash from --source-folder using git metadata. When neither -// is available, returns an empty string. -func buildGitHubPermalink(options RunOptions, fileURI string, start, end int) string { - base := fmt.Sprintf("https://github.com/%s/%s", options.Namespace, options.Repository) - ref := strings.TrimSpace(options.Ref) - - if ref == "" && options.SourceFolder != "" { - if md, err := git.CollectRepositoryMetadata(options.SourceFolder); err == nil && md.CommitHash != nil && *md.CommitHash != "" { - ref = *md.CommitHash - } - } - - if ref == "" || fileURI == "" || fileURI == "" { - return "" - } - - path := filepath.ToSlash(fileURI) - anchor := "" - if start > 0 { - anchor = fmt.Sprintf("#L%d", start) - if end > start { - anchor = fmt.Sprintf("%s-L%d", anchor, end) - } - } - - return fmt.Sprintf("%s/blob/%s/%s%s", base, ref, path, anchor) -} + // Enrich to ensure Levels and Titles are present + report.EnrichResultsLevelProperty() + report.EnrichResultsTitleProperty() -// OpenIssueReport represents parsed metadata from an open issue body. -type OpenIssueReport struct { - Severity string - Scanner string - RuleID string - FilePath string - StartLine int - EndLine int - Hash string - Permalink string - Description string -} - -// OpenIssueEntry combines parsed metadata from an open issue body with the -// original IssueParams returned by the VCS plugin. The map returned by -// listOpenIssues uses the issue number as key and this struct as value. 
-type OpenIssueEntry struct { - OpenIssueReport - Params shared.IssueParams -} - -// parseIssueBody attempts to read the body produced by this command and extract -// known metadata from blockquote format lines. Only supports the new format: -// "> **Severity**: Error, **Scanner**: Semgrep OSS" -// "> **File**: app.py, **Lines**: 11-29" -// Returns an OpenIssueReport with zero values when fields are missing. -func parseIssueBody(body string) OpenIssueReport { - rep := OpenIssueReport{} - - // Extract rule ID from header format: "## 🐞 " - if rid := extractRuleIDFromBody(body); rid != "" { - rep.RuleID = rid - } - - for _, line := range strings.Split(body, "\n") { - line = strings.TrimSpace(line) - - // Only process blockquote metadata lines - if !strings.HasPrefix(line, "> ") { - // Check for GitHub permalink URLs - if rep.Permalink == "" && strings.HasPrefix(line, "https://github.com/") && strings.Contains(line, "/blob/") { - rep.Permalink = line - } - // Capture first non-metadata line as description if empty - if rep.Description == "" && line != "" && !strings.HasPrefix(line, "##") && !strings.HasPrefix(line, "") { - rep.Description = line - } - continue - } - - // Remove "> " prefix and normalize bold markers - content := strings.TrimSpace(strings.TrimPrefix(line, "> ")) - content = strings.ReplaceAll(content, "**", "") - - // Parse comma-separated metadata fields - parts := strings.Split(content, ",") - for _, part := range parts { - segment := strings.TrimSpace(part) - - if strings.HasPrefix(segment, "Severity:") { - rep.Severity = strings.TrimSpace(strings.TrimPrefix(segment, "Severity:")) - } else if strings.HasPrefix(segment, "Scanner:") { - rep.Scanner = strings.TrimSpace(strings.TrimPrefix(segment, "Scanner:")) - } else if strings.HasPrefix(segment, "File:") { - rep.FilePath = strings.TrimSpace(strings.TrimPrefix(segment, "File:")) - } else if strings.HasPrefix(segment, "Lines:") { - value := strings.TrimSpace(strings.TrimPrefix(segment, "Lines:")) - 
rep.StartLine, rep.EndLine = parseLineRange(value) - } else if strings.HasPrefix(segment, "Snippet SHA256:") { - rep.Hash = strings.TrimSpace(strings.TrimPrefix(segment, "Snippet SHA256:")) - } - } - } - return rep -} - -// extractRuleIDFromBody attempts to parse a rule ID from the new body format header line: -// "## " where can be any single or combined emoji/symbol token. -// Returns empty string if not found. -func extractRuleIDFromBody(body string) string { - // Compile regex once per call; trivial cost compared to network IO. If needed, lift to package scope. - re := regexp.MustCompile(`^##\s+[^\w\s]+\s+(.+)$`) - for _, line := range strings.Split(body, "\n") { - l := strings.TrimSpace(line) - if !strings.HasPrefix(l, "##") { - continue - } - if m := re.FindStringSubmatch(l); len(m) == 2 { - return strings.TrimSpace(m[1]) - } - } - return "" -} - -// listOpenIssues calls the VCS plugin to list open issues for the configured repo -// and parses their bodies into OpenIssueReport structures. -func listOpenIssues(options RunOptions) (map[int]OpenIssueEntry, error) { - req := shared.VCSListIssuesRequest{ - VCSRequestBase: shared.VCSRequestBase{ - RepoParam: shared.RepositoryParams{ - Namespace: options.Namespace, - Repository: options.Repository, - }, - Action: "listIssues", - }, - State: "open", - } - - var issues []shared.IssueParams - err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { - vcs, ok := raw.(shared.VCS) - if !ok { - return fmt.Errorf("invalid VCS plugin type") - } - list, err := vcs.ListIssues(req) - if err != nil { - return err - } - issues = list - return nil - }) + // 6. 
Get all open GitHub issues + openIssues, err := listOpenIssues(opts) if err != nil { - return nil, err + lg.Error("failed to list open issues", "error", err) + return errors.NewCommandError(opts, nil, fmt.Errorf("failed to list open issues: %w", err), 2) } + lg.Info("fetched open issues from repository", "count", len(openIssues)) - reports := make(map[int]OpenIssueEntry, len(issues)) - for _, it := range issues { - rep := parseIssueBody(it.Body) - reports[it.Number] = OpenIssueEntry{ - OpenIssueReport: rep, - Params: it, - } - } - return reports, nil -} - -// NewIssueData holds the data needed to create a new issue from SARIF results. -type NewIssueData struct { - Metadata issuecorrelation.IssueMetadata - Body string - Title string -} - -// buildNewIssuesFromSARIF processes SARIF report and extracts high severity findings, -// returning structured data for creating new issues. -func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, lg hclog.Logger) []NewIssueData { - var newIssueData []NewIssueData - - for _, run := range report.Runs { - // Build a map of rules keyed by rule ID for quick lookups - rulesByID := map[string]*sarif.ReportingDescriptor{} - if run.Tool.Driver != nil { - for _, r := range run.Tool.Driver.Rules { - if r == nil { - continue - } - id := strings.TrimSpace(r.ID) - if id == "" { - continue - } - rulesByID[id] = r - } - } - - for _, res := range run.Results { - level, _ := res.Properties["Level"].(string) - if strings.ToLower(level) != "error" { - continue - } - - ruleID := "" - if res.RuleID != nil { - ruleID = *res.RuleID - } - - // Warn about missing rule ID - if strings.TrimSpace(ruleID) == "" { - lg.Warn("SARIF result missing rule ID, skipping", "result_index", len(newIssueData)) - continue - } - - fileURI := filepath.ToSlash(extractFileURIFromResult(res, options.SourceFolder)) - if fileURI == "" { - fileURI = "" - lg.Warn("SARIF result missing file URI, using placeholder", "rule_id", ruleID) - } - line, endLine := 
extractRegionFromResult(res) - - // Warn about missing location information - if line <= 0 { - lg.Warn("SARIF result missing line information", "rule_id", ruleID, "file", fileURI) - } - - snippetHash := computeSnippetHash(fileURI, line, endLine, options.SourceFolder) - if snippetHash == "" && fileURI != "" && line > 0 { - lg.Warn("failed to compute snippet hash", "rule_id", ruleID, "file", fileURI, "line", line) - } - - scannerName := getScannerName(run) - if scannerName == "" { - lg.Warn("SARIF run missing scanner/tool name, using fallback", "rule_id", ruleID) - } - - sev := displaySeverity(level) - - // build body and title with scanner name label - titleText := buildIssueTitle(scannerName, sev, ruleID, fileURI, line, endLine) - - // New body header and compact metadata blockquote - header := "" - if strings.TrimSpace(ruleID) != "" { - header = fmt.Sprintf("## 🐞 %s\n\n", ruleID) - } - scannerDisp := scannerName - if scannerDisp == "" { - scannerDisp = "SARIF" - } - fileDisp := fileURI - linesDisp := fmt.Sprintf("%d", line) - if endLine > line { - linesDisp = fmt.Sprintf("%d-%d", line, endLine) - } - meta := fmt.Sprintf( - "> **Severity**: %s, **Scanner**: %s\n> **File**: %s, **Lines**: %s\n", - sev, scannerDisp, fileDisp, linesDisp, - ) - // Only use the new header and blockquote metadata - body := header + meta + "\n" - var references []string - - // Append rule help markdown if available - if r, ok := rulesByID[ruleID]; ok && r != nil && r.Help != nil && r.Help.Markdown != nil { - if hm := strings.TrimSpace(*r.Help.Markdown); hm != "" { - detail, helpRefs := parseRuleHelpMarkdown(hm) - if detail != "" { - body += "\n\n" + detail - } - if len(helpRefs) > 0 { - references = append(references, helpRefs...) 
- } - } - } - - // Append permalink if available - if link := buildGitHubPermalink(options, fileURI, line, endLine); link != "" { - body += fmt.Sprintf("\n%s\n", link) - } - - // Append security identifier tags (CWE, OWASP) with links if available in rule properties - if r, ok := rulesByID[ruleID]; ok && r != nil && r.Properties != nil { - var tags []string - if v, ok := r.Properties["tags"]; ok && v != nil { - switch tv := v.(type) { - case []string: - tags = tv - case []interface{}: - for _, it := range tv { - if s, ok := it.(string); ok { - tags = append(tags, s) - } - } - } - } - - if len(tags) > 0 { - if tagRefs := processSecurityTags(tags); len(tagRefs) > 0 { - references = append(references, tagRefs...) - } - } - } - - if len(references) > 0 { - body += "\n\nReferences:\n" + strings.Join(references, "\n") - } - - // Add a second snippet hash right before the scanio-managed note, as a blockquote - if snippetHash != "" { - body += fmt.Sprintf("\n\n> **Snippet SHA256**: %s\n", snippetHash) - } - body += "\n" + scanioManagedAnnotation - - newIssueData = append(newIssueData, NewIssueData{ - Metadata: issuecorrelation.IssueMetadata{ - IssueID: "", - Scanner: scannerName, - RuleID: ruleID, - Severity: level, - Filename: fileURI, - StartLine: line, - EndLine: endLine, - SnippetHash: snippetHash, - }, - Body: body, - Title: titleText, - }) - } - } - - return newIssueData -} - -// buildKnownIssuesFromOpen converts open GitHub issues into correlation metadata, -// filtering for well-structured scanio-managed issues only. -func buildKnownIssuesFromOpen(openIssues map[int]OpenIssueEntry, lg hclog.Logger) []issuecorrelation.IssueMetadata { - knownIssues := make([]issuecorrelation.IssueMetadata, 0, len(openIssues)) - for num, entry := range openIssues { - rep := entry.OpenIssueReport - // Only include well-structured issues for automatic closure. 
- // If an open issue doesn't include basic metadata we skip it so - // we don't accidentally close unrelated or free-form issues. - if rep.Scanner == "" || rep.RuleID == "" || rep.FilePath == "" { - lg.Debug("skipping malformed open issue (won't be auto-closed)", "number", num) - continue - } - - // Only consider issues that contain the scanio-managed annotation. - // If the annotation is absent, treat the issue as manually managed and - // exclude it from correlation/auto-closure logic. - if !strings.Contains(entry.Params.Body, scanioManagedAnnotation) { - lg.Debug("skipping non-scanio-managed issue (won't be auto-closed)", "number", num) - continue - } - knownIssues = append(knownIssues, issuecorrelation.IssueMetadata{ - IssueID: fmt.Sprintf("%d", num), - Scanner: rep.Scanner, - RuleID: rep.RuleID, - Severity: rep.Severity, - Filename: rep.FilePath, - StartLine: rep.StartLine, - EndLine: rep.EndLine, - SnippetHash: rep.Hash, - }) - } - return knownIssues -} - -// createUnmatchedIssues creates GitHub issues for new findings that don't correlate with existing issues. -// Returns the number of successfully created issues. 
-func createUnmatchedIssues(unmatchedNew []issuecorrelation.IssueMetadata, newIssues []issuecorrelation.IssueMetadata, newBodies, newTitles []string, options RunOptions, lg hclog.Logger) (int, error) { - created := 0 - for _, u := range unmatchedNew { - // find corresponding index in newIssues to retrieve body/title - var idx int = -1 - for ni, n := range newIssues { - if n == u { - idx = ni - break - } - } - if idx == -1 { - // shouldn't happen - continue - } - - req := shared.VCSIssueCreationRequest{ - VCSRequestBase: shared.VCSRequestBase{ - RepoParam: shared.RepositoryParams{ - Namespace: options.Namespace, - Repository: options.Repository, - }, - Action: "createIssue", - }, - Title: newTitles[idx], - Body: newBodies[idx], - Labels: opts.Labels, - Assignees: opts.Assignees, - } - - err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { - vcs, ok := raw.(shared.VCS) - if !ok { - return fmt.Errorf("invalid VCS plugin type") - } - _, err := vcs.CreateIssue(req) - return err - }) - if err != nil { - lg.Error("failed to create issue via plugin", "error", err, "file", u.Filename, "line", u.StartLine) - return created, errors.NewCommandError(options, nil, fmt.Errorf("create issue failed: %w", err), 2) - } - created++ - } - return created, nil -} - -// closeUnmatchedIssues closes GitHub issues for known findings that don't correlate with current scan results. -// Returns an error if any issue closure fails. 
-func closeUnmatchedIssues(unmatchedKnown []issuecorrelation.IssueMetadata, options RunOptions, lg hclog.Logger) error { - for _, k := range unmatchedKnown { - // known IssueID contains the number as string - num, err := strconv.Atoi(k.IssueID) - if err != nil { - // skip if we can't parse number - continue - } - // Leave a comment before closing the issue to explain why it is being closed - commentReq := shared.VCSCreateIssueCommentRequest{ - VCSRequestBase: shared.VCSRequestBase{ - RepoParam: shared.RepositoryParams{ - Namespace: options.Namespace, - Repository: options.Repository, - }, - Action: "createIssueComment", - }, - Number: num, - Body: "Recent scan didn't see the issue; closing this as resolved.", - } - - err = shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { - vcs, ok := raw.(shared.VCS) - if !ok { - return fmt.Errorf("invalid VCS plugin type") - } - _, err := vcs.CreateIssueComment(commentReq) - return err - }) - if err != nil { - lg.Error("failed to add comment before closing issue", "error", err, "number", num) - // continue to attempt closing even if commenting failed - } - - upd := shared.VCSIssueUpdateRequest{ - VCSRequestBase: shared.VCSRequestBase{ - RepoParam: shared.RepositoryParams{ - Namespace: options.Namespace, - Repository: options.Repository, - }, - Action: "updateIssue", - }, - Number: num, - State: "closed", - } - - err = shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { - vcs, ok := raw.(shared.VCS) - if !ok { - return fmt.Errorf("invalid VCS plugin type") - } - _, err := vcs.UpdateIssue(upd) - return err - }) - if err != nil { - lg.Error("failed to close issue via plugin", "error", err, "number", num) - // continue closing others but report an error at end - return errors.NewCommandError(options, nil, fmt.Errorf("close issue failed: %w", err), 2) - } - } - return nil -} - -// processSARIFReport iterates runs/results in the 
SARIF report and creates VCS issues for -// high severity findings. Returns number of created issues or an error. -func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hclog.Logger, openIssues map[int]OpenIssueEntry) (int, error) { - // Build list of new issues from SARIF using extracted function - newIssueData := buildNewIssuesFromSARIF(report, options, lg) - - // Extract metadata, bodies, and titles for correlation and issue creation - newIssues := make([]issuecorrelation.IssueMetadata, len(newIssueData)) - newBodies := make([]string, len(newIssueData)) - newTitles := make([]string, len(newIssueData)) - - for i, data := range newIssueData { - newIssues[i] = data.Metadata - newBodies[i] = data.Body - newTitles[i] = data.Title - } - - // Build list of known issues from the provided open issues data - knownIssues := buildKnownIssuesFromOpen(openIssues, lg) - - // correlate - corr := issuecorrelation.NewCorrelator(newIssues, knownIssues) - corr.Process() - - // Create only unmatched new issues - unmatchedNew := corr.UnmatchedNew() - created, err := createUnmatchedIssues(unmatchedNew, newIssues, newBodies, newTitles, options, lg) + // 7. Process SARIF report and create/close issues + created, err := processSARIFReport(report, opts, lg, openIssues) if err != nil { - return created, err + lg.Error("failed to process SARIF report", "error", err) + return err } - // Close unmatched known issues (open issues that did not correlate) - unmatchedKnown := corr.UnmatchedKnown() - if err := closeUnmatchedIssues(unmatchedKnown, options, lg); err != nil { - return created, err - } + // 8. Log success and handle output + lg.Info("issues created from SARIF high severity findings", "count", created) + fmt.Printf("Created %d issue(s) from SARIF high severity findings\n", created) - return created, nil -} - -// extractLocationInfo derives a file path (relative when appropriate), start line and end line -// from a SARIF result's first location. 
It mirrors the previous inline logic used in the -// command handler. Returns (fileURI, startLine, endLine). -// extractFileURIFromResult returns a file path derived from the SARIF result's first location. -// If the URI is absolute and a non-empty sourceFolder is provided, the returned path will be -// made relative to sourceFolder (matching previous behaviour). -func extractFileURIFromResult(res *sarif.Result, sourceFolder string) string { - if res == nil || len(res.Locations) == 0 { - return "" - } - loc := res.Locations[0] - if loc.PhysicalLocation == nil { - return "" - } - art := loc.PhysicalLocation.ArtifactLocation - if art == nil || art.URI == nil { - return "" - } - uri := *art.URI - // If URI is not absolute or there's no sourceFolder provided, return it unchanged. - if !filepath.IsAbs(uri) || sourceFolder == "" { - return uri - } - - // Normalize sourceFolder to an absolute, cleaned path so relative inputs like - // "../scanio-test" match absolute URIs such as "/home/jekos/.../scanio-test/...". - if absSource, err := filepath.Abs(sourceFolder); err == nil { - absSource = filepath.Clean(absSource) - - // Prefer filepath.Rel which will produce a relative path when uri is under absSource. - if rel, err := filepath.Rel(absSource, uri); err == nil { - // If rel does not escape to parent directories, it's a proper subpath. - if rel != "" && !strings.HasPrefix(rel, "..") { - return rel - } - } - - // Fallback: trim the absolute source prefix explicitly when possible. - prefix := absSource + string(filepath.Separator) - if strings.HasPrefix(uri, prefix) { - return strings.TrimPrefix(uri, prefix) - } - if strings.HasPrefix(uri, absSource) { - return strings.TrimPrefix(uri, absSource) - } - } - - // Last-resort: try trimming the raw sourceFolder string provided by the user. 
- rel := strings.TrimPrefix(uri, sourceFolder) - if strings.HasPrefix(rel, string(filepath.Separator)) { - return rel[1:] - } - return rel + return nil } -// extractRegionFromResult returns start and end line numbers (0 when not present) -// taken from the SARIF result's first location region. -func extractRegionFromResult(res *sarif.Result) (int, int) { - if res == nil || len(res.Locations) == 0 { - return 0, 0 - } - loc := res.Locations[0] - if loc.PhysicalLocation == nil || loc.PhysicalLocation.Region == nil { - return 0, 0 - } - start := 0 - end := 0 - if loc.PhysicalLocation.Region.StartLine != nil { - start = *loc.PhysicalLocation.Region.StartLine - } - if loc.PhysicalLocation.Region.EndLine != nil { - end = *loc.PhysicalLocation.Region.EndLine - } - return start, end +func init() { + SarifIssuesCmd.Flags().StringVar(&opts.Namespace, "namespace", "", "GitHub org/user (defaults to $GITHUB_REPOSITORY_OWNER when unset)") + SarifIssuesCmd.Flags().StringVar(&opts.Repository, "repository", "", "Repository name (defaults to ${GITHUB_REPOSITORY#*/} when unset)") + SarifIssuesCmd.Flags().StringVar(&opts.SarifPath, "sarif", "", "Path to SARIF file") + SarifIssuesCmd.Flags().StringVar(&opts.SourceFolder, "source-folder", "", "Optional: source folder to improve file path resolution in SARIF (used for absolute paths)") + SarifIssuesCmd.Flags().StringVar(&opts.Ref, "ref", "", "Git ref (branch or commit SHA) to build a permalink to the vulnerable code (defaults to $GITHUB_SHA when unset)") + // --labels supports multiple usages (e.g., --labels bug --labels security) or comma-separated values + SarifIssuesCmd.Flags().StringSliceVar(&opts.Labels, "labels", nil, "Optional: labels to assign to created GitHub issues (repeat flag or use comma-separated values)") + // --assignees supports multiple usages or comma-separated values + SarifIssuesCmd.Flags().StringSliceVar(&opts.Assignees, "assignees", nil, "Optional: assignees (GitHub logins) to assign to created issues (repeat flag 
or use comma-separated values)") + SarifIssuesCmd.Flags().BoolP("help", "h", false, "Show help for sarif-issues command.") } diff --git a/cmd/sarif-issues/utils.go b/cmd/sarif-issues/utils.go new file mode 100644 index 00000000..c732cde0 --- /dev/null +++ b/cmd/sarif-issues/utils.go @@ -0,0 +1,315 @@ +package sarifissues + +import ( + "crypto/sha256" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + + "golang.org/x/text/cases" + "golang.org/x/text/language" + + "github.com/owenrumney/go-sarif/v2/sarif" + "github.com/scan-io-git/scan-io/internal/git" +) + +// parseLineRange parses line range from strings like "123" or "123-456". +// Returns (start, end) where end equals start for single line numbers. +func parseLineRange(value string) (int, int) { + value = strings.TrimSpace(value) + if strings.Contains(value, "-") { + parts := strings.SplitN(value, "-", 2) + if len(parts) == 2 { + start, err1 := strconv.Atoi(strings.TrimSpace(parts[0])) + end, err2 := strconv.Atoi(strings.TrimSpace(parts[1])) + if err1 == nil && err2 == nil { + return start, end + } + } + } else { + if line, err := strconv.Atoi(value); err == nil { + return line, line + } + } + return 0, 0 +} + +// displaySeverity normalizes SARIF severity levels to more descriptive labels. +func displaySeverity(level string) string { + normalized := strings.ToLower(strings.TrimSpace(level)) + switch normalized { + case "error": + return "High" + case "warning": + return "Medium" + case "note": + return "Low" + case "none": + return "Info" + default: + if normalized == "" { + return "" + } + return cases.Title(language.Und).String(normalized) + } +} + +// generateOWASPSlug creates a URL-safe slug from OWASP title text. +// Converts spaces to underscores and removes non-alphanumeric characters except hyphens and underscores. 
+func generateOWASPSlug(title string) string { + slug := strings.ReplaceAll(strings.TrimSpace(title), " ", "_") + clean := make([]rune, 0, len(slug)) + for _, r := range slug { + if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '_' || r == '-' { + clean = append(clean, r) + } + } + return string(clean) +} + +// processSecurityTags converts security tags (CWE, OWASP) into reference links. +// Returns a slice of markdown reference links for recognized security identifiers. +func processSecurityTags(tags []string) []string { + var tagRefs []string + for _, tag := range tags { + t := strings.TrimSpace(tag) + if t == "" { + continue + } + + // Process CWE tags + if m := cweRegex.FindStringSubmatch(t); len(m) == 2 { + num := m[1] + url := fmt.Sprintf("https://cwe.mitre.org/data/definitions/%s.html", num) + tagRefs = append(tagRefs, fmt.Sprintf("- [%s](%s)", t, url)) + continue + } + + // Process OWASP tags + if m := owaspRegex.FindStringSubmatch(t); len(m) == 4 { + rank := m[1] + year := m[2] + title := m[3] + slug := generateOWASPSlug(title) + url := fmt.Sprintf("https://owasp.org/Top10/A%s_%s-%s/", rank, year, slug) + tagRefs = append(tagRefs, fmt.Sprintf("- [%s](%s)", t, url)) + continue + } + } + return tagRefs +} + +// buildIssueTitle creates a concise issue title using scanner name (fallback to SARIF), +// severity, ruleID and location info. It formats as "[][][] at" +// and includes a range when endLine > line. 
+func buildIssueTitle(scannerName, severity, ruleID, fileURI string, line, endLine int) string { + label := strings.TrimSpace(scannerName) + if label == "" { + label = "SARIF" + } + sev := strings.TrimSpace(severity) + parts := []string{label} + if sev != "" { + parts = append(parts, sev) + } + parts = append(parts, ruleID) + title := fmt.Sprintf("[%s]", strings.Join(parts, "][")) + if line > 0 { + if endLine > line { + return fmt.Sprintf("%s at %s:%d-%d", title, fileURI, line, endLine) + } + return fmt.Sprintf("%s at %s:%d", title, fileURI, line) + } + return fmt.Sprintf("%s at %s", title, fileURI) +} + +// computeSnippetHash reads the snippet (single line or range) from sourceFolder + fileURI +// and returns its SHA256 hex string. Returns empty string on any error or if inputs are invalid. +func computeSnippetHash(fileURI string, line, endLine int, sourceFolder string) string { + if fileURI == "" || fileURI == "" || line <= 0 || sourceFolder == "" { + return "" + } + absPath := filepath.Join(sourceFolder, filepath.FromSlash(fileURI)) + data, err := os.ReadFile(absPath) + if err != nil { + return "" + } + lines := strings.Split(string(data), "\n") + start := line + end := line + if endLine > line { + end = endLine + } + // Validate bounds (1-based line numbers) + if start < 1 || start > len(lines) { + return "" + } + if end > len(lines) { + end = len(lines) + } + if end < start { + return "" + } + snippet := strings.Join(lines[start-1:end], "\n") + sum := sha256.Sum256([]byte(snippet)) + return fmt.Sprintf("%x", sum[:]) +} + +// parseRuleHelpMarkdown removes promotional content from help markdown and splits +// it into the descriptive details and a list of reference bullet points. 
+func parseRuleHelpMarkdown(markdown string) (string, []string) { + cleaned := strings.ReplaceAll(markdown, semgrepPromoFooter, "") + cleaned = strings.TrimSpace(cleaned) + if cleaned == "" { + return "", nil + } + + lines := strings.Split(cleaned, "\n") + referencesStart := -1 + for idx, raw := range lines { + if strings.TrimSpace(raw) == "References:" { + referencesStart = idx + break + } + } + + if referencesStart == -1 { + return cleaned, nil + } + + detail := strings.TrimSpace(strings.Join(lines[:referencesStart], "\n")) + var references []string + for _, raw := range lines[referencesStart+1:] { + trimmed := strings.TrimSpace(raw) + if trimmed == "" { + continue + } + // Normalise to Markdown bullet points regardless of the original marker. + trimmed = strings.TrimLeft(trimmed, "-* \t") + if trimmed == "" { + continue + } + references = append(references, "- "+trimmed) + } + + return detail, references +} + +// getScannerName returns the tool/driver name for a SARIF run when available. +func getScannerName(run *sarif.Run) string { + if run == nil { + return "" + } + if run.Tool.Driver == nil { + return "" + } + if run.Tool.Driver.Name != "" { + return run.Tool.Driver.Name + } + return "" +} + +// buildGitHubPermalink builds a permalink to a file and region in GitHub. +// It prefers the CLI --ref when provided; otherwise attempts to read the +// current commit hash from --source-folder using git metadata. When neither +// is available, returns an empty string. 
+func buildGitHubPermalink(options RunOptions, fileURI string, start, end int) string { + base := fmt.Sprintf("https://github.com/%s/%s", options.Namespace, options.Repository) + ref := strings.TrimSpace(options.Ref) + + if ref == "" && options.SourceFolder != "" { + if md, err := git.CollectRepositoryMetadata(options.SourceFolder); err == nil && md.CommitHash != nil && *md.CommitHash != "" { + ref = *md.CommitHash + } + } + + if ref == "" || fileURI == "" || fileURI == "" { + return "" + } + + path := filepath.ToSlash(fileURI) + anchor := "" + if start > 0 { + anchor = fmt.Sprintf("#L%d", start) + if end > start { + anchor = fmt.Sprintf("%s-L%d", anchor, end) + } + } + + return fmt.Sprintf("%s/blob/%s/%s%s", base, ref, path, anchor) +} + +// extractFileURIFromResult returns a file path derived from the SARIF result's first location. +// If the URI is absolute and a non-empty sourceFolder is provided, the returned path will be +// made relative to sourceFolder (matching previous behaviour). +func extractFileURIFromResult(res *sarif.Result, sourceFolder string) string { + if res == nil || len(res.Locations) == 0 { + return "" + } + loc := res.Locations[0] + if loc.PhysicalLocation == nil { + return "" + } + art := loc.PhysicalLocation.ArtifactLocation + if art == nil || art.URI == nil { + return "" + } + uri := *art.URI + // If URI is not absolute or there's no sourceFolder provided, return it unchanged. + if !filepath.IsAbs(uri) || sourceFolder == "" { + return uri + } + + // Normalize sourceFolder to an absolute, cleaned path so relative inputs like + // "../scanio-test" match absolute URIs such as "/home/jekos/.../scanio-test/...". + if absSource, err := filepath.Abs(sourceFolder); err == nil { + absSource = filepath.Clean(absSource) + + // Prefer filepath.Rel which will produce a relative path when uri is under absSource. + if rel, err := filepath.Rel(absSource, uri); err == nil { + // If rel does not escape to parent directories, it's a proper subpath. 
+ if rel != "" && !strings.HasPrefix(rel, "..") { + return rel + } + } + + // Fallback: trim the absolute source prefix explicitly when possible. + prefix := absSource + string(filepath.Separator) + if strings.HasPrefix(uri, prefix) { + return strings.TrimPrefix(uri, prefix) + } + if strings.HasPrefix(uri, absSource) { + return strings.TrimPrefix(uri, absSource) + } + } + + // Last-resort: try trimming the raw sourceFolder string provided by the user. + rel := strings.TrimPrefix(uri, sourceFolder) + if strings.HasPrefix(rel, string(filepath.Separator)) { + return rel[1:] + } + return rel +} + +// extractRegionFromResult returns start and end line numbers (0 when not present) +// taken from the SARIF result's first location region. +func extractRegionFromResult(res *sarif.Result) (int, int) { + if res == nil || len(res.Locations) == 0 { + return 0, 0 + } + loc := res.Locations[0] + if loc.PhysicalLocation == nil || loc.PhysicalLocation.Region == nil { + return 0, 0 + } + start := 0 + end := 0 + if loc.PhysicalLocation.Region.StartLine != nil { + start = *loc.PhysicalLocation.Region.StartLine + } + if loc.PhysicalLocation.Region.EndLine != nil { + end = *loc.PhysicalLocation.Region.EndLine + } + return start, end +} diff --git a/cmd/sarif-issues/validation.go b/cmd/sarif-issues/validation.go new file mode 100644 index 00000000..52c30faa --- /dev/null +++ b/cmd/sarif-issues/validation.go @@ -0,0 +1,20 @@ +package sarifissues + +import ( + "fmt" + "strings" +) + +// validate validates the RunOptions for the sarif-issues command. 
+func validate(o *RunOptions) error { + if o.Namespace == "" { + return fmt.Errorf("--namespace is required") + } + if o.Repository == "" { + return fmt.Errorf("--repository is required") + } + if strings.TrimSpace(o.SarifPath) == "" { + return fmt.Errorf("--sarif is required") + } + return nil +} From 8d163285be1be41befc26b0f852791209b7270d0 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Thu, 25 Sep 2025 23:18:35 +0200 Subject: [PATCH 24/52] refactor: remove redundant commands --- cmd/create-issue/create-issue.go | 105 -------------------------- cmd/list-issues/list-issues.go | 118 ----------------------------- cmd/root.go | 9 --- cmd/update-issue/update-issue.go | 124 ------------------------------- 4 files changed, 356 deletions(-) delete mode 100644 cmd/create-issue/create-issue.go delete mode 100644 cmd/list-issues/list-issues.go delete mode 100644 cmd/update-issue/update-issue.go diff --git a/cmd/create-issue/create-issue.go b/cmd/create-issue/create-issue.go deleted file mode 100644 index 6f195be8..00000000 --- a/cmd/create-issue/create-issue.go +++ /dev/null @@ -1,105 +0,0 @@ -package createissue - -import ( - "fmt" - - "github.com/spf13/cobra" - - "github.com/scan-io-git/scan-io/pkg/shared" - "github.com/scan-io-git/scan-io/pkg/shared/config" - "github.com/scan-io-git/scan-io/pkg/shared/errors" - "github.com/scan-io-git/scan-io/pkg/shared/logger" -) - -// RunOptions holds flags for the create-issue command. -type RunOptions struct { - Namespace string `json:"namespace,omitempty"` - Repository string `json:"repository,omitempty"` - Title string `json:"title,omitempty"` - Body string `json:"body,omitempty"` -} - -var ( - AppConfig *config.Config - opts RunOptions - - // CreateIssueCmd represents the command to create a GitHub issue. 
- CreateIssueCmd = &cobra.Command{ - Use: "create-issue --namespace NAMESPACE --repository REPO --title TITLE [--body BODY]", - Short: "Create a GitHub issue (minimal command)", - Example: "go run ./main.go create-issue --namespace scan-io-git --repository scanio-test --title 'My Title' --body 'My Body'", - Hidden: true, - SilenceUsage: true, - DisableFlagsInUseLine: true, - RunE: func(cmd *cobra.Command, args []string) error { - if len(args) == 0 && !shared.HasFlags(cmd.Flags()) { - return cmd.Help() - } - - if err := validate(&opts); err != nil { - return errors.NewCommandError(opts, nil, err, 1) - } - - lg := logger.NewLogger(AppConfig, "create-issue") - - // Build request for VCS plugin - req := shared.VCSIssueCreationRequest{ - VCSRequestBase: shared.VCSRequestBase{ - RepoParam: shared.RepositoryParams{ - Namespace: opts.Namespace, - Repository: opts.Repository, - }, - Action: "createIssue", - }, - Title: opts.Title, - Body: opts.Body, - } - - var createdIssueNumber int - err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { - vcs, ok := raw.(shared.VCS) - if !ok { - return fmt.Errorf("invalid VCS plugin type") - } - num, err := vcs.CreateIssue(req) - if err != nil { - return err - } - createdIssueNumber = num - return nil - }) - if err != nil { - lg.Error("failed to create issue via plugin", "error", err) - return errors.NewCommandError(opts, nil, fmt.Errorf("create issue failed: %w", err), 2) - } - - lg.Info("issue created", "number", createdIssueNumber) - fmt.Printf("Created issue #%d\n", createdIssueNumber) - return nil - }, - } -) - -// Init wires config into this command. 
-func Init(cfg *config.Config) { AppConfig = cfg } - -func init() { - CreateIssueCmd.Flags().StringVar(&opts.Namespace, "namespace", "", "GitHub org/user") - CreateIssueCmd.Flags().StringVar(&opts.Repository, "repository", "", "Repository name") - CreateIssueCmd.Flags().StringVar(&opts.Title, "title", "", "Issue title") - CreateIssueCmd.Flags().StringVar(&opts.Body, "body", "", "Issue body") - CreateIssueCmd.Flags().BoolP("help", "h", false, "Show help for create-issue command.") -} - -func validate(o *RunOptions) error { - if o.Namespace == "" { - return fmt.Errorf("--namespace is required") - } - if o.Repository == "" { - return fmt.Errorf("--repository is required") - } - if o.Title == "" { - return fmt.Errorf("--title is required") - } - return nil -} diff --git a/cmd/list-issues/list-issues.go b/cmd/list-issues/list-issues.go deleted file mode 100644 index 7d8907be..00000000 --- a/cmd/list-issues/list-issues.go +++ /dev/null @@ -1,118 +0,0 @@ -package listissues - -import ( - "fmt" - "sort" - "time" - - "github.com/spf13/cobra" - - "github.com/scan-io-git/scan-io/pkg/shared" - "github.com/scan-io-git/scan-io/pkg/shared/config" - "github.com/scan-io-git/scan-io/pkg/shared/errors" - "github.com/scan-io-git/scan-io/pkg/shared/logger" -) - -// RunOptions holds flags for the list-issues command. -type RunOptions struct { - Namespace string `json:"namespace,omitempty"` - Repository string `json:"repository,omitempty"` - State string `json:"state,omitempty"` // open|closed|all -} - -var ( - AppConfig *config.Config - opts RunOptions - - // ListIssuesCmd represents the command to list GitHub issues. 
- ListIssuesCmd = &cobra.Command{ - Use: "list-issues --namespace NAMESPACE --repository REPO [--state open|closed|all]", - Short: "List GitHub issues (minimal command)", - Hidden: true, - SilenceUsage: true, - DisableFlagsInUseLine: true, - RunE: func(cmd *cobra.Command, args []string) error { - if len(args) == 0 && !shared.HasFlags(cmd.Flags()) { - return cmd.Help() - } - - if err := validate(&opts); err != nil { - return errors.NewCommandError(opts, nil, err, 1) - } - - lg := logger.NewLogger(AppConfig, "list-issues") - - // Build request for VCS plugin - req := shared.VCSListIssuesRequest{ - VCSRequestBase: shared.VCSRequestBase{ - RepoParam: shared.RepositoryParams{ - Namespace: opts.Namespace, - Repository: opts.Repository, - }, - Action: "listIssues", - }, - State: opts.State, - } - - var issues []shared.IssueParams - err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { - vcs, ok := raw.(shared.VCS) - if !ok { - return fmt.Errorf("invalid VCS plugin type") - } - list, err := vcs.ListIssues(req) - if err != nil { - return err - } - issues = list - return nil - }) - if err != nil { - lg.Error("failed to list issues via plugin", "error", err) - return errors.NewCommandError(opts, nil, fmt.Errorf("list issues failed: %w", err), 2) - } - - // Sort by updated desc for nicer output - sort.Slice(issues, func(i, j int) bool { return issues[i].UpdatedDate > issues[j].UpdatedDate }) - - if len(issues) == 0 { - fmt.Println("No issues found") - return nil - } - - // Print concise table - fmt.Printf("# %-8s %-7s %-18s %s\n", "NUMBER", "STATE", "AUTHOR", "TITLE") - for _, it := range issues { - upd := time.Unix(it.UpdatedDate, 0).UTC().Format(time.RFC3339) - _ = upd // keep for future verbose mode - fmt.Printf("- %-8d %-7s %-18s %s\n", it.Number, it.State, it.Author.UserName, it.Title) - } - return nil - }, - } -) - -// Init wires config into this command. 
-func Init(cfg *config.Config) { AppConfig = cfg } - -func init() { - ListIssuesCmd.Flags().StringVar(&opts.Namespace, "namespace", "", "GitHub org/user") - ListIssuesCmd.Flags().StringVar(&opts.Repository, "repository", "", "Repository name") - ListIssuesCmd.Flags().StringVar(&opts.State, "state", "open", "Issue state filter: open|closed|all") - ListIssuesCmd.Flags().BoolP("help", "h", false, "Show help for list-issues command.") -} - -func validate(o *RunOptions) error { - if o.Namespace == "" { - return fmt.Errorf("--namespace is required") - } - if o.Repository == "" { - return fmt.Errorf("--repository is required") - } - switch o.State { - case "", "open", "closed", "all": - return nil - default: - return fmt.Errorf("--state must be one of: open, closed, all") - } -} diff --git a/cmd/root.go b/cmd/root.go index 432fe998..1dabca1e 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -8,13 +8,10 @@ import ( "github.com/spf13/cobra" "github.com/scan-io-git/scan-io/cmd/analyse" - createissue "github.com/scan-io-git/scan-io/cmd/create-issue" "github.com/scan-io-git/scan-io/cmd/fetch" integrationvcs "github.com/scan-io-git/scan-io/cmd/integration-vcs" "github.com/scan-io-git/scan-io/cmd/list" - listissues "github.com/scan-io-git/scan-io/cmd/list-issues" sarifissues "github.com/scan-io-git/scan-io/cmd/sarif-issues" - updateissue "github.com/scan-io-git/scan-io/cmd/update-issue" "github.com/scan-io-git/scan-io/cmd/version" "github.com/scan-io-git/scan-io/pkg/shared" "github.com/scan-io-git/scan-io/pkg/shared/config" @@ -74,9 +71,6 @@ func initConfig() { fetch.Init(AppConfig) analyse.Init(AppConfig) integrationvcs.Init(AppConfig) - createissue.Init(AppConfig) - listissues.Init(AppConfig) - updateissue.Init(AppConfig) sarifissues.Init(AppConfig) version.Init(AppConfig) } @@ -89,9 +83,6 @@ func init() { rootCmd.AddCommand(fetch.FetchCmd) rootCmd.AddCommand(analyse.AnalyseCmd) rootCmd.AddCommand(integrationvcs.IntegrationVCSCmd) - rootCmd.AddCommand(createissue.CreateIssueCmd) - 
rootCmd.AddCommand(listissues.ListIssuesCmd) - rootCmd.AddCommand(updateissue.UpdateIssueCmd) rootCmd.AddCommand(sarifissues.SarifIssuesCmd) rootCmd.AddCommand(version.NewVersionCmd()) // rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file") diff --git a/cmd/update-issue/update-issue.go b/cmd/update-issue/update-issue.go deleted file mode 100644 index ccaa981d..00000000 --- a/cmd/update-issue/update-issue.go +++ /dev/null @@ -1,124 +0,0 @@ -package updateissue - -import ( - "fmt" - "strings" - - "github.com/spf13/cobra" - - "github.com/scan-io-git/scan-io/pkg/shared" - "github.com/scan-io-git/scan-io/pkg/shared/config" - "github.com/scan-io-git/scan-io/pkg/shared/errors" - "github.com/scan-io-git/scan-io/pkg/shared/logger" -) - -// RunOptions holds flags for the update-issue command. -type RunOptions struct { - Namespace string `json:"namespace,omitempty"` - Repository string `json:"repository,omitempty"` - Number int `json:"number,omitempty"` - Title string `json:"title,omitempty"` - Body string `json:"body,omitempty"` - State string `json:"state,omitempty"` -} - -var ( - AppConfig *config.Config - opts RunOptions - - // UpdateIssueCmd represents the command to update a GitHub issue. 
- UpdateIssueCmd = &cobra.Command{ - Use: "update-issue --namespace NAMESPACE --repository REPO --number N [--title TITLE] [--body BODY] [--state STATE]", - Short: "Update a GitHub issue (title/body/state)", - Example: "scanio update-issue --namespace scan-io-git --repository scanio-test --number 4 --state closed", - SilenceUsage: true, - Hidden: true, - DisableFlagsInUseLine: true, - RunE: func(cmd *cobra.Command, args []string) error { - if len(args) == 0 && !shared.HasFlags(cmd.Flags()) { - return cmd.Help() - } - - if err := validate(&opts); err != nil { - return errors.NewCommandError(opts, nil, err, 1) - } - - lg := logger.NewLogger(AppConfig, "update-issue") - - // Build request for VCS plugin - req := shared.VCSIssueUpdateRequest{ - VCSRequestBase: shared.VCSRequestBase{ - RepoParam: shared.RepositoryParams{ - Namespace: opts.Namespace, - Repository: opts.Repository, - }, - Action: "updateIssue", - }, - Number: opts.Number, - Title: opts.Title, - Body: opts.Body, - State: opts.State, - } - - var success bool - err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { - vcs, ok := raw.(shared.VCS) - if !ok { - return fmt.Errorf("invalid VCS plugin type") - } - okResp, err := vcs.UpdateIssue(req) - if err != nil { - return err - } - success = okResp - return nil - }) - if err != nil { - lg.Error("failed to update issue via plugin", "error", err) - return errors.NewCommandError(opts, nil, fmt.Errorf("update issue failed: %w", err), 2) - } - - if success { - lg.Info("issue updated", "number", opts.Number) - fmt.Printf("Updated issue #%d\n", opts.Number) - } else { - lg.Warn("issue update returned false", "number", opts.Number) - fmt.Printf("Issue not updated (no-op?) #%d\n", opts.Number) - } - return nil - }, - } -) - -// Init wires config into this command. 
-func Init(cfg *config.Config) { AppConfig = cfg } - -func init() { - UpdateIssueCmd.Flags().StringVar(&opts.Namespace, "namespace", "", "GitHub org/user") - UpdateIssueCmd.Flags().StringVar(&opts.Repository, "repository", "", "Repository name") - UpdateIssueCmd.Flags().IntVar(&opts.Number, "number", 0, "Issue number") - UpdateIssueCmd.Flags().StringVar(&opts.Title, "title", "", "New issue title") - UpdateIssueCmd.Flags().StringVar(&opts.Body, "body", "", "New issue body") - UpdateIssueCmd.Flags().StringVar(&opts.State, "state", "", "New issue state: open or closed") - UpdateIssueCmd.Flags().BoolP("help", "h", false, "Show help for update-issue command.") -} - -func validate(o *RunOptions) error { - if o.Namespace == "" { - return fmt.Errorf("--namespace is required") - } - if o.Repository == "" { - return fmt.Errorf("--repository is required") - } - if o.Number <= 0 { - return fmt.Errorf("--number is required and must be > 0") - } - // at least one field to update must be provided - if strings.TrimSpace(o.Title) == "" && strings.TrimSpace(o.Body) == "" && strings.TrimSpace(o.State) == "" { - return fmt.Errorf("provide at least one of --title, --body, or --state") - } - if s := strings.ToLower(strings.TrimSpace(o.State)); s != "" && s != "open" && s != "closed" { - return fmt.Errorf("--state must be 'open' or 'closed' if provided") - } - return nil -} From 20d6532519340daf0a47ae5341299eb8fd9260d3 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Tue, 7 Oct 2025 08:03:37 +0200 Subject: [PATCH 25/52] tests: add sarif issues utils tets --- .cursorrules | 15 ++ cmd/sarif-issues/utils_test.go | 437 +++++++++++++++++++++++++++++++++ 2 files changed, 452 insertions(+) create mode 100644 .cursorrules create mode 100644 cmd/sarif-issues/utils_test.go diff --git a/.cursorrules b/.cursorrules new file mode 100644 index 00000000..da74d835 --- /dev/null +++ b/.cursorrules @@ -0,0 +1,15 @@ +# Scan-io Cursor Rules + +## Development workflow +- When implementing new features 
or making changes to the codebase, always check the `docs/engineering/` directory first for established patterns and guidelines: +- When in doubt about implementation details, always refer to the engineering documentation first, then examine similar existing implementations in the codebase. + +## Commands +- Build cli with: `make build-cli` +- Build plugins with: `make build-plugins` +- Build everything with: `make build` +- Test with: `make test` +- Use `go fmt` for formatting + +## Code style +- Use early returns when handling errors or special cases to reduce nesting and improve readability. diff --git a/cmd/sarif-issues/utils_test.go b/cmd/sarif-issues/utils_test.go new file mode 100644 index 00000000..7a52f1ea --- /dev/null +++ b/cmd/sarif-issues/utils_test.go @@ -0,0 +1,437 @@ +package sarifissues + +import ( + "crypto/sha256" + "fmt" + "os" + "path/filepath" + "testing" +) + +func TestDisplaySeverity(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + // Standard SARIF severity levels + { + name: "error level", + input: "error", + expected: "High", + }, + { + name: "warning level", + input: "warning", + expected: "Medium", + }, + { + name: "note level", + input: "note", + expected: "Low", + }, + { + name: "none level", + input: "none", + expected: "Info", + }, + // Case insensitive tests + { + name: "ERROR uppercase", + input: "ERROR", + expected: "High", + }, + { + name: "Warning mixed case", + input: "Warning", + expected: "Medium", + }, + { + name: "NOTE uppercase", + input: "NOTE", + expected: "Low", + }, + { + name: "NONE uppercase", + input: "NONE", + expected: "Info", + }, + // Whitespace handling + { + name: "error with leading space", + input: " error", + expected: "High", + }, + { + name: "warning with trailing space", + input: "warning ", + expected: "Medium", + }, + { + name: "note with surrounding spaces", + input: " note ", + expected: "Low", + }, + { + name: "none with tabs", + input: "\tnone\t", + 
expected: "Info", + }, + // Edge cases + { + name: "empty string", + input: "", + expected: "", + }, + { + name: "whitespace only", + input: " ", + expected: "", + }, + { + name: "tab only", + input: "\t", + expected: "", + }, + { + name: "newline only", + input: "\n", + expected: "", + }, + // Custom/unknown severity levels (should be title-cased) + { + name: "custom severity lowercase", + input: "critical", + expected: "Critical", + }, + { + name: "custom severity uppercase", + input: "FATAL", + expected: "Fatal", + }, + { + name: "custom severity mixed case", + input: "sEvErE", + expected: "Severe", + }, + { + name: "custom multi-word severity", + input: "very high", + expected: "Very High", + }, + { + name: "custom with numbers", + input: "level1", + expected: "Level1", + }, + { + name: "custom with special chars", + input: "high-priority", + expected: "High-Priority", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := displaySeverity(tt.input) + if result != tt.expected { + t.Errorf("displaySeverity(%q) = %q, want %q", tt.input, result, tt.expected) + } + }) + } +} + +func TestComputeSnippetHash(t *testing.T) { + // Create a temporary directory for test files + tempDir, err := os.MkdirTemp("", "snippet_hash_test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create test files with known content + testFileContent := `line 1 +line 2 +line 3 +line 4 +line 5` + + testFilePath := filepath.Join(tempDir, "test.txt") + err = os.WriteFile(testFilePath, []byte(testFileContent), 0644) + if err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + // Create another test file with different content + singleLineContent := "single line content" + singleLineFilePath := filepath.Join(tempDir, "single.txt") + err = os.WriteFile(singleLineFilePath, []byte(singleLineContent), 0644) + if err != nil { + t.Fatalf("Failed to create single line test file: %v", err) 
+ } + + // Create empty file + emptyFilePath := filepath.Join(tempDir, "empty.txt") + err = os.WriteFile(emptyFilePath, []byte(""), 0644) + if err != nil { + t.Fatalf("Failed to create empty test file: %v", err) + } + + // Helper function to compute expected hash + computeExpectedHash := func(content string) string { + sum := sha256.Sum256([]byte(content)) + return fmt.Sprintf("%x", sum[:]) + } + + tests := []struct { + name string + fileURI string + line int + endLine int + sourceFolder string + expected string + }{ + // Valid cases + { + name: "single line from middle", + fileURI: "test.txt", + line: 2, + endLine: 2, + sourceFolder: tempDir, + expected: computeExpectedHash("line 2"), + }, + { + name: "multiple lines range", + fileURI: "test.txt", + line: 2, + endLine: 4, + sourceFolder: tempDir, + expected: computeExpectedHash("line 2\nline 3\nline 4"), + }, + { + name: "first line only", + fileURI: "test.txt", + line: 1, + endLine: 1, + sourceFolder: tempDir, + expected: computeExpectedHash("line 1"), + }, + { + name: "last line only", + fileURI: "test.txt", + line: 5, + endLine: 5, + sourceFolder: tempDir, + expected: computeExpectedHash("line 5"), + }, + { + name: "entire file", + fileURI: "test.txt", + line: 1, + endLine: 5, + sourceFolder: tempDir, + expected: computeExpectedHash(testFileContent), + }, + { + name: "single line file", + fileURI: "single.txt", + line: 1, + endLine: 1, + sourceFolder: tempDir, + expected: computeExpectedHash(singleLineContent), + }, + { + name: "endLine same as line (no range)", + fileURI: "test.txt", + line: 3, + endLine: 3, + sourceFolder: tempDir, + expected: computeExpectedHash("line 3"), + }, + { + name: "endLine less than line (should use single line)", + fileURI: "test.txt", + line: 3, + endLine: 2, + sourceFolder: tempDir, + expected: computeExpectedHash("line 3"), + }, + + // Edge cases that should return empty string + { + name: "empty fileURI", + fileURI: "", + line: 1, + endLine: 1, + sourceFolder: tempDir, + 
expected: "", + }, + { + name: "unknown fileURI", + fileURI: "", + line: 1, + endLine: 1, + sourceFolder: tempDir, + expected: "", + }, + { + name: "zero line number", + fileURI: "test.txt", + line: 0, + endLine: 1, + sourceFolder: tempDir, + expected: "", + }, + { + name: "negative line number", + fileURI: "test.txt", + line: -1, + endLine: 1, + sourceFolder: tempDir, + expected: "", + }, + { + name: "empty sourceFolder", + fileURI: "test.txt", + line: 1, + endLine: 1, + sourceFolder: "", + expected: "", + }, + { + name: "line number beyond file length", + fileURI: "test.txt", + line: 10, + endLine: 10, + sourceFolder: tempDir, + expected: "", + }, + { + name: "file does not exist", + fileURI: "nonexistent.txt", + line: 1, + endLine: 1, + sourceFolder: tempDir, + expected: "", + }, + { + name: "invalid sourceFolder", + fileURI: "test.txt", + line: 1, + endLine: 1, + sourceFolder: "/nonexistent/path", + expected: "", + }, + + // Boundary cases + { + name: "endLine beyond file length (should clamp)", + fileURI: "test.txt", + line: 4, + endLine: 10, + sourceFolder: tempDir, + expected: computeExpectedHash("line 4\nline 5"), + }, + { + name: "empty file", + fileURI: "empty.txt", + line: 1, + endLine: 1, + sourceFolder: tempDir, + expected: computeExpectedHash(""), // Empty file has one empty line + }, + + // Path handling + { + name: "fileURI with forward slashes", + fileURI: "test.txt", + line: 1, + endLine: 1, + sourceFolder: tempDir, + expected: computeExpectedHash("line 1"), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := computeSnippetHash(tt.fileURI, tt.line, tt.endLine, tt.sourceFolder) + if result != tt.expected { + t.Errorf("computeSnippetHash(%q, %d, %d, %q) = %q, want %q", + tt.fileURI, tt.line, tt.endLine, tt.sourceFolder, result, tt.expected) + } + }) + } +} + +// TestComputeSnippetHash_DifferentContentDifferentHash tests that different +// content produces different hashes +func 
TestComputeSnippetHash_DifferentContentDifferentHash(t *testing.T) { + // Create a temporary directory for test files + tempDir, err := os.MkdirTemp("", "snippet_hash_different_test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create two files with different content + file1Path := filepath.Join(tempDir, "file1.txt") + file2Path := filepath.Join(tempDir, "file2.txt") + + err = os.WriteFile(file1Path, []byte("content A"), 0644) + if err != nil { + t.Fatalf("Failed to create file1: %v", err) + } + + err = os.WriteFile(file2Path, []byte("content B"), 0644) + if err != nil { + t.Fatalf("Failed to create file2: %v", err) + } + + hash1 := computeSnippetHash("file1.txt", 1, 1, tempDir) + hash2 := computeSnippetHash("file2.txt", 1, 1, tempDir) + + if hash1 == hash2 { + t.Errorf("Different content produced same hash: %q", hash1) + } + + if hash1 == "" || hash2 == "" { + t.Error("One or both hashes were empty") + } +} + +// TestComputeSnippetHash_SameContentSameHash tests that identical content +// produces identical hashes regardless of file name +func TestComputeSnippetHash_SameContentSameHash(t *testing.T) { + // Create a temporary directory for test files + tempDir, err := os.MkdirTemp("", "snippet_hash_same_test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create two files with identical content but different names + content := "identical content\nline 2\nline 3" + file1Path := filepath.Join(tempDir, "identical1.txt") + file2Path := filepath.Join(tempDir, "identical2.txt") + + err = os.WriteFile(file1Path, []byte(content), 0644) + if err != nil { + t.Fatalf("Failed to create file1: %v", err) + } + + err = os.WriteFile(file2Path, []byte(content), 0644) + if err != nil { + t.Fatalf("Failed to create file2: %v", err) + } + + hash1 := computeSnippetHash("identical1.txt", 1, 2, tempDir) + hash2 := computeSnippetHash("identical2.txt", 1, 
2, tempDir) + + if hash1 != hash2 { + t.Errorf("Identical content produced different hashes: %q vs %q", hash1, hash2) + } + + if hash1 == "" { + t.Error("Hash was empty for valid content") + } +} From f1f8d12528f905cea8ff50ebc09548f213361f1f Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Wed, 8 Oct 2025 20:38:59 +0200 Subject: [PATCH 26/52] fix: better metadata parsing to support more edge cases for sarif-issues command --- .cursorrules | 7 + .gitignore | 2 + cmd/sarif-issues/issue_processing.go | 16 +- cmd/sarif-issues/sarif-issues.go | 49 +- cmd/sarif-issues/utils.go | 196 ++++- cmd/sarif-issues/utils_test.go | 832 +++++++++++++++--- cmd/to-html.go | 21 +- .../engineering/sarif-issues-path-analysis.md | 98 +++ internal/git/metadata.go | 53 +- 9 files changed, 1037 insertions(+), 237 deletions(-) create mode 100644 docs/engineering/sarif-issues-path-analysis.md diff --git a/.cursorrules b/.cursorrules index da74d835..cfcd570d 100644 --- a/.cursorrules +++ b/.cursorrules @@ -3,6 +3,7 @@ ## Development workflow - When implementing new features or making changes to the codebase, always check the `docs/engineering/` directory first for established patterns and guidelines: - When in doubt about implementation details, always refer to the engineering documentation first, then examine similar existing implementations in the codebase. +- Try to reuse internal packages if relevant. Extend if required functionality does not exist. ## Commands - Build cli with: `make build-cli` @@ -13,3 +14,9 @@ ## Code style - Use early returns when handling errors or special cases to reduce nesting and improve readability. + +## Planning +- When generating a plan for new features, refactoring and so on, make an analysis of codebase, then write a plan in phases. +- The plan may contain one or more phases. Each phase contains tasks. Write inputs and deliverables for each phase, task or group of tasks. 
+- Ensure that new functionality has tasks related to having tests for that functionality. +- Default plan file is `PLAN.md` in the root. diff --git a/.gitignore b/.gitignore index cefedaf9..8eff9db3 100644 --- a/.gitignore +++ b/.gitignore @@ -67,3 +67,5 @@ __debug_bin* *.json *report.html *results.html + +data/ \ No newline at end of file diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index 9fa8a1f2..167b60ea 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -9,6 +9,7 @@ import ( hclog "github.com/hashicorp/go-hclog" "github.com/owenrumney/go-sarif/v2/sarif" + "github.com/scan-io-git/scan-io/internal/git" internalsarif "github.com/scan-io-git/scan-io/internal/sarif" issuecorrelation "github.com/scan-io-git/scan-io/pkg/issuecorrelation" "github.com/scan-io-git/scan-io/pkg/shared" @@ -166,7 +167,7 @@ func listOpenIssues(options RunOptions) (map[int]OpenIssueEntry, error) { // buildNewIssuesFromSARIF processes SARIF report and extracts high severity findings, // returning structured data for creating new issues. 
-func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, lg hclog.Logger) []NewIssueData { +func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, sourceFolderAbs string, repoMetadata *git.RepositoryMetadata, lg hclog.Logger) []NewIssueData { var newIssueData []NewIssueData for _, run := range report.Runs { @@ -202,7 +203,8 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, l continue } - fileURI := filepath.ToSlash(extractFileURIFromResult(res, options.SourceFolder)) + fileURI, localPath := extractFileURIFromResult(res, sourceFolderAbs, repoMetadata) + fileURI = filepath.ToSlash(strings.TrimSpace(fileURI)) if fileURI == "" { fileURI = "" lg.Warn("SARIF result missing file URI, using placeholder", "rule_id", ruleID) @@ -214,9 +216,9 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, l lg.Warn("SARIF result missing line information", "rule_id", ruleID, "file", fileURI) } - snippetHash := computeSnippetHash(fileURI, line, endLine, options.SourceFolder) + snippetHash := computeSnippetHash(localPath, line, endLine) if snippetHash == "" && fileURI != "" && line > 0 { - lg.Warn("failed to compute snippet hash", "rule_id", ruleID, "file", fileURI, "line", line) + lg.Warn("failed to compute snippet hash", "rule_id", ruleID, "file", fileURI, "line", line, "local_path", localPath) } scannerName := getScannerName(run) @@ -265,7 +267,7 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, l } // Append permalink if available - if link := buildGitHubPermalink(options, fileURI, line, endLine); link != "" { + if link := buildGitHubPermalink(options, repoMetadata, fileURI, line, endLine); link != "" { body += fmt.Sprintf("\n%s\n", link) } @@ -473,9 +475,9 @@ func closeUnmatchedIssues(unmatchedKnown []issuecorrelation.IssueMetadata, optio // processSARIFReport iterates runs/results in the SARIF report and creates VCS issues for // high severity 
findings. Returns number of created issues or an error. -func processSARIFReport(report *internalsarif.Report, options RunOptions, lg hclog.Logger, openIssues map[int]OpenIssueEntry) (int, error) { +func processSARIFReport(report *internalsarif.Report, options RunOptions, sourceFolderAbs string, repoMetadata *git.RepositoryMetadata, lg hclog.Logger, openIssues map[int]OpenIssueEntry) (int, error) { // Build list of new issues from SARIF using extracted function - newIssueData := buildNewIssuesFromSARIF(report, options, lg) + newIssueData := buildNewIssuesFromSARIF(report, options, sourceFolderAbs, repoMetadata, lg) // Extract metadata, bodies, and titles for correlation and issue creation newIssues := make([]issuecorrelation.IssueMetadata, len(newIssueData)) diff --git a/cmd/sarif-issues/sarif-issues.go b/cmd/sarif-issues/sarif-issues.go index a0540d8e..74f8ced6 100644 --- a/cmd/sarif-issues/sarif-issues.go +++ b/cmd/sarif-issues/sarif-issues.go @@ -2,11 +2,12 @@ package sarifissues import ( "fmt" - "os" "strings" + "github.com/hashicorp/go-hclog" "github.com/spf13/cobra" + "github.com/scan-io-git/scan-io/internal/git" internalsarif "github.com/scan-io-git/scan-io/internal/sarif" "github.com/scan-io-git/scan-io/pkg/shared" "github.com/scan-io-git/scan-io/pkg/shared/config" @@ -81,31 +82,7 @@ func runSarifIssues(cmd *cobra.Command, args []string) error { lg := logger.NewLogger(AppConfig, "sarif-issues") // 3. 
Handle environment variable fallbacks - // Fallback: if --namespace not provided, try $GITHUB_REPOSITORY_OWNER - if strings.TrimSpace(opts.Namespace) == "" { - if ns := strings.TrimSpace(os.Getenv("GITHUB_REPOSITORY_OWNER")); ns != "" { - opts.Namespace = ns - } - } - - // Fallback: if --repository not provided, try ${GITHUB_REPOSITORY#*/} - if strings.TrimSpace(opts.Repository) == "" { - if gr := strings.TrimSpace(os.Getenv("GITHUB_REPOSITORY")); gr != "" { - if idx := strings.Index(gr, "/"); idx >= 0 && idx < len(gr)-1 { - opts.Repository = gr[idx+1:] - } else { - // No slash present; fall back to the whole value - opts.Repository = gr - } - } - } - - // Fallback: if --ref not provided, try $GITHUB_SHA - if strings.TrimSpace(opts.Ref) == "" { - if sha := strings.TrimSpace(os.Getenv("GITHUB_SHA")); sha != "" { - opts.Ref = sha - } - } + ApplyEnvironmentFallbacks(&opts) // 4. Validate arguments if err := validate(&opts); err != nil { @@ -120,6 +97,12 @@ func runSarifIssues(cmd *cobra.Command, args []string) error { return errors.NewCommandError(opts, nil, fmt.Errorf("failed to read SARIF report: %w", err), 2) } + // Resolve source folder to absolute form for path calculations + sourceFolderAbs := ResolveSourceFolder(opts.SourceFolder, lg) + + // Collect repository metadata to understand repo root vs. subfolder layout + repoMetadata := resolveRepositoryMetadata(sourceFolderAbs, lg) + // Enrich to ensure Levels and Titles are present report.EnrichResultsLevelProperty() report.EnrichResultsTitleProperty() @@ -133,7 +116,7 @@ func runSarifIssues(cmd *cobra.Command, args []string) error { lg.Info("fetched open issues from repository", "count", len(openIssues)) // 7. 
Process SARIF report and create/close issues - created, err := processSARIFReport(report, opts, lg, openIssues) + created, err := processSARIFReport(report, opts, sourceFolderAbs, repoMetadata, lg, openIssues) if err != nil { lg.Error("failed to process SARIF report", "error", err) return err @@ -158,3 +141,15 @@ func init() { SarifIssuesCmd.Flags().StringSliceVar(&opts.Assignees, "assignees", nil, "Optional: assignees (GitHub logins) to assign to created issues (repeat flag or use comma-separated values)") SarifIssuesCmd.Flags().BoolP("help", "h", false, "Show help for sarif-issues command.") } + +func resolveRepositoryMetadata(sourceFolderAbs string, lg hclog.Logger) *git.RepositoryMetadata { + if strings.TrimSpace(sourceFolderAbs) == "" { + return nil + } + + md, err := git.CollectRepositoryMetadata(sourceFolderAbs) + if err != nil { + lg.Debug("unable to collect repository metadata", "error", err) + } + return md +} diff --git a/cmd/sarif-issues/utils.go b/cmd/sarif-issues/utils.go index c732cde0..8f2abbfe 100644 --- a/cmd/sarif-issues/utils.go +++ b/cmd/sarif-issues/utils.go @@ -11,8 +11,10 @@ import ( "golang.org/x/text/cases" "golang.org/x/text/language" + "github.com/hashicorp/go-hclog" "github.com/owenrumney/go-sarif/v2/sarif" "github.com/scan-io-git/scan-io/internal/git" + "github.com/scan-io-git/scan-io/pkg/shared/files" ) // parseLineRange parses line range from strings like "123" or "123-456". @@ -125,14 +127,13 @@ func buildIssueTitle(scannerName, severity, ruleID, fileURI string, line, endLin return fmt.Sprintf("%s at %s", title, fileURI) } -// computeSnippetHash reads the snippet (single line or range) from sourceFolder + fileURI +// computeSnippetHash reads the snippet (single line or range) from a local filesystem path // and returns its SHA256 hex string. Returns empty string on any error or if inputs are invalid. 
-func computeSnippetHash(fileURI string, line, endLine int, sourceFolder string) string { - if fileURI == "" || fileURI == "" || line <= 0 || sourceFolder == "" { +func computeSnippetHash(localPath string, line, endLine int) string { + if strings.TrimSpace(localPath) == "" || line <= 0 { return "" } - absPath := filepath.Join(sourceFolder, filepath.FromSlash(fileURI)) - data, err := os.ReadFile(absPath) + data, err := os.ReadFile(localPath) if err != nil { return "" } @@ -213,15 +214,20 @@ func getScannerName(run *sarif.Run) string { // buildGitHubPermalink builds a permalink to a file and region in GitHub. // It prefers the CLI --ref when provided; otherwise attempts to read the -// current commit hash from --source-folder using git metadata. When neither -// is available, returns an empty string. -func buildGitHubPermalink(options RunOptions, fileURI string, start, end int) string { +// current commit hash from repo metadata (falling back to collecting it +// directly when metadata is not provided). Returns empty string when any +// critical component is missing. 
+func buildGitHubPermalink(options RunOptions, repoMetadata *git.RepositoryMetadata, fileURI string, start, end int) string { base := fmt.Sprintf("https://github.com/%s/%s", options.Namespace, options.Repository) ref := strings.TrimSpace(options.Ref) - if ref == "" && options.SourceFolder != "" { - if md, err := git.CollectRepositoryMetadata(options.SourceFolder); err == nil && md.CommitHash != nil && *md.CommitHash != "" { - ref = *md.CommitHash + if ref == "" { + if repoMetadata != nil && repoMetadata.CommitHash != nil && *repoMetadata.CommitHash != "" { + ref = *repoMetadata.CommitHash + } else if options.SourceFolder != "" { + if md, err := git.CollectRepositoryMetadata(options.SourceFolder); err == nil && md.CommitHash != nil && *md.CommitHash != "" { + ref = *md.CommitHash + } } } @@ -241,56 +247,112 @@ func buildGitHubPermalink(options RunOptions, fileURI string, start, end int) st return fmt.Sprintf("%s/blob/%s/%s%s", base, ref, path, anchor) } -// extractFileURIFromResult returns a file path derived from the SARIF result's first location. -// If the URI is absolute and a non-empty sourceFolder is provided, the returned path will be -// made relative to sourceFolder (matching previous behaviour). -func extractFileURIFromResult(res *sarif.Result, sourceFolder string) string { +// extractFileURIFromResult derives both the repository-relative path and local filesystem path +// for the first location in a SARIF result. When repository metadata is available the repo-relative +// path is anchored at the repository root; otherwise the function falls back to trimming the +// provided source folder (preserving legacy behaviour). 
+func extractFileURIFromResult(res *sarif.Result, absSourceFolder string, repoMetadata *git.RepositoryMetadata) (string, string) { if res == nil || len(res.Locations) == 0 { - return "" + return "", "" } loc := res.Locations[0] if loc.PhysicalLocation == nil { - return "" + return "", "" } art := loc.PhysicalLocation.ArtifactLocation if art == nil || art.URI == nil { - return "" + return "", "" } - uri := *art.URI - // If URI is not absolute or there's no sourceFolder provided, return it unchanged. - if !filepath.IsAbs(uri) || sourceFolder == "" { - return uri + rawURI := strings.TrimSpace(*art.URI) + if rawURI == "" { + return "", "" } - // Normalize sourceFolder to an absolute, cleaned path so relative inputs like - // "../scanio-test" match absolute URIs such as "/home/jekos/.../scanio-test/...". - if absSource, err := filepath.Abs(sourceFolder); err == nil { - absSource = filepath.Clean(absSource) + repoPath := "" + localPath := "" + subfolder := normalisedSubfolder(repoMetadata) + + if filepath.IsAbs(rawURI) { + localPath = filepath.Clean(rawURI) + repoPath = filepath.ToSlash(rawURI) + if repoMetadata != nil && repoMetadata.RepoRootFolder != "" { + repoPath = filepath.ToSlash(trimPathPrefix(localPath, repoMetadata.RepoRootFolder)) + } else if absSourceFolder != "" { + repoPath = filepath.ToSlash(trimPathPrefix(localPath, absSourceFolder)) + } + } else { + normalised := strings.TrimLeft(rawURI, "./") + repoPath = filepath.ToSlash(normalised) + + if subfolder != "" && !strings.HasPrefix(repoPath, subfolder+"/") && repoPath != subfolder { + repoPath = filepath.ToSlash(filepath.Join(subfolder, repoPath)) + } - // Prefer filepath.Rel which will produce a relative path when uri is under absSource. - if rel, err := filepath.Rel(absSource, uri); err == nil { - // If rel does not escape to parent directories, it's a proper subpath. 
- if rel != "" && !strings.HasPrefix(rel, "..") { - return rel + if repoMetadata != nil && repoMetadata.RepoRootFolder != "" { + candidate := filepath.Join(repoMetadata.RepoRootFolder, filepath.FromSlash(repoPath)) + if _, err := os.Stat(candidate); err == nil { + localPath = candidate } } - // Fallback: trim the absolute source prefix explicitly when possible. - prefix := absSource + string(filepath.Separator) - if strings.HasPrefix(uri, prefix) { - return strings.TrimPrefix(uri, prefix) + if localPath == "" && absSourceFolder != "" { + candidate := filepath.Join(absSourceFolder, filepath.FromSlash(normalised)) + if _, err := os.Stat(candidate); err == nil { + localPath = candidate + } } - if strings.HasPrefix(uri, absSource) { - return strings.TrimPrefix(uri, absSource) + + if localPath == "" && repoMetadata != nil && repoMetadata.RepoRootFolder != "" && subfolder != "" { + candidate := filepath.Join(repoMetadata.RepoRootFolder, filepath.FromSlash(subfolder), filepath.FromSlash(normalised)) + if _, err := os.Stat(candidate); err == nil { + localPath = candidate + } } + + if localPath == "" && absSourceFolder != "" { + localPath = filepath.Join(absSourceFolder, filepath.FromSlash(normalised)) + } + } + + repoPath = strings.TrimLeft(repoPath, "/") + repoPath = filepath.ToSlash(repoPath) + return repoPath, localPath +} + +func trimPathPrefix(path, prefix string) string { + if prefix == "" { + return path + } + + cleanPath := filepath.Clean(path) + cleanPrefix := filepath.Clean(prefix) + + if rel, err := filepath.Rel(cleanPrefix, cleanPath); err == nil && rel != "" && !strings.HasPrefix(rel, "..") { + return rel + } + + prefixWithSep := cleanPrefix + string(filepath.Separator) + if strings.HasPrefix(cleanPath, prefixWithSep) { + return strings.TrimPrefix(cleanPath, prefixWithSep) } - // Last-resort: try trimming the raw sourceFolder string provided by the user. 
- rel := strings.TrimPrefix(uri, sourceFolder) - if strings.HasPrefix(rel, string(filepath.Separator)) { - return rel[1:] + if strings.HasPrefix(cleanPath, cleanPrefix) { + return strings.TrimPrefix(cleanPath, cleanPrefix) } - return rel + + trimmed := strings.TrimPrefix(cleanPath, prefix) + if strings.HasPrefix(trimmed, string(filepath.Separator)) { + return trimmed[1:] + } + return trimmed +} + +func normalisedSubfolder(md *git.RepositoryMetadata) string { + if md == nil { + return "" + } + sub := strings.Trim(md.Subfolder, "/\\") + return filepath.ToSlash(sub) } // extractRegionFromResult returns start and end line numbers (0 when not present) @@ -313,3 +375,53 @@ func extractRegionFromResult(res *sarif.Result) (int, int) { } return start, end } + +// ResolveSourceFolder resolves a source folder path to its absolute form for path calculations. +// It handles path expansion (e.g., ~) and absolute path resolution with graceful fallbacks. +// Returns an empty string if the input folder is empty or whitespace-only. +func ResolveSourceFolder(folder string, logger hclog.Logger) string { + if folder := strings.TrimSpace(folder); folder != "" { + expandedFolder, expandErr := files.ExpandPath(folder) + if expandErr != nil { + logger.Debug("failed to expand source folder; using raw value", "error", expandErr) + expandedFolder = folder + } + if absFolder, absErr := filepath.Abs(expandedFolder); absErr != nil { + logger.Debug("failed to resolve absolute source folder; using expanded value", "error", absErr) + return expandedFolder + } else { + return filepath.Clean(absFolder) + } + } + return "" +} + +// ApplyEnvironmentFallbacks applies environment variable fallbacks to the run options. +// It sets namespace, repository, and ref from GitHub environment variables if not already provided. 
+func ApplyEnvironmentFallbacks(opts *RunOptions) { + // Fallback: if --namespace not provided, try $GITHUB_REPOSITORY_OWNER + if strings.TrimSpace(opts.Namespace) == "" { + if ns := strings.TrimSpace(os.Getenv("GITHUB_REPOSITORY_OWNER")); ns != "" { + opts.Namespace = ns + } + } + + // Fallback: if --repository not provided, try ${GITHUB_REPOSITORY#*/} + if strings.TrimSpace(opts.Repository) == "" { + if gr := strings.TrimSpace(os.Getenv("GITHUB_REPOSITORY")); gr != "" { + if idx := strings.Index(gr, "/"); idx >= 0 && idx < len(gr)-1 { + opts.Repository = gr[idx+1:] + } else { + // No slash present; fall back to the whole value + opts.Repository = gr + } + } + } + + // Fallback: if --ref not provided, try $GITHUB_SHA + if strings.TrimSpace(opts.Ref) == "" { + if sha := strings.TrimSpace(os.Getenv("GITHUB_SHA")); sha != "" { + opts.Ref = sha + } + } +} diff --git a/cmd/sarif-issues/utils_test.go b/cmd/sarif-issues/utils_test.go index 7a52f1ea..ab60e1c1 100644 --- a/cmd/sarif-issues/utils_test.go +++ b/cmd/sarif-issues/utils_test.go @@ -6,6 +6,10 @@ import ( "os" "path/filepath" "testing" + + "github.com/hashicorp/go-hclog" + "github.com/owenrumney/go-sarif/v2/sarif" + "github.com/scan-io-git/scan-io/internal/git" ) func TestDisplaySeverity(t *testing.T) { @@ -184,180 +188,130 @@ line 5` } tests := []struct { - name string - fileURI string - line int - endLine int - sourceFolder string - expected string + name string + localPath string + line int + endLine int + expected string }{ // Valid cases { - name: "single line from middle", - fileURI: "test.txt", - line: 2, - endLine: 2, - sourceFolder: tempDir, - expected: computeExpectedHash("line 2"), + name: "single line from middle", + localPath: testFilePath, + line: 2, + endLine: 2, + expected: computeExpectedHash("line 2"), }, { - name: "multiple lines range", - fileURI: "test.txt", - line: 2, - endLine: 4, - sourceFolder: tempDir, - expected: computeExpectedHash("line 2\nline 3\nline 4"), + name: "multiple lines 
range", + localPath: testFilePath, + line: 2, + endLine: 4, + expected: computeExpectedHash("line 2\nline 3\nline 4"), }, { - name: "first line only", - fileURI: "test.txt", - line: 1, - endLine: 1, - sourceFolder: tempDir, - expected: computeExpectedHash("line 1"), + name: "first line only", + localPath: testFilePath, + line: 1, + endLine: 1, + expected: computeExpectedHash("line 1"), }, { - name: "last line only", - fileURI: "test.txt", - line: 5, - endLine: 5, - sourceFolder: tempDir, - expected: computeExpectedHash("line 5"), + name: "last line only", + localPath: testFilePath, + line: 5, + endLine: 5, + expected: computeExpectedHash("line 5"), }, { - name: "entire file", - fileURI: "test.txt", - line: 1, - endLine: 5, - sourceFolder: tempDir, - expected: computeExpectedHash(testFileContent), + name: "entire file", + localPath: testFilePath, + line: 1, + endLine: 5, + expected: computeExpectedHash(testFileContent), }, { - name: "single line file", - fileURI: "single.txt", - line: 1, - endLine: 1, - sourceFolder: tempDir, - expected: computeExpectedHash(singleLineContent), + name: "single line file", + localPath: singleLineFilePath, + line: 1, + endLine: 1, + expected: computeExpectedHash(singleLineContent), }, { - name: "endLine same as line (no range)", - fileURI: "test.txt", - line: 3, - endLine: 3, - sourceFolder: tempDir, - expected: computeExpectedHash("line 3"), + name: "endLine same as line (no range)", + localPath: testFilePath, + line: 3, + endLine: 3, + expected: computeExpectedHash("line 3"), }, { - name: "endLine less than line (should use single line)", - fileURI: "test.txt", - line: 3, - endLine: 2, - sourceFolder: tempDir, - expected: computeExpectedHash("line 3"), + name: "endLine less than line (should use single line)", + localPath: testFilePath, + line: 3, + endLine: 2, + expected: computeExpectedHash("line 3"), }, // Edge cases that should return empty string { - name: "empty fileURI", - fileURI: "", - line: 1, - endLine: 1, - sourceFolder: 
tempDir, - expected: "", + name: "empty path", + localPath: "", + line: 1, + endLine: 1, + expected: "", }, { - name: "unknown fileURI", - fileURI: "", - line: 1, - endLine: 1, - sourceFolder: tempDir, - expected: "", + name: "zero line number", + localPath: testFilePath, + line: 0, + endLine: 1, + expected: "", }, { - name: "zero line number", - fileURI: "test.txt", - line: 0, - endLine: 1, - sourceFolder: tempDir, - expected: "", + name: "negative line number", + localPath: testFilePath, + line: -1, + endLine: 1, + expected: "", }, { - name: "negative line number", - fileURI: "test.txt", - line: -1, - endLine: 1, - sourceFolder: tempDir, - expected: "", + name: "line number beyond file length", + localPath: testFilePath, + line: 10, + endLine: 10, + expected: "", }, { - name: "empty sourceFolder", - fileURI: "test.txt", - line: 1, - endLine: 1, - sourceFolder: "", - expected: "", - }, - { - name: "line number beyond file length", - fileURI: "test.txt", - line: 10, - endLine: 10, - sourceFolder: tempDir, - expected: "", - }, - { - name: "file does not exist", - fileURI: "nonexistent.txt", - line: 1, - endLine: 1, - sourceFolder: tempDir, - expected: "", - }, - { - name: "invalid sourceFolder", - fileURI: "test.txt", - line: 1, - endLine: 1, - sourceFolder: "/nonexistent/path", - expected: "", + name: "file does not exist", + localPath: filepath.Join(tempDir, "nonexistent.txt"), + line: 1, + endLine: 1, + expected: "", }, // Boundary cases { - name: "endLine beyond file length (should clamp)", - fileURI: "test.txt", - line: 4, - endLine: 10, - sourceFolder: tempDir, - expected: computeExpectedHash("line 4\nline 5"), - }, - { - name: "empty file", - fileURI: "empty.txt", - line: 1, - endLine: 1, - sourceFolder: tempDir, - expected: computeExpectedHash(""), // Empty file has one empty line + name: "endLine beyond file length (should clamp)", + localPath: testFilePath, + line: 4, + endLine: 10, + expected: computeExpectedHash("line 4\nline 5"), }, - - // Path handling 
{ - name: "fileURI with forward slashes", - fileURI: "test.txt", - line: 1, - endLine: 1, - sourceFolder: tempDir, - expected: computeExpectedHash("line 1"), + name: "empty file", + localPath: emptyFilePath, + line: 1, + endLine: 1, + expected: computeExpectedHash(""), // Empty file has one empty line }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result := computeSnippetHash(tt.fileURI, tt.line, tt.endLine, tt.sourceFolder) + result := computeSnippetHash(tt.localPath, tt.line, tt.endLine) if result != tt.expected { - t.Errorf("computeSnippetHash(%q, %d, %d, %q) = %q, want %q", - tt.fileURI, tt.line, tt.endLine, tt.sourceFolder, result, tt.expected) + t.Errorf("computeSnippetHash(%q, %d, %d) = %q, want %q", + tt.localPath, tt.line, tt.endLine, result, tt.expected) } }) } @@ -387,8 +341,8 @@ func TestComputeSnippetHash_DifferentContentDifferentHash(t *testing.T) { t.Fatalf("Failed to create file2: %v", err) } - hash1 := computeSnippetHash("file1.txt", 1, 1, tempDir) - hash2 := computeSnippetHash("file2.txt", 1, 1, tempDir) + hash1 := computeSnippetHash(file1Path, 1, 1) + hash2 := computeSnippetHash(file2Path, 1, 1) if hash1 == hash2 { t.Errorf("Different content produced same hash: %q", hash1) @@ -424,8 +378,8 @@ func TestComputeSnippetHash_SameContentSameHash(t *testing.T) { t.Fatalf("Failed to create file2: %v", err) } - hash1 := computeSnippetHash("identical1.txt", 1, 2, tempDir) - hash2 := computeSnippetHash("identical2.txt", 1, 2, tempDir) + hash1 := computeSnippetHash(file1Path, 1, 2) + hash2 := computeSnippetHash(file2Path, 1, 2) if hash1 != hash2 { t.Errorf("Identical content produced different hashes: %q vs %q", hash1, hash2) @@ -435,3 +389,607 @@ func TestComputeSnippetHash_SameContentSameHash(t *testing.T) { t.Error("Hash was empty for valid content") } } + +func TestExtractFileURIFromResult(t *testing.T) { + tempDir, err := os.MkdirTemp("", "sarif_extract") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + 
} + defer os.RemoveAll(tempDir) + + repoRoot := filepath.Join(tempDir, "repo") + subfolder := filepath.Join(repoRoot, "apps", "demo") + if err := os.MkdirAll(subfolder, 0o755); err != nil { + t.Fatalf("Failed to create subfolder: %v", err) + } + + absoluteFile := filepath.Join(subfolder, "main.py") + metadata := &git.RepositoryMetadata{ + RepoRootFolder: repoRoot, + Subfolder: filepath.ToSlash(filepath.Join("apps", "demo")), + } + + if err := os.WriteFile(absoluteFile, []byte("print('demo')\n"), 0o644); err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + tests := []struct { + name string + uri string + meta *git.RepositoryMetadata + expectedRepo string + expectedLocal string + sourceFolder string + }{ + { + name: "absolute URI with metadata", + uri: absoluteFile, + meta: metadata, + expectedRepo: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + expectedLocal: absoluteFile, + sourceFolder: subfolder, + }, + { + name: "relative URI with metadata", + uri: "main.py", + meta: metadata, + expectedRepo: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + expectedLocal: filepath.Join(repoRoot, "apps", "demo", "main.py"), + sourceFolder: subfolder, + }, + { + name: "relative URI already prefixed", + uri: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + meta: metadata, + expectedRepo: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + expectedLocal: filepath.Join(repoRoot, "apps", "demo", "main.py"), + sourceFolder: subfolder, + }, + { + name: "absolute URI without metadata falls back to source folder", + uri: absoluteFile, + meta: nil, + expectedRepo: "main.py", + expectedLocal: absoluteFile, + sourceFolder: subfolder, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + uri := tt.uri + result := &sarif.Result{ + Locations: []*sarif.Location{ + { + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: &uri, + }, + }, + }, + }, + } + + 
repoPath, localPath := extractFileURIFromResult(result, tt.sourceFolder, tt.meta) + if repoPath != tt.expectedRepo { + t.Fatalf("expected repo path %q, got %q", tt.expectedRepo, repoPath) + } + if localPath != tt.expectedLocal { + t.Fatalf("expected local path %q, got %q", tt.expectedLocal, localPath) + } + }) + } +} + +func TestBuildGitHubPermalink(t *testing.T) { + fileURI := filepath.ToSlash(filepath.Join("apps", "demo", "main.py")) + options := RunOptions{ + Namespace: "scan-io-git", + Repository: "scanio-test", + Ref: "aec0b795c350ff53fe9ab01adf862408aa34c3fd", + } + + link := buildGitHubPermalink(options, nil, fileURI, 11, 29) + expected := "https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py#L11-L29" + if link != expected { + t.Fatalf("expected permalink %q, got %q", expected, link) + } + + // Ref fallback to repository metadata + options.Ref = "" + commit := "1234567890abcdef" + metadata := &git.RepositoryMetadata{ + RepoRootFolder: "/tmp/repo", + CommitHash: &commit, + } + link = buildGitHubPermalink(options, metadata, fileURI, 5, 5) + expected = "https://github.com/scan-io-git/scanio-test/blob/1234567890abcdef/apps/demo/main.py#L5" + if link != expected { + t.Fatalf("expected metadata permalink %q, got %q", expected, link) + } + + // Missing ref and metadata commit should return empty string + options.Ref = "" + metadata.CommitHash = nil + link = buildGitHubPermalink(options, metadata, fileURI, 1, 1) + if link != "" { + t.Fatalf("expected empty permalink when ref and metadata are missing, got %q", link) + } +} + +func TestResolveSourceFolder(t *testing.T) { + // Create a test logger + logger := hclog.NewNullLogger() + + tests := []struct { + name string + input string + expected string + setup func() (string, func()) // setup function that returns a test path and cleanup function + }{ + { + name: "empty string", + input: "", + expected: "", + }, + { + name: "whitespace only", + input: " \t\n ", + 
expected: "", + }, + { + name: "relative path", + input: "", + expected: "", // Will be resolved to absolute path + setup: func() (string, func()) { + // Create a temporary directory and change to it + tempDir, err := os.MkdirTemp("", "sarif-test-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + testDir := filepath.Join(tempDir, "testdir") + err = os.Mkdir(testDir, 0755) + if err != nil { + os.RemoveAll(tempDir) + t.Fatalf("failed to create test dir: %v", err) + } + return testDir, func() { os.RemoveAll(tempDir) } + }, + }, + { + name: "absolute path", + input: "", + expected: "", // Will be set by setup + setup: func() (string, func()) { + tempDir, err := os.MkdirTemp("", "sarif-test-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + return tempDir, func() { os.RemoveAll(tempDir) } + }, + }, + { + name: "path with tilde expansion", + input: "~/testdir", + expected: "", // Will be resolved to actual home directory path + setup: func() (string, func()) { + homeDir, err := os.UserHomeDir() + if err != nil { + t.Fatalf("failed to get home dir: %v", err) + } + testDir := filepath.Join(homeDir, "testdir") + err = os.Mkdir(testDir, 0755) + if err != nil { + t.Fatalf("failed to create test dir: %v", err) + } + return testDir, func() { os.RemoveAll(testDir) } + }, + }, + { + name: "path with dots and slashes", + input: "", + expected: "", // Will be cleaned and resolved + setup: func() (string, func()) { + tempDir, err := os.MkdirTemp("", "sarif-test-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + // Create a parent directory structure + parentDir := filepath.Join(tempDir, "parent") + err = os.Mkdir(parentDir, 0755) + if err != nil { + os.RemoveAll(tempDir) + t.Fatalf("failed to create parent dir: %v", err) + } + testDir := filepath.Join(parentDir, "testdir") + err = os.Mkdir(testDir, 0755) + if err != nil { + os.RemoveAll(tempDir) + t.Fatalf("failed to create test dir: %v", err) + } + return 
testDir, func() { os.RemoveAll(tempDir) } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var cleanup func() + var expectedPath string + + if tt.setup != nil { + testPath, cleanupFunc := tt.setup() + cleanup = cleanupFunc + expectedPath = filepath.Clean(testPath) + + // Set the input based on test type + if tt.name == "relative path" { + // Use absolute path for relative path test since we can't control working directory + tt.input = testPath + } else if tt.name == "absolute path" { + tt.input = testPath + } else if tt.name == "path with tilde expansion" { + tt.input = "~/testdir" + } else if tt.name == "path with dots and slashes" { + // Use absolute path for this test too + tt.input = testPath + } + } else { + expectedPath = tt.expected + } + + result := ResolveSourceFolder(tt.input, logger) + + if tt.setup != nil { + // For tests with setup, verify the result is an absolute path + if !filepath.IsAbs(result) { + t.Errorf("expected absolute path, got relative path: %s", result) + } + // Verify the resolved path points to the same directory + if result != expectedPath { + t.Errorf("expected %s, got %s", expectedPath, result) + } + } else { + // For tests without setup, verify exact match + if result != expectedPath { + t.Errorf("expected %s, got %s", expectedPath, result) + } + } + + if cleanup != nil { + cleanup() + } + }) + } +} + +func TestResolveSourceFolderErrorHandling(t *testing.T) { + logger := hclog.NewNullLogger() + + t.Run("non-existent path", func(t *testing.T) { + // Test with a path that doesn't exist - should still resolve to absolute path + result := ResolveSourceFolder("/non/existent/path", logger) + + // Should still return an absolute path even if it doesn't exist + if !filepath.IsAbs(result) { + t.Errorf("expected absolute path even for non-existent path, got: %s", result) + } + + expected := "/non/existent/path" + if result != expected { + t.Errorf("expected %s, got %s", expected, result) + } + }) + + 
t.Run("invalid characters in path", func(t *testing.T) { + // Test with path containing invalid characters + result := ResolveSourceFolder("/tmp/test\x00invalid", logger) + + // Should handle gracefully and return the path as-is + if result == "" { + t.Error("expected non-empty result for invalid path") + } + }) +} + +func TestResolveSourceFolderRelativePaths(t *testing.T) { + logger := hclog.NewNullLogger() + + t.Run("relative path with working directory change", func(t *testing.T) { + // Create a temporary directory structure + tempDir, err := os.MkdirTemp("", "sarif-test-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create a test directory + testDir := filepath.Join(tempDir, "testdir") + err = os.Mkdir(testDir, 0755) + if err != nil { + t.Fatalf("failed to create test dir: %v", err) + } + + // Change to the temp directory + originalDir, err := os.Getwd() + if err != nil { + t.Fatalf("failed to get current directory: %v", err) + } + defer os.Chdir(originalDir) + + err = os.Chdir(tempDir) + if err != nil { + t.Fatalf("failed to change directory: %v", err) + } + + // Test relative path + result := ResolveSourceFolder("./testdir", logger) + expected := filepath.Clean(testDir) + + if result != expected { + t.Errorf("expected %s, got %s", expected, result) + } + + if !filepath.IsAbs(result) { + t.Errorf("expected absolute path, got relative path: %s", result) + } + }) +} + +func TestApplyEnvironmentFallbacks(t *testing.T) { + tests := []struct { + name string + initialOpts RunOptions + envVars map[string]string + expectedOpts RunOptions + }{ + { + name: "no environment variables set", + initialOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "test-repo", + Ref: "test-ref", + }, + envVars: map[string]string{}, + expectedOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "test-repo", + Ref: "test-ref", + }, + }, + { + name: "all options already set - no fallbacks applied", + 
initialOpts: RunOptions{ + Namespace: "existing-namespace", + Repository: "existing-repo", + Ref: "existing-ref", + }, + envVars: map[string]string{ + "GITHUB_REPOSITORY_OWNER": "env-namespace", + "GITHUB_REPOSITORY": "env-owner/env-repo", + "GITHUB_SHA": "env-sha", + }, + expectedOpts: RunOptions{ + Namespace: "existing-namespace", + Repository: "existing-repo", + Ref: "existing-ref", + }, + }, + { + name: "namespace fallback applied", + initialOpts: RunOptions{ + Namespace: "", + Repository: "test-repo", + Ref: "test-ref", + }, + envVars: map[string]string{ + "GITHUB_REPOSITORY_OWNER": "env-namespace", + }, + expectedOpts: RunOptions{ + Namespace: "env-namespace", + Repository: "test-repo", + Ref: "test-ref", + }, + }, + { + name: "repository fallback applied with slash", + initialOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "", + Ref: "test-ref", + }, + envVars: map[string]string{ + "GITHUB_REPOSITORY": "env-owner/env-repo", + }, + expectedOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "env-repo", + Ref: "test-ref", + }, + }, + { + name: "repository fallback applied without slash", + initialOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "", + Ref: "test-ref", + }, + envVars: map[string]string{ + "GITHUB_REPOSITORY": "env-repo-only", + }, + expectedOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "env-repo-only", + Ref: "test-ref", + }, + }, + { + name: "ref fallback applied", + initialOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "test-repo", + Ref: "", + }, + envVars: map[string]string{ + "GITHUB_SHA": "env-sha-123", + }, + expectedOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "test-repo", + Ref: "env-sha-123", + }, + }, + { + name: "all fallbacks applied", + initialOpts: RunOptions{ + Namespace: "", + Repository: "", + Ref: "", + }, + envVars: map[string]string{ + "GITHUB_REPOSITORY_OWNER": "env-namespace", + "GITHUB_REPOSITORY": "env-owner/env-repo", + 
"GITHUB_SHA": "env-sha-123", + }, + expectedOpts: RunOptions{ + Namespace: "env-namespace", + Repository: "env-repo", + Ref: "env-sha-123", + }, + }, + { + name: "whitespace handling", + initialOpts: RunOptions{ + Namespace: " ", + Repository: "\t", + Ref: "\n", + }, + envVars: map[string]string{ + "GITHUB_REPOSITORY_OWNER": " env-namespace ", + "GITHUB_REPOSITORY": "\tenv-owner/env-repo\t", + "GITHUB_SHA": "\nenv-sha-123\n", + }, + expectedOpts: RunOptions{ + Namespace: "env-namespace", + Repository: "env-repo", + Ref: "env-sha-123", + }, + }, + { + name: "repository with multiple slashes", + initialOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "", + Ref: "test-ref", + }, + envVars: map[string]string{ + "GITHUB_REPOSITORY": "env-owner/subdir/env-repo", + }, + expectedOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "subdir/env-repo", + Ref: "test-ref", + }, + }, + { + name: "repository with slash at end", + initialOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "", + Ref: "test-ref", + }, + envVars: map[string]string{ + "GITHUB_REPOSITORY": "env-owner/env-repo/", + }, + expectedOpts: RunOptions{ + Namespace: "test-namespace", + Repository: "env-repo/", + Ref: "test-ref", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Set up environment variables + for key, value := range tt.envVars { + t.Setenv(key, value) + } + + // Create a copy of initial options + opts := tt.initialOpts + + // Apply environment fallbacks + ApplyEnvironmentFallbacks(&opts) + + // Verify results + if opts.Namespace != tt.expectedOpts.Namespace { + t.Errorf("Namespace: expected %q, got %q", tt.expectedOpts.Namespace, opts.Namespace) + } + if opts.Repository != tt.expectedOpts.Repository { + t.Errorf("Repository: expected %q, got %q", tt.expectedOpts.Repository, opts.Repository) + } + if opts.Ref != tt.expectedOpts.Ref { + t.Errorf("Ref: expected %q, got %q", tt.expectedOpts.Ref, opts.Ref) + } + }) + } +} + 
+func TestApplyEnvironmentFallbacksEdgeCases(t *testing.T) { + t.Run("empty environment variables", func(t *testing.T) { + opts := RunOptions{ + Namespace: "", + Repository: "", + Ref: "", + } + + // Set empty environment variables + t.Setenv("GITHUB_REPOSITORY_OWNER", "") + t.Setenv("GITHUB_REPOSITORY", "") + t.Setenv("GITHUB_SHA", "") + + ApplyEnvironmentFallbacks(&opts) + + // Should remain empty + if opts.Namespace != "" { + t.Errorf("Expected empty namespace, got %q", opts.Namespace) + } + if opts.Repository != "" { + t.Errorf("Expected empty repository, got %q", opts.Repository) + } + if opts.Ref != "" { + t.Errorf("Expected empty ref, got %q", opts.Ref) + } + }) + + t.Run("repository with only slash", func(t *testing.T) { + opts := RunOptions{ + Repository: "", + } + + t.Setenv("GITHUB_REPOSITORY", "/") + + ApplyEnvironmentFallbacks(&opts) + + // Should fall back to the whole value + if opts.Repository != "/" { + t.Errorf("Expected repository to be '/', got %q", opts.Repository) + } + }) + + t.Run("repository with slash at beginning", func(t *testing.T) { + opts := RunOptions{ + Repository: "", + } + + t.Setenv("GITHUB_REPOSITORY", "/env-repo") + + ApplyEnvironmentFallbacks(&opts) + + // Should extract the part after the slash since idx=0 and idx < len(gr)-1 + if opts.Repository != "env-repo" { + t.Errorf("Expected repository to be 'env-repo', got %q", opts.Repository) + } + }) +} diff --git a/cmd/to-html.go b/cmd/to-html.go index 2b9fca76..0fa0d077 100644 --- a/cmd/to-html.go +++ b/cmd/to-html.go @@ -132,7 +132,26 @@ var toHtmlCmd = &cobra.Command{ if err != nil { logger.Warn("can't collect repository metadata", "reason", err) } else { - logger.Debug("repositoryMetadata", "BranchName", *repositoryMetadata.BranchName, "CommitHash", *repositoryMetadata.CommitHash, "RepositoryFullName", *repositoryMetadata.RepositoryFullName, "Subfolder", repositoryMetadata.Subfolder, "RepoRootFolder", repositoryMetadata.RepoRootFolder) + branch := "" + if 
repositoryMetadata.BranchName != nil { + branch = *repositoryMetadata.BranchName + } + commit := "" + if repositoryMetadata.CommitHash != nil { + commit = *repositoryMetadata.CommitHash + } + fullName := "" + if repositoryMetadata.RepositoryFullName != nil { + fullName = *repositoryMetadata.RepositoryFullName + } + logger.Debug( + "repositoryMetadata", + "BranchName", branch, + "CommitHash", commit, + "RepositoryFullName", fullName, + "Subfolder", repositoryMetadata.Subfolder, + "RepoRootFolder", repositoryMetadata.RepoRootFolder, + ) } var url *vcsurl.VCSURL diff --git a/docs/engineering/sarif-issues-path-analysis.md b/docs/engineering/sarif-issues-path-analysis.md new file mode 100644 index 00000000..260c7cd4 --- /dev/null +++ b/docs/engineering/sarif-issues-path-analysis.md @@ -0,0 +1,98 @@ +# SARIF Issues Path Handling Analysis + +## Reproduction Context +- Command sequence: + 1. `scanio analyse --scanner semgrep /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo --format sarif --output outside-project.sarif` + 2. `scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif from-subfolder.sarif --source-folder /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo` +- Expected permalink: `.../blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py#L11-L29` +- Actual permalink (incorrect): `.../blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/main.py#L11-L29` + +## Key Observations +- `data/outside-project.sarif` contains absolute URIs such as `/home/.../scanio-test/apps/demo/main.py`. +- `data/from-subfolder.sarif` contains relative URIs (`main.py`) because Semgrep ran from the subfolder. +- In both cases the SARIF report points to the file under `apps/demo/main.py`, yet the CLI emits `main.py` in issue bodies and permalinks. 
+ +## Code Flow Review +- `cmd/sarif-issues/issue_processing.go` calls `extractFileURIFromResult` to determine the file path recorded in `NewIssueData` (`buildNewIssuesFromSARIF`, line references around `fileURI` usage). +- `extractFileURIFromResult` (`cmd/sarif-issues/utils.go:173-212`) trims the `--source-folder` prefix from absolute URIs and returns the remainder; for relative URIs it simply returns the raw value. + - When `--source-folder` is `/.../scanio-test/apps/demo`, absolute URIs reduce to `main.py`, losing the repository subpath. +- `buildGitHubPermalink` (`utils.go:125-170`) expects `fileURI` to be repository-relative when constructing `https://github.com/{namespace}/{repo}/blob/{ref}/{fileURI}#L...`. +- `computeSnippetHash` (`utils.go:104-121`) relies on joining `sourceFolder` with the same `fileURI` to re-read the local file. If we change `fileURI` to be repo-relative (`apps/demo/main.py`), the current join logic will point at `/.../apps/demo/apps/demo/main.py` and fail. +- `internal/sarif.Report.EnrichResultsLocationProperty` and `EnrichResultsLocationURIProperty` perform similar prefix stripping using `sourceFolder`, so the HTML report path logic (`cmd/to-html.go`) inherits the same limitation. +- `internal/git.CollectRepositoryMetadata` already derives `RepoRootFolder` and the `Subfolder` path segment when `--source-folder` is nested within the repo. + +## Root Cause +The CLI assumes `--source-folder` equals the repository root. When the user points it to a subdirectory, the helper trims that prefix and drops intermediate path segments. Consequently: +- Issue metadata (`File` field) loses the directory context. +- GitHub permalinks omit the subfolder and land on the wrong file. +- Correlation metadata (`Metadata.Filename`) no longer matches the path stored in GitHub issues, risking mismatches if/when we fix the permalink logic without updating correlation. + +## Fix Considerations +1. 
**Determine repository root & subfolder once.** `internal/git.CollectRepositoryMetadata` gives us both `RepoRootFolder` and `Subfolder` for any path inside the repo. Reusing this keeps CLI logic consistent with the HTML report command. +2. **Produce dual path representations.** + - Repo-relative path (e.g. `apps/demo/main.py`) for GitHub URLs and issue bodies. + - Source-folder-relative path (e.g. `main.py`) or absolute path for reading files/snippet hashing. +3. **Avoid regressions in existing flows.** After changing `fileURI`, ensure: + - `computeSnippetHash` receives the correct on-disk path. + - Issue correlation (`Metadata.Filename`) uses the same representation that is stored in GitHub issue bodies to preserve matching. +4. **Consider harmonising SARIF helpers.** Updating `internal/sarif` enrichment to use repo metadata would fix both CLI commands (`sarif-issues`, `to-html`) and reduce duplicated path trimming logic. + +## Proposed Fix Plan +1. Enhance the `sarif-issues` command to collect repository metadata: + - Call `git.CollectRepositoryMetadata(opts.SourceFolder)` early (guard for errors). + - Derive helper closures that can translate between repo-relative and local paths. +2. Update `extractFileURIFromResult` (or an adjacent helper) to: + - Resolve the SARIF URI to an absolute path (using `uriBaseId` and `sourceFolder` when necessary). + - Emit the repo-relative path (using metadata.RepoRootFolder) for issue content and permalinks. + - Return both repo-relative and local paths, or store them in a small struct to avoid repeated conversions. +3. Adjust `computeSnippetHash` and correlation metadata to consume the correct local path while storing repo-relative filenames in issue metadata. +4. Reuse the new path helper in `buildGitHubPermalink` so the permalink path stays in sync. +5. 
Add regression tests: + - Extend `cmd/sarif-issues/utils_test.go` (or introduce new tests) covering absolute and relative SARIF URIs when `sourceFolder` points to a subdirectory. + - Include permalink assertions using `data/from-subfolder.sarif` / `data/outside-project.sarif`. +6. Evaluate whether `internal/sarif`’s enrichment should adopt the same metadata-aware logic; if so, share the helper to keep `to-html` and future commands consistent. + +# Manual testing +```sh +# 1. Outside folder absolute paths +cd /home/jekos/ghq/github.com/scan-io-git/scan-io +scanio analyse --scanner semgrep /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/outside-project-abs.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif data/outside-project-abs.sarif --source-folder /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo +# validate here: 2 issues with correct permalinks +# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py + +# 2. Outside folder relative paths +cd /home/jekos/ghq/github.com/scan-io-git/scan-io +scanio analyse --scanner semgrep ../scanio-test/apps/demo --format sarif --output data/outside-project-rel.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif data/outside-project-rel.sarif --source-folder ../scanio-test/apps/demo +# validate here: 2 issues with correct permalinks +# incorrect: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/scanio-test/apps/demo/main.py + +# 3. 
From root absolute paths
+cd /home/jekos/ghq/github.com/scan-io-git/scanio-test
+scanio analyse --scanner semgrep /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-abs.sarif
+scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-abs.sarif --source-folder /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo
+# validate here: 2 issues with correct permalinks
+# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py
+
+# 4. From root relative paths
+cd /home/jekos/ghq/github.com/scan-io-git/scanio-test
+scanio analyse --scanner semgrep apps/demo --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-rel.sarif
+scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-rel.sarif --source-folder apps/demo
+# validate here: 2 issues with correct permalinks
+# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py

+# 5. 
From subfolder absolute paths +cd /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo +scanio analyse --scanner semgrep /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-subfolder-abs.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-subfolder-abs.sarif --source-folder /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo +# validate here: 2 issues with correct permalinks +# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py + +# 6. From subfolder relative paths +cd /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo +scanio analyse --scanner semgrep . --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-subfolder-rel.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-subfolder-rel.sarif --source-folder . 
+# validate here: 2 issues with correct permalinks +# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py +``` diff --git a/internal/git/metadata.go b/internal/git/metadata.go index 82fbbc70..072d66de 100644 --- a/internal/git/metadata.go +++ b/internal/git/metadata.go @@ -2,6 +2,7 @@ package git import ( "fmt" + "path/filepath" "strings" "github.com/go-git/go-git/v5" @@ -19,44 +20,50 @@ type RepositoryMetadata struct { // CollectRepositoryMetadata function collects repository metadata // that includes branch name, commit hash, repository full name, subfolder and repository root folder func CollectRepositoryMetadata(sourceFolder string) (*RepositoryMetadata, error) { - defaultRepositoryMetadata := &RepositoryMetadata{ - RepoRootFolder: sourceFolder, - Subfolder: "", + if sourceFolder == "" { + return &RepositoryMetadata{}, fmt.Errorf("source folder is not set") } - if sourceFolder == "" { - return defaultRepositoryMetadata, fmt.Errorf("source folder is not set") + if absSource, err := filepath.Abs(sourceFolder); err == nil { + sourceFolder = absSource + } + + md := &RepositoryMetadata{ + RepoRootFolder: filepath.Clean(sourceFolder), } repoRootFolder, err := findGitRepositoryPath(sourceFolder) if err != nil { - return defaultRepositoryMetadata, err + return md, err } + md.RepoRootFolder = filepath.Clean(repoRootFolder) + repo, err := git.PlainOpen(repoRootFolder) if err != nil { - return defaultRepositoryMetadata, fmt.Errorf("failed to open repository: %w", err) + return md, fmt.Errorf("failed to open repository: %w", err) } - head, err := repo.Head() - if err != nil { - return defaultRepositoryMetadata, fmt.Errorf("failed to get HEAD: %w", err) + if rel, err := filepath.Rel(repoRootFolder, sourceFolder); err == nil && rel != "." 
{ + md.Subfolder = filepath.ToSlash(rel) } - branchName := head.Name().Short() - commitHash := head.Hash().String() - remote, err := repo.Remote("origin") - if err != nil { - return defaultRepositoryMetadata, fmt.Errorf("failed to get remote: %w", err) + if head, err := repo.Head(); err == nil { + if head.Name().IsBranch() { + branchName := head.Name().Short() + md.BranchName = &branchName + } + + hash := head.Hash().String() + md.CommitHash = &hash } - repositoryFullName := strings.TrimSuffix(remote.Config().URLs[0], ".git") + if remote, err := repo.Remote("origin"); err == nil { + if cfg := remote.Config(); cfg != nil && len(cfg.URLs) > 0 { + repositoryFullName := strings.TrimSuffix(cfg.URLs[0], ".git") + md.RepositoryFullName = &repositoryFullName + } + } - return &RepositoryMetadata{ - BranchName: &branchName, - CommitHash: &commitHash, - RepositoryFullName: &repositoryFullName, - Subfolder: strings.TrimPrefix(sourceFolder, repoRootFolder), - RepoRootFolder: repoRootFolder, - }, nil + return md, nil } From bb48bd3027eb7caa440a2ede97b593f909fec9c6 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Wed, 8 Oct 2025 21:16:51 +0200 Subject: [PATCH 27/52] fix: correctly handle relative paths from outside of repo --- cmd/sarif-issues/utils.go | 142 ++++++++----- cmd/sarif-issues/utils_test.go | 190 +++++++++++++++++- .../engineering/sarif-issues-path-analysis.md | 3 +- 3 files changed, 282 insertions(+), 53 deletions(-) diff --git a/cmd/sarif-issues/utils.go b/cmd/sarif-issues/utils.go index 8f2abbfe..50c81145 100644 --- a/cmd/sarif-issues/utils.go +++ b/cmd/sarif-issues/utils.go @@ -271,46 +271,59 @@ func extractFileURIFromResult(res *sarif.Result, absSourceFolder string, repoMet repoPath := "" localPath := "" subfolder := normalisedSubfolder(repoMetadata) - - if filepath.IsAbs(rawURI) { - localPath = filepath.Clean(rawURI) - repoPath = filepath.ToSlash(rawURI) - if repoMetadata != nil && repoMetadata.RepoRootFolder != "" { - repoPath = 
filepath.ToSlash(trimPathPrefix(localPath, repoMetadata.RepoRootFolder)) - } else if absSourceFolder != "" { - repoPath = filepath.ToSlash(trimPathPrefix(localPath, absSourceFolder)) + var repoRoot string + if repoMetadata != nil && strings.TrimSpace(repoMetadata.RepoRootFolder) != "" { + repoRoot = filepath.Clean(repoMetadata.RepoRootFolder) + } + absSource := strings.TrimSpace(absSourceFolder) + if absSource != "" { + if abs, err := filepath.Abs(absSource); err == nil { + absSource = abs + } else { + absSource = filepath.Clean(absSource) } - } else { - normalised := strings.TrimLeft(rawURI, "./") - repoPath = filepath.ToSlash(normalised) + } - if subfolder != "" && !strings.HasPrefix(repoPath, subfolder+"/") && repoPath != subfolder { - repoPath = filepath.ToSlash(filepath.Join(subfolder, repoPath)) - } + // Normalise URI to the host OS path representation + osURI := filepath.FromSlash(rawURI) + osURI = strings.TrimPrefix(osURI, "file://") + cleanURI := filepath.Clean(osURI) - if repoMetadata != nil && repoMetadata.RepoRootFolder != "" { - candidate := filepath.Join(repoMetadata.RepoRootFolder, filepath.FromSlash(repoPath)) - if _, err := os.Stat(candidate); err == nil { - localPath = candidate + if filepath.IsAbs(cleanURI) { + localPath = cleanURI + if repoRoot != "" { + if rel, err := filepath.Rel(repoRoot, localPath); err == nil { + if rel != "." 
&& !strings.HasPrefix(rel, "..") { + repoPath = filepath.ToSlash(rel) + } } } - - if localPath == "" && absSourceFolder != "" { - candidate := filepath.Join(absSourceFolder, filepath.FromSlash(normalised)) - if _, err := os.Stat(candidate); err == nil { - localPath = candidate + if repoPath == "" && absSource != "" { + if rel, err := filepath.Rel(absSource, localPath); err == nil { + repoPath = filepath.ToSlash(rel) } } + if repoPath == "" { + repoPath = filepath.ToSlash(strings.TrimPrefix(localPath, string(filepath.Separator))) + } + } else { + localPath = resolveRelativeLocalPath(cleanURI, repoRoot, subfolder, absSource) - if localPath == "" && repoMetadata != nil && repoMetadata.RepoRootFolder != "" && subfolder != "" { - candidate := filepath.Join(repoMetadata.RepoRootFolder, filepath.FromSlash(subfolder), filepath.FromSlash(normalised)) - if _, err := os.Stat(candidate); err == nil { - localPath = candidate + if repoRoot != "" && localPath != "" && pathWithin(localPath, repoRoot) { + if rel, err := filepath.Rel(repoRoot, localPath); err == nil { + if rel != "." 
{ + repoPath = filepath.ToSlash(rel) + } } } - if localPath == "" && absSourceFolder != "" { - localPath = filepath.Join(absSourceFolder, filepath.FromSlash(normalised)) + if repoPath == "" { + normalised := strings.TrimLeft(filepath.ToSlash(cleanURI), "./") + if subfolder != "" && !strings.HasPrefix(normalised, subfolder+"/") && normalised != subfolder { + repoPath = filepath.ToSlash(filepath.Join(subfolder, normalised)) + } else { + repoPath = filepath.ToSlash(normalised) + } } } @@ -319,32 +332,71 @@ func extractFileURIFromResult(res *sarif.Result, absSourceFolder string, repoMet return repoPath, localPath } -func trimPathPrefix(path, prefix string) string { - if prefix == "" { - return path +func resolveRelativeLocalPath(cleanURI, repoRoot, subfolder, absSource string) string { + candidateRel := cleanURI + var bases []string + seen := map[string]struct{}{} + + addBase := func(base string) { + if base == "" { + return + } + if abs, err := filepath.Abs(base); err == nil { + base = abs + } else { + base = filepath.Clean(base) + } + if _, ok := seen[base]; ok { + return + } + seen[base] = struct{}{} + bases = append(bases, base) } - cleanPath := filepath.Clean(path) - cleanPrefix := filepath.Clean(prefix) + addBase(repoRoot) + if repoRoot != "" && subfolder != "" { + addBase(filepath.Join(repoRoot, filepath.FromSlash(subfolder))) + } + addBase(absSource) - if rel, err := filepath.Rel(cleanPrefix, cleanPath); err == nil && rel != "" && !strings.HasPrefix(rel, "..") { - return rel + for _, base := range bases { + candidate := filepath.Clean(filepath.Join(base, candidateRel)) + if repoRoot != "" && !pathWithin(candidate, repoRoot) { + continue + } + if _, err := os.Stat(candidate); err == nil { + return candidate + } } - prefixWithSep := cleanPrefix + string(filepath.Separator) - if strings.HasPrefix(cleanPath, prefixWithSep) { - return strings.TrimPrefix(cleanPath, prefixWithSep) + if len(bases) > 0 { + candidate := filepath.Clean(filepath.Join(bases[0], 
candidateRel)) + if repoRoot == "" || pathWithin(candidate, repoRoot) { + return candidate + } } - if strings.HasPrefix(cleanPath, cleanPrefix) { - return strings.TrimPrefix(cleanPath, cleanPrefix) + if absSource != "" { + return filepath.Clean(filepath.Join(absSource, candidateRel)) } + return "" +} - trimmed := strings.TrimPrefix(cleanPath, prefix) - if strings.HasPrefix(trimmed, string(filepath.Separator)) { - return trimmed[1:] +func pathWithin(path, root string) bool { + if root == "" { + return true + } + cleanPath, err1 := filepath.Abs(path) + cleanRoot, err2 := filepath.Abs(root) + if err1 != nil || err2 != nil { + cleanPath = filepath.Clean(path) + cleanRoot = filepath.Clean(root) + } + if cleanPath == cleanRoot { + return true } - return trimmed + rootWithSep := cleanRoot + string(filepath.Separator) + return strings.HasPrefix(cleanPath, rootWithSep) } func normalisedSubfolder(md *git.RepositoryMetadata) string { diff --git a/cmd/sarif-issues/utils_test.go b/cmd/sarif-issues/utils_test.go index ab60e1c1..7b5589a5 100644 --- a/cmd/sarif-issues/utils_test.go +++ b/cmd/sarif-issues/utils_test.go @@ -5,11 +5,13 @@ import ( "fmt" "os" "path/filepath" + "strings" "testing" "github.com/hashicorp/go-hclog" "github.com/owenrumney/go-sarif/v2/sarif" "github.com/scan-io-git/scan-io/internal/git" + internalsarif "github.com/scan-io-git/scan-io/internal/sarif" ) func TestDisplaySeverity(t *testing.T) { @@ -397,7 +399,7 @@ func TestExtractFileURIFromResult(t *testing.T) { } defer os.RemoveAll(tempDir) - repoRoot := filepath.Join(tempDir, "repo") + repoRoot := filepath.Join(tempDir, "scanio-test") subfolder := filepath.Join(repoRoot, "apps", "demo") if err := os.MkdirAll(subfolder, 0o755); err != nil { t.Fatalf("Failed to create subfolder: %v", err) @@ -437,6 +439,14 @@ func TestExtractFileURIFromResult(t *testing.T) { expectedLocal: filepath.Join(repoRoot, "apps", "demo", "main.py"), sourceFolder: subfolder, }, + { + name: "relative URI with parent segments", + uri: 
filepath.ToSlash(filepath.Join("..", "scanio-test", "apps", "demo", "main.py")), + meta: metadata, + expectedRepo: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + expectedLocal: filepath.Join(repoRoot, "apps", "demo", "main.py"), + sourceFolder: subfolder, + }, { name: "relative URI already prefixed", uri: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), @@ -517,6 +527,171 @@ func TestBuildGitHubPermalink(t *testing.T) { } } +func TestBuildNewIssuesFromSARIFManualScenarios(t *testing.T) { + tempDir, err := os.MkdirTemp("", "sarif_scenarios") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + repoRoot := filepath.Join(tempDir, "scanio-test") + subfolder := filepath.Join(repoRoot, "apps", "demo") + if err := os.MkdirAll(subfolder, 0o755); err != nil { + t.Fatalf("Failed to create repo subfolder: %v", err) + } + + mainFile := filepath.Join(subfolder, "main.py") + var builder strings.Builder + for i := 1; i <= 60; i++ { + builder.WriteString(fmt.Sprintf("line %d\n", i)) + } + if err := os.WriteFile(mainFile, []byte(builder.String()), 0o644); err != nil { + t.Fatalf("Failed to write main.py: %v", err) + } + + logger := hclog.NewNullLogger() + commit := "aec0b795c350ff53fe9ab01adf862408aa34c3fd" + + metadata := &git.RepositoryMetadata{ + RepoRootFolder: repoRoot, + Subfolder: filepath.ToSlash(filepath.Join("apps", "demo")), + CommitHash: &commit, + } + + options := RunOptions{ + Namespace: "scan-io-git", + Repository: "scanio-test", + Ref: commit, + SourceFolder: subfolder, + } + + expectedRepoPath := filepath.ToSlash(filepath.Join("apps", "demo", "main.py")) + permalink := fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s#L%d-L%d", + options.Namespace, + options.Repository, + commit, + expectedRepoPath, + 11, + 29, + ) + + scenarios := []struct { + name string + uri string + sourceFolderCLI string + sourceFolderAbs string + }{ + { + name: "outside project absolute", + uri: mainFile, + 
sourceFolderCLI: subfolder, + sourceFolderAbs: subfolder, + }, + { + name: "outside project relative", + uri: filepath.ToSlash(filepath.Join("..", "scanio-test", "apps", "demo", "main.py")), + sourceFolderCLI: filepath.Join("..", "scanio-test", "apps", "demo"), + sourceFolderAbs: subfolder, + }, + { + name: "from root absolute", + uri: mainFile, + sourceFolderCLI: subfolder, + sourceFolderAbs: subfolder, + }, + { + name: "from root relative", + uri: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + sourceFolderCLI: filepath.Join("apps", "demo"), + sourceFolderAbs: subfolder, + }, + { + name: "from subfolder absolute", + uri: mainFile, + sourceFolderCLI: subfolder, + sourceFolderAbs: subfolder, + }, + { + name: "from subfolder relative", + uri: "main.py", + sourceFolderCLI: ".", + sourceFolderAbs: subfolder, + }, + } + + for _, scenario := range scenarios { + scenario := scenario + t.Run(scenario.name, func(t *testing.T) { + ruleID := "test.rule" + uriValue := scenario.uri + startLine := 11 + endLine := 29 + message := "Test finding" + baseID := "%SRCROOT%" + + result := &sarif.Result{ + RuleID: &ruleID, + Message: sarif.Message{ + Text: &message, + }, + Locations: []*sarif.Location{ + { + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: &uriValue, + URIBaseId: &baseID, + }, + Region: &sarif.Region{ + StartLine: &startLine, + EndLine: &endLine, + }, + }, + }, + }, + } + result.PropertyBag = *sarif.NewPropertyBag() + result.Add("Level", "error") + + report := &internalsarif.Report{ + Report: &sarif.Report{ + Runs: []*sarif.Run{ + { + Tool: sarif.Tool{ + Driver: &sarif.ToolComponent{ + Name: "Semgrep", + Rules: []*sarif.ReportingDescriptor{ + {ID: ruleID}, + }, + }, + }, + Results: []*sarif.Result{result}, + }, + }, + }, + } + + scenarioOptions := options + scenarioOptions.SourceFolder = scenario.sourceFolderCLI + + issues := buildNewIssuesFromSARIF(report, scenarioOptions, scenario.sourceFolderAbs, metadata, 
logger) + if len(issues) == 0 { + t.Fatalf("expected issues for scenario %q", scenario.name) + } + + issue := issues[0] + if issue.Metadata.Filename != expectedRepoPath { + t.Fatalf("scenario %q expected repo path %q, got %q", scenario.name, expectedRepoPath, issue.Metadata.Filename) + } + if issue.Metadata.SnippetHash == "" { + t.Fatalf("scenario %q expected snippet hash to be populated", scenario.name) + } + if !strings.Contains(issue.Body, permalink) { + t.Fatalf("scenario %q issue body missing permalink %q", scenario.name, permalink) + } + }) + } +} + func TestResolveSourceFolder(t *testing.T) { // Create a test logger logger := hclog.NewNullLogger() @@ -573,16 +748,17 @@ func TestResolveSourceFolder(t *testing.T) { input: "~/testdir", expected: "", // Will be resolved to actual home directory path setup: func() (string, func()) { - homeDir, err := os.UserHomeDir() + tempHome, err := os.MkdirTemp("", "sarif-home-*") if err != nil { - t.Fatalf("failed to get home dir: %v", err) + t.Fatalf("failed to create temp home dir: %v", err) } - testDir := filepath.Join(homeDir, "testdir") - err = os.Mkdir(testDir, 0755) - if err != nil { + t.Setenv("HOME", tempHome) + testDir := filepath.Join(tempHome, "testdir") + if err := os.MkdirAll(testDir, 0o755); err != nil { + os.RemoveAll(tempHome) t.Fatalf("failed to create test dir: %v", err) } - return testDir, func() { os.RemoveAll(testDir) } + return testDir, func() { os.RemoveAll(tempHome) } }, }, { diff --git a/docs/engineering/sarif-issues-path-analysis.md b/docs/engineering/sarif-issues-path-analysis.md index 260c7cd4..3064b58e 100644 --- a/docs/engineering/sarif-issues-path-analysis.md +++ b/docs/engineering/sarif-issues-path-analysis.md @@ -66,7 +66,7 @@ cd /home/jekos/ghq/github.com/scan-io-git/scan-io scanio analyse --scanner semgrep ../scanio-test/apps/demo --format sarif --output data/outside-project-rel.sarif scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref 
aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif data/outside-project-rel.sarif --source-folder ../scanio-test/apps/demo # validate here: 2 issues with correct permalinks -# incorrect: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/scanio-test/apps/demo/main.py +# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py # 3. From root absolute path cd /home/jekos/ghq/github.com/scan-io-git/scanio-test @@ -81,6 +81,7 @@ scanio analyse --scanner semgrep apps/demo --format sarif --output /home/jekos/g scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-rel.sarif --source-folder apps/demo # validate here: 2 issues with correct permalinks # correct https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py +# correct even when .git folder is not there # 5. 
From subfolder absolute paths cd /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo From 0fe96073d39ee54d8e2557422d8ee742ddd5a4b4 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Wed, 8 Oct 2025 21:23:45 +0200 Subject: [PATCH 28/52] docs: update usage examples for sarif-issues command to recommend running from repository root --- cmd/sarif-issues/sarif-issues.go | 5 ++++- docs/reference/cmd-sarif-issues.md | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cmd/sarif-issues/sarif-issues.go b/cmd/sarif-issues/sarif-issues.go index 74f8ced6..d23b39f0 100644 --- a/cmd/sarif-issues/sarif-issues.go +++ b/cmd/sarif-issues/sarif-issues.go @@ -39,7 +39,10 @@ var ( opts RunOptions // Example usage for the sarif-issues command - exampleSarifIssuesUsage = ` # Create issues from SARIF report with basic configuration + exampleSarifIssuesUsage = ` # Recommended: run from repository root and use relative paths + scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --source-folder apps/demo + + # Create issues from SARIF report with basic configuration scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif # Create issues with labels and assignees diff --git a/docs/reference/cmd-sarif-issues.md b/docs/reference/cmd-sarif-issues.md index 84798bc4..0e114d5a 100644 --- a/docs/reference/cmd-sarif-issues.md +++ b/docs/reference/cmd-sarif-issues.md @@ -84,6 +84,8 @@ For detailed GitHub plugin configuration, refer to [GitHub Plugin Documentation] ## Usage Examples +> **Recommendation:** Run the command from your repository root and pass `--source-folder` as repo-relative paths (for example `--source-folder apps/demo`). This keeps permalinks and snippet hashing consistent across environments. Even when .git repo is corrupted or is missing. 
+ ### Basic Usage in GitHub Actions Create issues from SARIF report using environment variables: ```bash From df5443ec5b6f0a82cb263352152ccd18654879e7 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Thu, 9 Oct 2025 08:54:26 +0200 Subject: [PATCH 29/52] fix: initialize properties map before we write the derived level, preventing the nil-map panic --- .../engineering/sarif-issues-path-analysis.md | 27 ++++++++++ internal/sarif/sarif.go | 3 ++ internal/sarif/sarif_test.go | 54 +++++++++++++++++++ 3 files changed, 84 insertions(+) create mode 100644 internal/sarif/sarif_test.go diff --git a/docs/engineering/sarif-issues-path-analysis.md b/docs/engineering/sarif-issues-path-analysis.md index 3064b58e..6a69b66e 100644 --- a/docs/engineering/sarif-issues-path-analysis.md +++ b/docs/engineering/sarif-issues-path-analysis.md @@ -53,6 +53,7 @@ The CLI assumes `--source-folder` equals the repository root. When the user poin 6. Evaluate whether `internal/sarif`’s enrichment should adopt the same metadata-aware logic; if so, share the helper to keep `to-html` and future commands consistent. # Manual testing +## Semgrep scan of subfolder (monorepo like use case) ```sh # 1. Outside folder absolute paths cd /home/jekos/ghq/github.com/scan-io-git/scan-io @@ -97,3 +98,29 @@ scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b # validate here: 2 issues with correct permalinks # correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py ``` +## snyk +```sh +cd /home/jekos/ghq/github.com/scan-io-git/scanio-test + +# 1. scan root +snyk code test --sarif-file-output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-root.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-root.sarif --source-folder . + +# 2. 
scan subfolder from root +snyk code test --sarif-file-output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-subfolder-from-root.sarif apps/demo +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-subfolder-from-root.sarif --source-folder apps/demo +``` +## codeql +```sh +cd /home/jekos/ghq/github.com/scan-io-git/scanio-test + +# 1. scan root +/tmp/codeql/codeql database create /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-root --language=python --source-root=. +/tmp/codeql/codeql database analyze /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-root --format=sarif-latest --output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-root.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-root.sarif --source-folder . + +# 1. 
scan subfolder +/tmp/codeql/codeql database create /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-subfolder --language=python --source-root=apps/demo +/tmp/codeql/codeql database analyze /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-subfolder --format=sarif-latest --output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-subfolder.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-subfolder.sarif --source-folder apps/demo +``` \ No newline at end of file diff --git a/internal/sarif/sarif.go b/internal/sarif/sarif.go index 3951c0cd..f1958937 100644 --- a/internal/sarif/sarif.go +++ b/internal/sarif/sarif.go @@ -291,6 +291,9 @@ func (r Report) EnrichResultsLevelProperty() { } for _, result := range r.Runs[0].Results { + if result.Properties == nil { + result.Properties = make(map[string]interface{}) + } if rule, ok := rulesMap[*result.RuleID]; ok { if result.Properties["Level"] == nil { if result.Level != nil { diff --git a/internal/sarif/sarif_test.go b/internal/sarif/sarif_test.go new file mode 100644 index 00000000..76871111 --- /dev/null +++ b/internal/sarif/sarif_test.go @@ -0,0 +1,54 @@ +package sarif + +import ( + "testing" + + gosarif "github.com/owenrumney/go-sarif/v2/sarif" +) + +func TestEnrichResultsLevelPropertyInitialisesResultProperties(t *testing.T) { + ruleID := "CODEQL-0001" + + rule := &gosarif.ReportingDescriptor{ + ID: ruleID, + Properties: gosarif.Properties{ + "problem.severity": "warning", + }, + } + + result := &gosarif.Result{ + RuleID: &ruleID, + } + + report := Report{ + Report: &gosarif.Report{ + Version: string(gosarif.Version210), + Runs: []*gosarif.Run{ + { + Tool: gosarif.Tool{ + Driver: &gosarif.ToolComponent{ + Name: "CodeQL", + Rules: []*gosarif.ReportingDescriptor{rule}, + }, + }, + Results: []*gosarif.Result{result}, + }, + }, + }, + } + + 
report.EnrichResultsLevelProperty() + + if result.Properties == nil { + t.Fatalf("expected result properties to be initialised, but it was nil") + } + + level, ok := result.Properties["Level"] + if !ok { + t.Fatalf("expected Level property to be set on result properties") + } + + if level != "warning" { + t.Fatalf("expected Level property to be %q, got %v", "warning", level) + } +} From a8f75bde7ecf740804d5c20b07179cc9722e9363 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Fri, 10 Oct 2025 08:21:50 +0200 Subject: [PATCH 30/52] feat: ad rule id toissue body metadata --- cmd/sarif-issues/issue_processing.go | 45 +++++++++++++-------- cmd/sarif-issues/issue_processing_test.go | 49 +++++++++++++++++++++++ docs/reference/cmd-sarif-issues.md | 1 + 3 files changed, 79 insertions(+), 16 deletions(-) create mode 100644 cmd/sarif-issues/issue_processing_test.go diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index 167b60ea..8b7113a6 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -51,18 +51,14 @@ type NewIssueData struct { } // parseIssueBody attempts to read the body produced by this command and extract -// known metadata from blockquote format lines. Only supports the new format: +// known metadata from blockquote format lines. Supports the new format: +// "> **Rule ID**: semgrep.rule.id" // "> **Severity**: Error, **Scanner**: Semgrep OSS" // "> **File**: app.py, **Lines**: 11-29" // Returns an OpenIssueReport with zero values when fields are missing. 
func parseIssueBody(body string) OpenIssueReport { rep := OpenIssueReport{} - // Extract rule ID from header format: "## 🐞 " - if rid := extractRuleIDFromBody(body); rid != "" { - rep.RuleID = rid - } - for _, line := range strings.Split(body, "\n") { line = strings.TrimSpace(line) @@ -88,6 +84,10 @@ func parseIssueBody(body string) OpenIssueReport { for _, part := range parts { segment := strings.TrimSpace(part) + if strings.HasPrefix(segment, "Rule ID:") { + rep.RuleID = strings.TrimSpace(strings.TrimPrefix(segment, "Rule ID:")) + continue + } if strings.HasPrefix(segment, "Severity:") { rep.Severity = strings.TrimSpace(strings.TrimPrefix(segment, "Severity:")) } else if strings.HasPrefix(segment, "Scanner:") { @@ -102,6 +102,12 @@ func parseIssueBody(body string) OpenIssueReport { } } } + // Prefer the metadata-provided Rule ID and fall back to the legacy header format. + if strings.TrimSpace(rep.RuleID) == "" { + if rid := extractRuleIDFromBody(body); rid != "" { + rep.RuleID = rid + } + } return rep } @@ -245,14 +251,26 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, s if endLine > line { linesDisp = fmt.Sprintf("%d-%d", line, endLine) } - meta := fmt.Sprintf( - "> **Severity**: %s, **Scanner**: %s\n> **File**: %s, **Lines**: %s\n", - sev, scannerDisp, fileDisp, linesDisp, - ) + var metaBuilder strings.Builder + if trimmedID := strings.TrimSpace(ruleID); trimmedID != "" { + metaBuilder.WriteString(fmt.Sprintf("> **Rule ID**: %s\n", trimmedID)) + } + metaBuilder.WriteString(fmt.Sprintf( + "> **Severity**: %s, **Scanner**: %s\n", sev, scannerDisp, + )) + metaBuilder.WriteString(fmt.Sprintf( + "> **File**: %s, **Lines**: %s\n", fileDisp, linesDisp, + )) + meta := metaBuilder.String() // Only use the new header and blockquote metadata body := header + meta + "\n" var references []string + // Append permalink if available + if link := buildGitHubPermalink(options, repoMetadata, fileURI, line, endLine); link != "" { + body += 
fmt.Sprintf("\n%s\n", link) + } + // Append rule help markdown if available if r, ok := rulesByID[ruleID]; ok && r != nil && r.Help != nil && r.Help.Markdown != nil { if hm := strings.TrimSpace(*r.Help.Markdown); hm != "" { @@ -266,11 +284,6 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, s } } - // Append permalink if available - if link := buildGitHubPermalink(options, repoMetadata, fileURI, line, endLine); link != "" { - body += fmt.Sprintf("\n%s\n", link) - } - // Append security identifier tags (CWE, OWASP) with links if available in rule properties if r, ok := rulesByID[ruleID]; ok && r != nil && r.Properties != nil { var tags []string @@ -306,7 +319,7 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, s newIssueData = append(newIssueData, NewIssueData{ Metadata: issuecorrelation.IssueMetadata{ - IssueID: "", + IssueID: ruleID, Scanner: scannerName, RuleID: ruleID, Severity: level, diff --git a/cmd/sarif-issues/issue_processing_test.go b/cmd/sarif-issues/issue_processing_test.go new file mode 100644 index 00000000..d8b9a5e2 --- /dev/null +++ b/cmd/sarif-issues/issue_processing_test.go @@ -0,0 +1,49 @@ +package sarifissues + +import "testing" + +func TestParseIssueBodyUsesMetadataRuleID(t *testing.T) { + body := ` +## 🐞 legacy-header + +> **Rule ID**: semgrep.python.django.security +> **Severity**: High, **Scanner**: Semgrep OSS +> **File**: app.py, **Lines**: 11-29 + +> **Snippet SHA256**: abcdef123456 +` + + rep := parseIssueBody(body) + if rep.RuleID != "semgrep.python.django.security" { + t.Fatalf("expected RuleID from metadata, got %q", rep.RuleID) + } + if rep.Scanner != "Semgrep OSS" { + t.Fatalf("expected scanner parsed from metadata, got %q", rep.Scanner) + } + if rep.Severity != "High" { + t.Fatalf("expected severity parsed from metadata, got %q", rep.Severity) + } + if rep.FilePath != "app.py" { + t.Fatalf("expected filepath parsed from metadata, got %q", rep.FilePath) + } + if 
rep.StartLine != 11 || rep.EndLine != 29 { + t.Fatalf("expected start/end lines 11/29, got %d/%d", rep.StartLine, rep.EndLine) + } + if rep.Hash != "abcdef123456" { + t.Fatalf("expected hash parsed from metadata, got %q", rep.Hash) + } +} + +func TestParseIssueBodyFallsBackToHeaderRuleID(t *testing.T) { + body := ` +## 🐞 fallback.rule.id + +> **Severity**: High, **Scanner**: Semgrep OSS +> **File**: main.py, **Lines**: 5-5 +` + + rep := parseIssueBody(body) + if rep.RuleID != "fallback.rule.id" { + t.Fatalf("expected RuleID parsed from header fallback, got %q", rep.RuleID) + } +} diff --git a/docs/reference/cmd-sarif-issues.md b/docs/reference/cmd-sarif-issues.md index 0e114d5a..2b56fab9 100644 --- a/docs/reference/cmd-sarif-issues.md +++ b/docs/reference/cmd-sarif-issues.md @@ -182,6 +182,7 @@ Only specific types of open issues are considered for correlation: **Compact Metadata (Blockquote)** ```markdown +> **Rule ID**: > **Severity**: High, **Scanner**: Semgrep OSS > **File**: app.js, **Lines**: 42-45 ``` From 5815b6e323366d9284477644e542c119315fd973 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Fri, 10 Oct 2025 08:33:18 +0200 Subject: [PATCH 31/52] feat: write rule shoty desecription in issue header instead of rule id --- cmd/sarif-issues/issue_processing.go | 29 ++++++++++++++-- cmd/sarif-issues/issue_processing_test.go | 41 ++++++++++++++++++++++- docs/reference/cmd-sarif-issues.md | 3 +- 3 files changed, 69 insertions(+), 4 deletions(-) diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index 8b7113a6..45117be2 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -232,6 +232,11 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, s lg.Warn("SARIF run missing scanner/tool name, using fallback", "rule_id", ruleID) } + var ruleDescriptor *sarif.ReportingDescriptor + if r, ok := rulesByID[ruleID]; ok { + ruleDescriptor = r + } + sev := 
displaySeverity(level) // build body and title with scanner name label @@ -239,8 +244,8 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, s // New body header and compact metadata blockquote header := "" - if strings.TrimSpace(ruleID) != "" { - header = fmt.Sprintf("## 🐞 %s\n\n", ruleID) + if h := displayRuleHeading(ruleID, ruleDescriptor); strings.TrimSpace(h) != "" { + header = fmt.Sprintf("## 🐞 %s\n\n", h) } scannerDisp := scannerName if scannerDisp == "" { @@ -337,6 +342,26 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, s return newIssueData } +// displayRuleHeading returns the preferred human-friendly rule heading for the issue body: +// 1. rule.ShortDescription.Text when available. +// 2. rule.Name when available. +// 3. ruleID as a fallback. +func displayRuleHeading(ruleID string, rule *sarif.ReportingDescriptor) string { + if rule != nil { + if rule.ShortDescription != nil && rule.ShortDescription.Text != nil { + if heading := strings.TrimSpace(*rule.ShortDescription.Text); heading != "" { + return heading + } + } + if rule.Name != nil { + if heading := strings.TrimSpace(*rule.Name); heading != "" { + return heading + } + } + } + return strings.TrimSpace(ruleID) +} + // buildKnownIssuesFromOpen converts open GitHub issues into correlation metadata, // filtering for well-structured scanio-managed issues only. 
func buildKnownIssuesFromOpen(openIssues map[int]OpenIssueEntry, lg hclog.Logger) []issuecorrelation.IssueMetadata { diff --git a/cmd/sarif-issues/issue_processing_test.go b/cmd/sarif-issues/issue_processing_test.go index d8b9a5e2..ea1bd73e 100644 --- a/cmd/sarif-issues/issue_processing_test.go +++ b/cmd/sarif-issues/issue_processing_test.go @@ -1,6 +1,10 @@ package sarifissues -import "testing" +import ( + "testing" + + "github.com/owenrumney/go-sarif/v2/sarif" +) func TestParseIssueBodyUsesMetadataRuleID(t *testing.T) { body := ` @@ -47,3 +51,38 @@ func TestParseIssueBodyFallsBackToHeaderRuleID(t *testing.T) { t.Fatalf("expected RuleID parsed from header fallback, got %q", rep.RuleID) } } + +func TestDisplayRuleHeadingPrefersShortDescription(t *testing.T) { + text := "Short desc" + name := "Rule Name" + rule := &sarif.ReportingDescriptor{ + ShortDescription: &sarif.MultiformatMessageString{ + Text: &text, + }, + Name: &name, + } + + got := displayRuleHeading("rule.id", rule) + if got != "Short desc" { + t.Fatalf("expected short description heading, got %q", got) + } +} + +func TestDisplayRuleHeadingFallsBackToName(t *testing.T) { + name := "Rule Name" + rule := &sarif.ReportingDescriptor{ + Name: &name, + } + + got := displayRuleHeading("rule.id", rule) + if got != "Rule Name" { + t.Fatalf("expected name fallback heading, got %q", got) + } +} + +func TestDisplayRuleHeadingFallsBackToID(t *testing.T) { + got := displayRuleHeading("rule.id", nil) + if got != "rule.id" { + t.Fatalf("expected rule id fallback heading, got %q", got) + } +} diff --git a/docs/reference/cmd-sarif-issues.md b/docs/reference/cmd-sarif-issues.md index 2b56fab9..c06a3817 100644 --- a/docs/reference/cmd-sarif-issues.md +++ b/docs/reference/cmd-sarif-issues.md @@ -177,8 +177,9 @@ Only specific types of open issues are considered for correlation: **Header** ```markdown -## 🐞 +## 🐞 ``` +Scanio prefers the SARIF rule's short description for the heading; if that is missing it falls back to the 
rule name, then to the raw rule ID. **Compact Metadata (Blockquote)** ```markdown From fb6d5d8c153c4586e0432e8f3fb8ff62b5f3d634 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Fri, 10 Oct 2025 09:04:31 +0200 Subject: [PATCH 32/52] feat: take issue details from different fields --- cmd/sarif-issues/issue_processing.go | 44 +++++++++++++++-- cmd/sarif-issues/issue_processing_test.go | 59 +++++++++++++++++++++++ docs/reference/cmd-sarif-issues.md | 6 ++- 3 files changed, 102 insertions(+), 7 deletions(-) diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index 45117be2..348efbe4 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -240,7 +240,8 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, s sev := displaySeverity(level) // build body and title with scanner name label - titleText := buildIssueTitle(scannerName, sev, ruleID, fileURI, line, endLine) + ruleTitleComponent := displayRuleTitleComponent(ruleID, ruleDescriptor) + titleText := buildIssueTitle(scannerName, sev, ruleTitleComponent, fileURI, line, endLine) // New body header and compact metadata blockquote header := "" @@ -276,10 +277,9 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, s body += fmt.Sprintf("\n%s\n", link) } - // Append rule help markdown if available - if r, ok := rulesByID[ruleID]; ok && r != nil && r.Help != nil && r.Help.Markdown != nil { - if hm := strings.TrimSpace(*r.Help.Markdown); hm != "" { - detail, helpRefs := parseRuleHelpMarkdown(hm) + // Append rule detail/help content + if ruleDescriptor != nil { + if detail, helpRefs := extractRuleDetail(ruleDescriptor); detail != "" || len(helpRefs) > 0 { if detail != "" { body += "\n\n" + detail } @@ -362,6 +362,40 @@ func displayRuleHeading(ruleID string, rule *sarif.ReportingDescriptor) string { return strings.TrimSpace(ruleID) } +// displayRuleTitleComponent returns the identifier segment 
to embed in the GitHub issue title. +// Prefers rule.Name when available; falls back to ruleID. +func displayRuleTitleComponent(ruleID string, rule *sarif.ReportingDescriptor) string { + if rule != nil && rule.Name != nil { + if name := strings.TrimSpace(*rule.Name); name != "" { + return name + } + } + return strings.TrimSpace(ruleID) +} + +// extractRuleDetail returns a detail string (markdown/plain) and optional reference links. +// Prefers rule.Help.Markdown when available; falls back to rule.FullDescription.Text. +func extractRuleDetail(rule *sarif.ReportingDescriptor) (string, []string) { + if rule == nil { + return "", nil + } + + if rule.Help != nil && rule.Help.Markdown != nil { + if hm := strings.TrimSpace(*rule.Help.Markdown); hm != "" { + if detail, refs := parseRuleHelpMarkdown(hm); strings.TrimSpace(detail) != "" || len(refs) > 0 { + return detail, refs + } + } + } + + if rule.FullDescription != nil && rule.FullDescription.Text != nil { + if fd := strings.TrimSpace(*rule.FullDescription.Text); fd != "" { + return fd, nil + } + } + return "", nil +} + // buildKnownIssuesFromOpen converts open GitHub issues into correlation metadata, // filtering for well-structured scanio-managed issues only. 
func buildKnownIssuesFromOpen(openIssues map[int]OpenIssueEntry, lg hclog.Logger) []issuecorrelation.IssueMetadata { diff --git a/cmd/sarif-issues/issue_processing_test.go b/cmd/sarif-issues/issue_processing_test.go index ea1bd73e..e7e0a0d4 100644 --- a/cmd/sarif-issues/issue_processing_test.go +++ b/cmd/sarif-issues/issue_processing_test.go @@ -86,3 +86,62 @@ func TestDisplayRuleHeadingFallsBackToID(t *testing.T) { t.Fatalf("expected rule id fallback heading, got %q", got) } } + +func TestDisplayRuleTitleComponentPrefersName(t *testing.T) { + name := "Descriptive Rule" + rule := &sarif.ReportingDescriptor{ + Name: &name, + } + got := displayRuleTitleComponent("rule.id", rule) + if got != "Descriptive Rule" { + t.Fatalf("expected rule name for title component, got %q", got) + } +} + +func TestDisplayRuleTitleComponentFallsBackToID(t *testing.T) { + got := displayRuleTitleComponent("rule.id", nil) + if got != "rule.id" { + t.Fatalf("expected rule id fallback for title component, got %q", got) + } +} + +func TestExtractRuleDetailPrefersHelpMarkdown(t *testing.T) { + markdown := "Detailed explanation" + rule := &sarif.ReportingDescriptor{ + Help: &sarif.MultiformatMessageString{ + Markdown: &markdown, + }, + } + + detail, refs := extractRuleDetail(rule) + if detail != "Detailed explanation" { + t.Fatalf("expected help markdown detail, got %q", detail) + } + if len(refs) != 0 { + t.Fatalf("expected no references for plain markdown, got %d", len(refs)) + } +} + +func TestExtractRuleDetailFallsBackToFullDescription(t *testing.T) { + full := "Full description text" + rule := &sarif.ReportingDescriptor{ + FullDescription: &sarif.MultiformatMessageString{ + Text: &full, + }, + } + + detail, refs := extractRuleDetail(rule) + if detail != "Full description text" { + t.Fatalf("expected full description fallback, got %q", detail) + } + if refs != nil { + t.Fatalf("expected nil references for full description fallback, got %#v", refs) + } +} + +func 
TestExtractRuleDetailEmptyWhenNoContent(t *testing.T) { + detail, refs := extractRuleDetail(nil) + if detail != "" || refs != nil { + t.Fatalf("expected empty detail and nil refs, got %q %#v", detail, refs) + } +} diff --git a/docs/reference/cmd-sarif-issues.md b/docs/reference/cmd-sarif-issues.md index c06a3817..16f81d6a 100644 --- a/docs/reference/cmd-sarif-issues.md +++ b/docs/reference/cmd-sarif-issues.md @@ -169,9 +169,10 @@ Only specific types of open issues are considered for correlation: ### Issue Title Format ``` -[][][] at :[-] +[][][] at :[-] ``` -**Example**: `[Semgrep OSS][High][javascript.express.security.audit.express-check-csurf-middleware-usage.express-check-csurf-middleware-usage] at app.js:42-45` +**Example**: `[Semgrep OSS][High][Express Missing CSRF Protection] at app.js:42-45` +When a rule provides a human-friendly `name`, Scanio uses it; otherwise the rule ID is shown. ### Issue Body Structure @@ -191,6 +192,7 @@ Scanio prefers the SARIF rule's short description for the heading; if that is mi **Rule Description** - Includes help text from SARIF rule definitions - Parses and formats reference links +- Falls back to the rule's full description when markdown help is not available **GitHub Permalink** - Direct link to vulnerable code in repository From 0c82e4a7c3665d5228f811fd5e066b0bcbf01acc Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sat, 11 Oct 2025 08:42:53 +0200 Subject: [PATCH 33/52] feat: enhance SARIF message formatting to include descriptions and hyperlinks for CodeQL and Snyk styles --- .cursorrules | 1 + cmd/sarif-issues/issue_processing.go | 13 + cmd/sarif-issues/utils_test.go | 376 +++++++++++++++++++ internal/sarif/message_formatter.go | 379 +++++++++++++++++++ internal/sarif/message_formatter_test.go | 457 +++++++++++++++++++++++ 5 files changed, 1226 insertions(+) create mode 100644 internal/sarif/message_formatter.go create mode 100644 internal/sarif/message_formatter_test.go diff --git a/.cursorrules b/.cursorrules 
index cfcd570d..e0ddac32 100644 --- a/.cursorrules +++ b/.cursorrules @@ -4,6 +4,7 @@ - When implementing new features or making changes to the codebase, always check the `docs/engineering/` directory first for established patterns and guidelines: - When in doubt about implementation details, always refer to the engineering documentation first, then examine similar existing implementations in the codebase. - Try to reuse internal packages if relevant. Extend if required functionality does not exist. +- Don't use `data` folder in tests, it will not be available in other environment. But feel free to read content to make proper mocks. ## Commands - Build cli with: `make build-cli` diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index 348efbe4..c7fc030f 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -277,6 +277,19 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, s body += fmt.Sprintf("\n%s\n", link) } + // Add formatted result message if available + if res.Message.Markdown != nil || res.Message.Text != nil { + formatOpts := internalsarif.MessageFormatOptions{ + Namespace: options.Namespace, + Repository: options.Repository, + Ref: options.Ref, + SourceFolder: sourceFolderAbs, + } + if formatted := internalsarif.FormatResultMessage(res, repoMetadata, formatOpts); formatted != "" { + body += fmt.Sprintf("\n\n### Description\n\n%s\n", formatted) + } + } + // Append rule detail/help content if ruleDescriptor != nil { if detail, helpRefs := extractRuleDetail(ruleDescriptor); detail != "" || len(helpRefs) > 0 { diff --git a/cmd/sarif-issues/utils_test.go b/cmd/sarif-issues/utils_test.go index 7b5589a5..8f156011 100644 --- a/cmd/sarif-issues/utils_test.go +++ b/cmd/sarif-issues/utils_test.go @@ -1169,3 +1169,379 @@ func TestApplyEnvironmentFallbacksEdgeCases(t *testing.T) { } }) } + +func TestBuildIssueBodyWithCodeQLMessage(t *testing.T) { + // Test 
integration with CodeQL-style SARIF data using mocks + tempDir, err := os.MkdirTemp("", "sarif_codeql_test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create test file structure + repoRoot := filepath.Join(tempDir, "scanio-test") + subfolder := filepath.Join(repoRoot, "apps", "demo") + if err := os.MkdirAll(subfolder, 0o755); err != nil { + t.Fatalf("Failed to create repo subfolder: %v", err) + } + + mainFile := filepath.Join(subfolder, "main.py") + if err := os.WriteFile(mainFile, []byte("line 1\nline 2\nline 3\n"), 0o644); err != nil { + t.Fatalf("Failed to write main.py: %v", err) + } + + logger := hclog.NewNullLogger() + commit := "aec0b795c350ff53fe9ab01adf862408aa34c3fd" + + metadata := &git.RepositoryMetadata{ + RepoRootFolder: repoRoot, + Subfolder: filepath.ToSlash(filepath.Join("apps", "demo")), + CommitHash: &commit, + } + + options := RunOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: commit, + SourceFolder: subfolder, + } + + // Create mock SARIF report with CodeQL-style message + sarifReport := &sarif.Report{ + Runs: []*sarif.Run{ + { + Tool: sarif.Tool{ + Driver: &sarif.ToolComponent{ + Rules: []*sarif.ReportingDescriptor{ + { + ID: "py/template-injection", + Properties: map[string]interface{}{ + "problem.severity": "error", + }, + }, + }, + }, + }, + Results: []*sarif.Result{ + { + RuleID: stringPtr("py/template-injection"), + Level: stringPtr("error"), + Message: sarif.Message{ + Text: stringPtr("This template construction depends on a [user-provided value](1)."), + }, + RelatedLocations: []*sarif.Location{ + { + Id: uintPtr(1), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(1), + StartColumn: intPtr(50), + EndLine: intPtr(1), + EndColumn: intPtr(57), + }, + }, + }, + }, + Locations: []*sarif.Location{ + { + PhysicalLocation: 
&sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(10), + StartColumn: intPtr(5), + EndLine: intPtr(10), + EndColumn: intPtr(15), + }, + }, + }, + }, + }, + }, + }, + }, + } + + report := &internalsarif.Report{ + Report: sarifReport, + } + + // Enrich results with level property + report.EnrichResultsLevelProperty() + + // Process SARIF results + issues := buildNewIssuesFromSARIF(report, options, subfolder, metadata, logger) + + // Verify that formatted messages are included in issue bodies + if len(issues) == 0 { + t.Fatalf("Expected at least one issue, got 0") + } + + issue := issues[0] + if !strings.Contains(issue.Body, "### Description") { + t.Errorf("Expected issue body to contain '### Description' section") + } + + // Check that the formatted message contains a hyperlink + if !strings.Contains(issue.Body, "https://github.com/test-org/test-repo/blob/") { + t.Errorf("Expected issue body to contain GitHub permalink") + } + + // Check for CodeQL-style formatting (single reference) + if !strings.Contains(issue.Body, "[user-provided value](") { + t.Errorf("Expected issue body to contain CodeQL-style formatted reference") + } +} + +func TestBuildIssueBodyWithSnykMessage(t *testing.T) { + // Test integration with Snyk-style SARIF data using mocks + tempDir, err := os.MkdirTemp("", "sarif_snyk_test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create test file structure + repoRoot := filepath.Join(tempDir, "scanio-test") + subfolder := filepath.Join(repoRoot, "apps", "demo") + if err := os.MkdirAll(subfolder, 0o755); err != nil { + t.Fatalf("Failed to create repo subfolder: %v", err) + } + + mainFile := filepath.Join(subfolder, "main.py") + if err := os.WriteFile(mainFile, []byte("line 1\nline 2\nline 3\n"), 0o644); err != nil { + t.Fatalf("Failed to write main.py: %v", err) + } + + logger := 
hclog.NewNullLogger() + commit := "aec0b795c350ff53fe9ab01adf862408aa34c3fd" + + metadata := &git.RepositoryMetadata{ + RepoRootFolder: repoRoot, + Subfolder: filepath.ToSlash(filepath.Join("apps", "demo")), + CommitHash: &commit, + } + + options := RunOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: commit, + SourceFolder: subfolder, + } + + // Create mock SARIF report with Snyk-style message + sarifReport := &sarif.Report{ + Runs: []*sarif.Run{ + { + Tool: sarif.Tool{ + Driver: &sarif.ToolComponent{ + Rules: []*sarif.ReportingDescriptor{ + { + ID: "python/Ssti", + Properties: map[string]interface{}{ + "problem.severity": "error", + }, + }, + }, + }, + }, + Results: []*sarif.Result{ + { + RuleID: stringPtr("python/Ssti"), + Level: stringPtr("error"), + Message: sarif.Message{ + Markdown: stringPtr("Unsanitized input from {0} {1} into {2}, where it is used to render an HTML page returned to the user. This may result in a Cross-Site Scripting attack (XSS)."), + Arguments: []string{ + "[an HTTP parameter](0)", + "[flows](1),(2),(3),(4),(5),(6)", + "[flask.render_template_string](7)", + }, + }, + RelatedLocations: []*sarif.Location{ + { + Id: uintPtr(0), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(1), + StartColumn: intPtr(50), + EndLine: intPtr(1), + EndColumn: intPtr(57), + }, + }, + }, + { + Id: uintPtr(1), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(8), + StartColumn: intPtr(18), + EndLine: intPtr(8), + EndColumn: intPtr(25), + }, + }, + }, + { + Id: uintPtr(2), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(8), + StartColumn: intPtr(18), + EndLine: intPtr(8), + EndColumn: 
intPtr(30), + }, + }, + }, + { + Id: uintPtr(3), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(8), + StartColumn: intPtr(18), + EndLine: intPtr(8), + EndColumn: intPtr(46), + }, + }, + }, + { + Id: uintPtr(4), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(8), + StartColumn: intPtr(5), + EndLine: intPtr(8), + EndColumn: intPtr(15), + }, + }, + }, + { + Id: uintPtr(5), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(11), + StartColumn: intPtr(5), + EndLine: intPtr(11), + EndColumn: intPtr(13), + }, + }, + }, + { + Id: uintPtr(6), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(29), + StartColumn: intPtr(35), + EndLine: intPtr(29), + EndColumn: intPtr(43), + }, + }, + }, + { + Id: uintPtr(7), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(29), + StartColumn: intPtr(12), + EndLine: intPtr(29), + EndColumn: intPtr(44), + }, + }, + }, + }, + Locations: []*sarif.Location{ + { + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(29), + StartColumn: intPtr(12), + EndLine: intPtr(29), + EndColumn: intPtr(44), + }, + }, + }, + }, + }, + }, + }, + }, + } + + report := &internalsarif.Report{ + Report: sarifReport, + } + + // Enrich results with level property + report.EnrichResultsLevelProperty() + + // Process SARIF results + issues := 
buildNewIssuesFromSARIF(report, options, subfolder, metadata, logger) + + // Verify that formatted messages are included in issue bodies + if len(issues) == 0 { + t.Fatalf("Expected at least one issue, got 0") + } + + issue := issues[0] + + if !strings.Contains(issue.Body, "### Description") { + t.Errorf("Expected issue body to contain '### Description' section") + } + + // Check that the formatted message contains hyperlinks + if !strings.Contains(issue.Body, "https://github.com/test-org/test-repo/blob/") { + t.Errorf("Expected issue body to contain GitHub permalink") + } + + // Check for flow chain formatting (multiple references) + if !strings.Contains(issue.Body, " > ") { + t.Errorf("Expected issue body to contain flow chain formatting") + } + + // Check for Snyk-style formatting (multiple references in flow chain) + if !strings.Contains(issue.Body, "flows (") { + t.Errorf("Expected issue body to contain Snyk-style formatted flow reference") + } +} + +// Helper functions for creating test data +func stringPtr(s string) *string { + return &s +} + +func intPtr(i int) *int { + return &i +} + +func uintPtr(u uint) *uint { + return &u +} diff --git a/internal/sarif/message_formatter.go b/internal/sarif/message_formatter.go new file mode 100644 index 00000000..afdc6a7b --- /dev/null +++ b/internal/sarif/message_formatter.go @@ -0,0 +1,379 @@ +package sarif + +import ( + "fmt" + "path/filepath" + "regexp" + "strconv" + "strings" + + "github.com/owenrumney/go-sarif/v2/sarif" + "github.com/scan-io-git/scan-io/internal/git" +) + +// MessageFormatOptions contains the configuration needed to format SARIF messages with GitHub links +type MessageFormatOptions struct { + Namespace string + Repository string + Ref string + SourceFolder string +} + +// FormatResultMessage is the main entry point for formatting SARIF result messages +// It processes the message template, substitutes arguments, and converts location references to GitHub hyperlinks +func FormatResultMessage(result 
*sarif.Result, repoMetadata *git.RepositoryMetadata, options MessageFormatOptions) string { + // Extract locations for reference lookup + locations := extractLocationsForFormatting(result) + if len(locations) == 0 { + // No locations available, return plain text message + if result.Message.Text != nil { + return *result.Message.Text + } + return "" + } + + // Check if this is a CodeQL-style message (direct markdown in text field) + if result.Message.Text != nil && result.Message.Markdown == nil && len(result.Message.Arguments) == 0 { + return formatCodeQLStyleMessage(*result.Message.Text, result, repoMetadata, options) + } + + // Format the message with arguments and location links (Snyk style) + formatted := formatMessageWithArguments(&result.Message, locations, repoMetadata, options) + if formatted != "" { + return formatted + } + + // Fallback to plain text + if result.Message.Text != nil { + return *result.Message.Text + } + + return "" +} + +// formatCodeQLStyleMessage handles CodeQL-style messages where the text contains direct markdown links +// Example: "This template construction depends on a [user-provided value](1)." 
+func formatCodeQLStyleMessage(text string, result *sarif.Result, repoMetadata *git.RepositoryMetadata, options MessageFormatOptions) string { + // Pattern to match [text](id) where id is a number + pattern := regexp.MustCompile(`\[([^\]]+)\]\((\d+)\)`) + + return pattern.ReplaceAllStringFunc(text, func(match string) string { + matches := pattern.FindStringSubmatch(match) + if len(matches) != 3 { + return match // Return original if pattern doesn't match + } + + linkText := matches[1] + idStr := matches[2] + + // Convert id to integer (CodeQL uses 1-based indexing) + id, err := strconv.Atoi(idStr) + if err != nil { + return match // Return original if id is not a number + } + + // Find the relatedLocation with matching id + var targetLocation *sarif.Location + for _, relLoc := range result.RelatedLocations { + if relLoc.Id != nil && *relLoc.Id == uint(id) { + // Create a Location from RelatedLocation + targetLocation = &sarif.Location{ + PhysicalLocation: relLoc.PhysicalLocation, + } + break + } + } + + if targetLocation != nil { + link := buildLocationLink(targetLocation, repoMetadata, options) + if link != "" { + return fmt.Sprintf("[%s](%s)", linkText, link) + } + } + + // If we can't build a link, return the original text without the reference + return linkText + }) +} + +// extractLocationsForFormatting extracts locations from SARIF result in priority order: +// 1) relatedLocations, 2) codeFlows[0].threadFlows[0].locations, 3) empty array +func extractLocationsForFormatting(result *sarif.Result) []*sarif.Location { + var locations []*sarif.Location + + // Priority 1: relatedLocations + if len(result.RelatedLocations) > 0 { + for _, relLoc := range result.RelatedLocations { + if relLoc != nil { + locations = append(locations, relLoc) + } + } + return locations + } + + // Priority 2: codeFlows[0].threadFlows[0].locations + if len(result.CodeFlows) > 0 && len(result.CodeFlows[0].ThreadFlows) > 0 { + threadFlow := result.CodeFlows[0].ThreadFlows[0] + for _, 
threadLoc := range threadFlow.Locations { + if threadLoc.Location != nil { + locations = append(locations, threadLoc.Location) + } + } + return locations + } + + // Priority 3: empty array (fallback) + return locations +} + +// formatMessageWithArguments processes the message template, substitutes placeholders, and converts location references +func formatMessageWithArguments(message *sarif.Message, locations []*sarif.Location, repoMetadata *git.RepositoryMetadata, options MessageFormatOptions) string { + // Use markdown template if available, otherwise fall back to text + template := "" + if message.Markdown != nil { + template = *message.Markdown + } else if message.Text != nil { + template = *message.Text + } else { + return "" + } + + // If no arguments, return template as-is + if len(message.Arguments) == 0 { + return template + } + + // Process each argument and substitute placeholders + result := template + for i, arg := range message.Arguments { + placeholder := fmt.Sprintf("{%d}", i) + + // Parse the argument to extract text and location references + text, refs := parseLocationReference(arg) + + // Convert location references to hyperlinks + formattedArg := formatLocationReferences(text, refs, locations, repoMetadata, options) + + // Substitute the placeholder + result = strings.ReplaceAll(result, placeholder, formattedArg) + } + + return result +} + +// parseLocationReference parses SARIF message arguments to extract text and location reference numbers +// Examples: +// +// "[user-provided value](1)" -> text="user-provided value", refs=[1] +// "[flows](1),(2),(3),(4),(5),(6)" -> text="flows", refs=[1,2,3,4,5,6] +func parseLocationReference(arg string) (text string, refs []int) { + // Pattern to match [text](ref1),(ref2),... 
+ // This handles both single references and multiple references + pattern := regexp.MustCompile(`^\[([^\]]+)\]\((.+)\)$`) + matches := pattern.FindStringSubmatch(arg) + + if len(matches) != 3 { + // Malformed argument, return as-is + return arg, nil + } + + text = matches[1] + refsStr := matches[2] + + // Parse reference numbers (handle both single and multiple) + // The format is like "1),(2),(3),(4),(5),(6" - we need to extract numbers + refParts := strings.Split(refsStr, "),(") + for _, part := range refParts { + part = strings.TrimSpace(part) + // Remove any remaining parentheses + part = strings.Trim(part, "()") + if refNum, err := strconv.Atoi(part); err == nil { + refs = append(refs, refNum) + } + } + + return text, refs +} + +// formatLocationReferences converts location reference numbers to GitHub hyperlinks +func formatLocationReferences(text string, refs []int, locations []*sarif.Location, repoMetadata *git.RepositoryMetadata, options MessageFormatOptions) string { + if len(refs) == 0 { + return text + } + + // Build links for each reference + var links []string + for _, ref := range refs { + if ref >= 0 && ref < len(locations) { + link := buildLocationLink(locations[ref], repoMetadata, options) + if link != "" { + links = append(links, fmt.Sprintf("[%d](%s)", ref, link)) + } else { + links = append(links, fmt.Sprintf("%d", ref)) + } + } else { + // Invalid reference, use as-is + links = append(links, fmt.Sprintf("%d", ref)) + } + } + + // Format based on number of references + if len(refs) == 1 { + // Single reference: "[text](link)" + if refs[0] >= 0 && refs[0] < len(locations) { + link := buildLocationLink(locations[refs[0]], repoMetadata, options) + if link != "" { + return fmt.Sprintf("[%s](%s)", text, link) + } else { + return fmt.Sprintf("%s (%d)", text, refs[0]) + } + } else { + return fmt.Sprintf("%s (%d)", text, refs[0]) + } + } else { + // Multiple references: "text ([1](link1) > [2](link2) > ...)" + linkChain := strings.Join(links, " > ") + 
return fmt.Sprintf("%s (%s)", text, linkChain) + } +} + +// buildLocationLink constructs a GitHub permalink for a SARIF location +func buildLocationLink(location *sarif.Location, repoMetadata *git.RepositoryMetadata, options MessageFormatOptions) string { + if location.PhysicalLocation == nil || location.PhysicalLocation.ArtifactLocation == nil { + return "" + } + + artifact := location.PhysicalLocation.ArtifactLocation + if artifact.URI == nil { + return "" + } + + // Get file path and convert to repository-relative path + filePath := *artifact.URI + repoPath := convertToRepoRelativePath(filePath, repoMetadata, options.SourceFolder) + + // Get line information + region := location.PhysicalLocation.Region + if region == nil { + return "" + } + + startLine := 1 + endLine := 1 + + if region.StartLine != nil { + startLine = *region.StartLine + } + if region.EndLine != nil { + endLine = *region.EndLine + } else { + endLine = startLine + } + + // Build GitHub permalink + // Format: https://github.com/{namespace}/{repo}/blob/{ref}/{file}#L{start}-L{end} + baseURL := fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s", + options.Namespace, options.Repository, options.Ref, repoPath) + + if startLine == endLine { + return fmt.Sprintf("%s#L%d", baseURL, startLine) + } else { + return fmt.Sprintf("%s#L%d-L%d", baseURL, startLine, endLine) + } +} + +// convertToRepoRelativePath converts a SARIF artifact URI to a repository-relative path +// This mimics the logic from extractFileURIFromResult in cmd/sarif-issues/utils.go +func convertToRepoRelativePath(rawURI string, repoMetadata *git.RepositoryMetadata, sourceFolder string) string { + rawURI = strings.TrimSpace(rawURI) + if rawURI == "" { + return "" + } + + repoPath := "" + subfolder := normalisedSubfolder(repoMetadata) + var repoRoot string + if repoMetadata != nil && strings.TrimSpace(repoMetadata.RepoRootFolder) != "" { + repoRoot = filepath.Clean(repoMetadata.RepoRootFolder) + } + absSource := 
strings.TrimSpace(sourceFolder) + if absSource != "" { + if abs, err := filepath.Abs(absSource); err == nil { + absSource = abs + } else { + absSource = filepath.Clean(absSource) + } + } + + // Normalise URI to the host OS path representation + osURI := filepath.FromSlash(rawURI) + osURI = strings.TrimPrefix(osURI, "file://") + cleanURI := filepath.Clean(osURI) + + if filepath.IsAbs(cleanURI) { + localPath := cleanURI + if repoRoot != "" { + if rel, err := filepath.Rel(repoRoot, localPath); err == nil { + if rel != "." && !strings.HasPrefix(rel, "..") { + repoPath = filepath.ToSlash(rel) + } + } + } + if repoPath == "" && absSource != "" { + if rel, err := filepath.Rel(absSource, localPath); err == nil { + repoPath = filepath.ToSlash(rel) + } + } + if repoPath == "" { + repoPath = filepath.ToSlash(strings.TrimPrefix(localPath, string(filepath.Separator))) + } + } else { + localPath := resolveRelativeLocalPath(cleanURI, repoRoot, subfolder, absSource) + + if repoRoot != "" && localPath != "" && pathWithin(localPath, repoRoot) { + if rel, err := filepath.Rel(repoRoot, localPath); err == nil { + if rel != "." 
{ + repoPath = filepath.ToSlash(rel) + } + } + } + + if repoPath == "" && subfolder != "" { + repoPath = filepath.ToSlash(filepath.Join(subfolder, cleanURI)) + } + if repoPath == "" { + repoPath = filepath.ToSlash(cleanURI) + } + } + + return repoPath +} + +// normalisedSubfolder extracts and normalizes the subfolder from repository metadata +func normalisedSubfolder(md *git.RepositoryMetadata) string { + if md == nil { + return "" + } + sub := strings.Trim(md.Subfolder, "/\\") + return filepath.ToSlash(sub) +} + +// resolveRelativeLocalPath resolves a relative URI to a local filesystem path +func resolveRelativeLocalPath(cleanURI, repoRoot, subfolder, absSource string) string { + if repoRoot != "" { + if subfolder != "" { + return filepath.Join(repoRoot, subfolder, cleanURI) + } + return filepath.Join(repoRoot, cleanURI) + } + if absSource != "" { + return filepath.Join(absSource, cleanURI) + } + return cleanURI +} + +// pathWithin checks if a path is within another path +func pathWithin(path, within string) bool { + rel, err := filepath.Rel(within, path) + return err == nil && !strings.HasPrefix(rel, "..") +} diff --git a/internal/sarif/message_formatter_test.go b/internal/sarif/message_formatter_test.go new file mode 100644 index 00000000..90df32c0 --- /dev/null +++ b/internal/sarif/message_formatter_test.go @@ -0,0 +1,457 @@ +package sarif + +import ( + "testing" + + "github.com/owenrumney/go-sarif/v2/sarif" + "github.com/scan-io-git/scan-io/internal/git" +) + +func TestFormatMessageWithSingleReference(t *testing.T) { + // Test CodeQL style single reference: "[user-provided value](1)" + message := &sarif.Message{ + Markdown: stringPtr("This template construction depends on a {0}."), + Arguments: []string{ + "[user-provided value](0)", + }, + } + + locations := []*sarif.Location{ + createTestLocation("main.py", 1, 50, 1, 57), + } + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := 
MessageFormatOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + result := formatMessageWithArguments(message, locations, repoMetadata, options) + expected := "This template construction depends on a [user-provided value](https://github.com/test-org/test-repo/blob/main/main.py#L1)." + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestFormatMessageWithMultipleReferences(t *testing.T) { + // Test Snyk style multiple references: "[flows](1),(2),(3),(4),(5),(6)" + message := &sarif.Message{ + Markdown: stringPtr("Unsanitized input from {0} {1} into {2}, where it is used to render an HTML page returned to the user. This may result in a Cross-Site Scripting attack (XSS)."), + Arguments: []string{ + "[an HTTP parameter](0)", + "[flows](1),(2),(3),(4),(5),(6)", + "[flask.render_template_string](7)", + }, + } + + locations := []*sarif.Location{ + createTestLocation("main.py", 1, 50, 1, 57), // 0 + createTestLocation("main.py", 8, 18, 8, 25), // 1 + createTestLocation("main.py", 8, 18, 8, 30), // 2 + createTestLocation("main.py", 8, 18, 8, 46), // 3 + createTestLocation("main.py", 8, 5, 8, 15), // 4 + createTestLocation("main.py", 11, 5, 11, 13), // 5 + createTestLocation("main.py", 29, 35, 29, 43), // 6 + createTestLocation("main.py", 29, 12, 29, 44), // 7 + } + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + result := formatMessageWithArguments(message, locations, repoMetadata, options) + expected := "Unsanitized input from [an HTTP parameter](https://github.com/test-org/test-repo/blob/main/main.py#L1) flows ([1](https://github.com/test-org/test-repo/blob/main/main.py#L8) > [2](https://github.com/test-org/test-repo/blob/main/main.py#L8) > 
[3](https://github.com/test-org/test-repo/blob/main/main.py#L8) > [4](https://github.com/test-org/test-repo/blob/main/main.py#L8) > [5](https://github.com/test-org/test-repo/blob/main/main.py#L11) > [6](https://github.com/test-org/test-repo/blob/main/main.py#L29)) into [flask.render_template_string](https://github.com/test-org/test-repo/blob/main/main.py#L29), where it is used to render an HTML page returned to the user. This may result in a Cross-Site Scripting attack (XSS)." + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestFormatMessageWithPlaceholders(t *testing.T) { + // Test template with placeholders + message := &sarif.Message{ + Markdown: stringPtr("Input from {0} flows to {1}"), + Arguments: []string{ + "[user input](0)", + "[template](1)", + }, + } + + locations := []*sarif.Location{ + createTestLocation("main.py", 1, 1, 1, 10), + createTestLocation("main.py", 2, 1, 2, 10), + } + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + result := formatMessageWithArguments(message, locations, repoMetadata, options) + expected := "Input from [user input](https://github.com/test-org/test-repo/blob/main/main.py#L1) flows to [template](https://github.com/test-org/test-repo/blob/main/main.py#L2)" + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestFormatMessageNoMarkdown(t *testing.T) { + // Test fallback to plain text when no markdown template + message := &sarif.Message{ + Text: stringPtr("Plain text message without formatting"), + } + + locations := []*sarif.Location{} + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + Namespace: "test-org", + Repository: 
"test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + result := formatMessageWithArguments(message, locations, repoMetadata, options) + expected := "Plain text message without formatting" + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestFormatMessageMissingLocations(t *testing.T) { + // Test graceful degradation when locations are missing + message := &sarif.Message{ + Markdown: stringPtr("Input from {0} flows to {1}"), + Arguments: []string{ + "[user input](0)", + "[template](1)", + }, + } + + locations := []*sarif.Location{} // Empty locations + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + result := formatMessageWithArguments(message, locations, repoMetadata, options) + expected := "Input from user input (0) flows to template (1)" // References show as plain text with numbers + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestExtractLocationsFromRelatedLocations(t *testing.T) { + // Test priority 1: relatedLocations + result := &sarif.Result{ + RelatedLocations: []*sarif.Location{ + createTestLocation("main.py", 1, 50, 1, 57), + }, + } + + locations := extractLocationsForFormatting(result) + + if len(locations) != 1 { + t.Errorf("Expected 1 location, got %d", len(locations)) + } + + if locations[0].PhysicalLocation.ArtifactLocation.URI == nil || *locations[0].PhysicalLocation.ArtifactLocation.URI != "main.py" { + t.Errorf("Expected location URI 'main.py', got %v", locations[0].PhysicalLocation.ArtifactLocation.URI) + } +} + +func TestExtractLocationsFromCodeFlows(t *testing.T) { + // Test priority 2: codeFlows fallback + result := &sarif.Result{ + CodeFlows: []*sarif.CodeFlow{ + { + ThreadFlows: []*sarif.ThreadFlow{ + { + Locations: 
[]*sarif.ThreadFlowLocation{ + { + Location: createTestLocation("main.py", 1, 50, 1, 57), + }, + { + Location: createTestLocation("main.py", 8, 18, 8, 25), + }, + }, + }, + }, + }, + }, + } + + locations := extractLocationsForFormatting(result) + + if len(locations) != 2 { + t.Errorf("Expected 2 locations, got %d", len(locations)) + } +} + +func TestExtractLocationsEmpty(t *testing.T) { + // Test priority 3: empty fallback + result := &sarif.Result{} + + locations := extractLocationsForFormatting(result) + + if len(locations) != 0 { + t.Errorf("Expected 0 locations, got %d", len(locations)) + } +} + +func TestParseLocationReference(t *testing.T) { + tests := []struct { + name string + input string + expected string + refs []int + }{ + { + name: "single reference", + input: "[user-provided value](1)", + expected: "user-provided value", + refs: []int{1}, + }, + { + name: "multiple references", + input: "[flows](1),(2),(3),(4),(5),(6)", + expected: "flows", + refs: []int{1, 2, 3, 4, 5, 6}, + }, + { + name: "malformed input", + input: "plain text without brackets", + expected: "plain text without brackets", + refs: nil, + }, + { + name: "invalid reference numbers", + input: "[text](abc),(def)", + expected: "text", + refs: []int{}, // Invalid numbers are skipped + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + text, refs := parseLocationReference(tt.input) + if text != tt.expected { + t.Errorf("Expected text '%s', got '%s'", tt.expected, text) + } + if len(refs) != len(tt.refs) { + t.Errorf("Expected %d refs, got %d", len(tt.refs), len(refs)) + } + for i, ref := range refs { + if ref != tt.refs[i] { + t.Errorf("Expected ref[%d] = %d, got %d", i, tt.refs[i], ref) + } + } + }) + } +} + +func TestBuildLocationLink(t *testing.T) { + location := createTestLocation("main.py", 10, 5, 10, 15) + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + 
Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + result := buildLocationLink(location, repoMetadata, options) + expected := "https://github.com/test-org/test-repo/blob/main/main.py#L10" + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestBuildLocationLinkRange(t *testing.T) { + location := createTestLocation("main.py", 10, 5, 15, 20) + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + result := buildLocationLink(location, repoMetadata, options) + expected := "https://github.com/test-org/test-repo/blob/main/main.py#L10-L15" + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestBuildLocationLinkAbsolutePath(t *testing.T) { + location := createTestLocation("/test/source/main.py", 10, 5, 10, 15) + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + result := buildLocationLink(location, repoMetadata, options) + expected := "https://github.com/test-org/test-repo/blob/main/main.py#L10" + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestBuildLocationLinkWithSubfolder(t *testing.T) { + // Test the specific case mentioned in the issue: subfolder path resolution + location := createTestLocation("main.py", 34, 1, 34, 10) + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/tmp/scanio-test", + Subfolder: "apps/demo", + CommitHash: stringPtr("aec0b795c350ff53fe9ab01adf862408aa34c3fd"), + } + + options := MessageFormatOptions{ + Namespace: "scan-io-git", + 
Repository: "scanio-test", + Ref: "aec0b795c350ff53fe9ab01adf862408aa34c3fd", + SourceFolder: "/tmp/scanio-test/apps/demo", + } + + result := buildLocationLink(location, repoMetadata, options) + expected := "https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py#L34" + + if result != expected { + t.Errorf("Expected: %s\nGot: %s", expected, result) + } +} + +func TestFormatCodeQLStyleMessage(t *testing.T) { + // Test CodeQL style message with direct markdown links + message := &sarif.Message{ + Text: stringPtr("This template construction depends on a [user-provided value](1)."), + } + + result := &sarif.Result{ + Message: *message, + RelatedLocations: []*sarif.Location{ + { + Id: uintPtr(1), + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(1), + StartColumn: intPtr(50), + EndLine: intPtr(1), + EndColumn: intPtr(57), + }, + }, + }, + }, + } + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/source", + CommitHash: stringPtr("abc123"), + } + + options := MessageFormatOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "main", + SourceFolder: "/test/source", + } + + formatted := FormatResultMessage(result, repoMetadata, options) + expected := "This template construction depends on a [user-provided value](https://github.com/test-org/test-repo/blob/main/main.py#L1)." 
+ + if formatted != expected { + t.Errorf("Expected: %s\nGot: %s", expected, formatted) + } +} + +// Helper functions for creating test data + +func createTestLocation(uri string, startLine, startCol, endLine, endCol int) *sarif.Location { + return &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr(uri), + }, + Region: &sarif.Region{ + StartLine: intPtr(startLine), + StartColumn: intPtr(startCol), + EndLine: intPtr(endLine), + EndColumn: intPtr(endCol), + }, + }, + } +} + +func stringPtr(s string) *string { + return &s +} + +func intPtr(i int) *int { + return &i +} + +func uintPtr(u uint) *uint { + return &u +} From fc5c52b8a2ad9c3fdbabfef31c7b65b1a7c5aa0b Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sat, 11 Oct 2025 10:43:58 +0200 Subject: [PATCH 34/52] feat: add git metadata fallback support for SARIF issues command and update usage examples --- cmd/sarif-issues/sarif-issues.go | 16 ++++-- cmd/sarif-issues/utils.go | 56 +++++++++++++++++++ .../engineering/sarif-issues-path-analysis.md | 19 ++++++- 3 files changed, 83 insertions(+), 8 deletions(-) diff --git a/cmd/sarif-issues/sarif-issues.go b/cmd/sarif-issues/sarif-issues.go index d23b39f0..bc898ffe 100644 --- a/cmd/sarif-issues/sarif-issues.go +++ b/cmd/sarif-issues/sarif-issues.go @@ -42,6 +42,9 @@ var ( exampleSarifIssuesUsage = ` # Recommended: run from repository root and use relative paths scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --source-folder apps/demo + # Run inside git repository (auto-detects namespace, repository, ref) + scanio sarif-issues --sarif semgrep-demo.sarif --source-folder apps/demo + # Create issues from SARIF report with basic configuration scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif @@ -87,13 +90,16 @@ func runSarifIssues(cmd *cobra.Command, args []string) error { // 3. 
Handle environment variable fallbacks ApplyEnvironmentFallbacks(&opts) - // 4. Validate arguments + // 4. Handle git metadata fallbacks + ApplyGitMetadataFallbacks(&opts, lg) + + // 5. Validate arguments if err := validate(&opts); err != nil { lg.Error("invalid arguments", "error", err) return errors.NewCommandError(opts, nil, fmt.Errorf("invalid arguments: %w", err), 1) } - // 5. Read and process SARIF report + // 6. Read and process SARIF report report, err := internalsarif.ReadReport(opts.SarifPath, lg, opts.SourceFolder, true) if err != nil { lg.Error("failed to read SARIF report", "error", err) @@ -110,7 +116,7 @@ func runSarifIssues(cmd *cobra.Command, args []string) error { report.EnrichResultsLevelProperty() report.EnrichResultsTitleProperty() - // 6. Get all open GitHub issues + // 7. Get all open GitHub issues openIssues, err := listOpenIssues(opts) if err != nil { lg.Error("failed to list open issues", "error", err) @@ -118,14 +124,14 @@ func runSarifIssues(cmd *cobra.Command, args []string) error { } lg.Info("fetched open issues from repository", "count", len(openIssues)) - // 7. Process SARIF report and create/close issues + // 8. Process SARIF report and create/close issues created, err := processSARIFReport(report, opts, sourceFolderAbs, repoMetadata, lg, openIssues) if err != nil { lg.Error("failed to process SARIF report", "error", err) return err } - // 8. Log success and handle output + // 9. 
Log success and handle output lg.Info("issues created from SARIF high severity findings", "count", created) fmt.Printf("Created %d issue(s) from SARIF high severity findings\n", created) diff --git a/cmd/sarif-issues/utils.go b/cmd/sarif-issues/utils.go index 50c81145..74194184 100644 --- a/cmd/sarif-issues/utils.go +++ b/cmd/sarif-issues/utils.go @@ -15,6 +15,7 @@ import ( "github.com/owenrumney/go-sarif/v2/sarif" "github.com/scan-io-git/scan-io/internal/git" "github.com/scan-io-git/scan-io/pkg/shared/files" + "github.com/scan-io-git/scan-io/pkg/shared/vcsurl" ) // parseLineRange parses line range from strings like "123" or "123-456". @@ -477,3 +478,58 @@ func ApplyEnvironmentFallbacks(opts *RunOptions) { } } } + +// ApplyGitMetadataFallbacks applies git metadata fallbacks to the run options. +// It extracts namespace, repository, and ref from local git repository metadata +// when the corresponding flags are not already provided. +func ApplyGitMetadataFallbacks(opts *RunOptions, logger hclog.Logger) { + // Determine the base folder for git metadata extraction + baseFolder := strings.TrimSpace(opts.SourceFolder) + if baseFolder == "" { + // Use current working directory if source-folder is not provided + if cwd, err := os.Getwd(); err == nil { + baseFolder = cwd + } else { + logger.Debug("failed to get current working directory for git metadata extraction", "error", err) + return + } + } + + // Collect git repository metadata + repoMetadata, err := git.CollectRepositoryMetadata(baseFolder) + if err != nil { + logger.Debug("unable to collect git repository metadata", "error", err, "baseFolder", baseFolder) + return + } + + // Extract namespace and repository from git remote URL if not already set + if strings.TrimSpace(opts.Namespace) == "" || strings.TrimSpace(opts.Repository) == "" { + if repoMetadata.RepositoryFullName != nil && *repoMetadata.RepositoryFullName != "" { + // Parse the repository URL to extract namespace and repository + vcsURL, err := 
vcsurl.ParseForVCSType(*repoMetadata.RepositoryFullName, vcsurl.UnknownVCS) + if err != nil { + logger.Debug("failed to parse git repository URL", "error", err, "url", *repoMetadata.RepositoryFullName) + } else { + // Apply namespace if not already set + if strings.TrimSpace(opts.Namespace) == "" && vcsURL.Namespace != "" { + opts.Namespace = vcsURL.Namespace + logger.Debug("auto-detected namespace from git metadata", "namespace", vcsURL.Namespace) + } + + // Apply repository if not already set + if strings.TrimSpace(opts.Repository) == "" && vcsURL.Repository != "" { + opts.Repository = vcsURL.Repository + logger.Debug("auto-detected repository from git metadata", "repository", vcsURL.Repository) + } + } + } + } + + // Extract commit hash for ref if not already set + if strings.TrimSpace(opts.Ref) == "" { + if repoMetadata.CommitHash != nil && *repoMetadata.CommitHash != "" { + opts.Ref = *repoMetadata.CommitHash + logger.Debug("auto-detected ref from git metadata", "ref", *repoMetadata.CommitHash) + } + } +} diff --git a/docs/engineering/sarif-issues-path-analysis.md b/docs/engineering/sarif-issues-path-analysis.md index 6a69b66e..9f2af69c 100644 --- a/docs/engineering/sarif-issues-path-analysis.md +++ b/docs/engineering/sarif-issues-path-analysis.md @@ -53,7 +53,8 @@ The CLI assumes `--source-folder` equals the repository root. When the user poin 6. Evaluate whether `internal/sarif`’s enrichment should adopt the same metadata-aware logic; if so, share the helper to keep `to-html` and future commands consistent. # Manual testing -## Semgrep scan of subfolder (monorepo like use case) +## Scans from root, subfolder, outside, with abs and relative paths +### Semgrep scan of subfolder (monorepo like use case) ```sh # 1. 
Outside folder absolute paths cd /home/jekos/ghq/github.com/scan-io-git/scan-io @@ -98,7 +99,7 @@ scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b # validate here: 2 issues with correct permalinks # correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py ``` -## snyk +### snyk ```sh cd /home/jekos/ghq/github.com/scan-io-git/scanio-test @@ -110,7 +111,7 @@ scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b snyk code test --sarif-file-output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-subfolder-from-root.sarif apps/demo scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-subfolder-from-root.sarif --source-folder apps/demo ``` -## codeql +### codeql ```sh cd /home/jekos/ghq/github.com/scan-io-git/scanio-test @@ -123,4 +124,16 @@ scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b /tmp/codeql/codeql database create /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-subfolder --language=python --source-root=apps/demo /tmp/codeql/codeql database analyze /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-subfolder --format=sarif-latest --output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-subfolder.sarif scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-subfolder.sarif --source-folder apps/demo +``` +## How to handle 2 subfolders with 2 separate scans +```sh +cd /home/jekos/ghq/github.com/scan-io-git/scanio-test + +# scan projects +scanio analyse --scanner semgrep apps/demo --format sarif --output semgrep-demo.sarif +snyk code test --sarif-file-output=snyk-another.sarif apps/another + +# create issues 
+scanio sarif-issues --sarif semgrep-demo.sarif --source-folder apps/demo +scanio sarif-issues --sarif snyk-another.sarif --source-folder apps/another ``` \ No newline at end of file From 4a6ba2a3eb28c9cb0a0d7b6a0c1ef5e6cb329907 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sat, 11 Oct 2025 11:13:08 +0200 Subject: [PATCH 35/52] feat: implement filtering of open issues by source folder scope for SARIF issues command --- .cursorrules | 1 + cmd/sarif-issues/issue_processing.go | 37 +++++- cmd/sarif-issues/issue_processing_test.go | 119 ++++++++++++++++++ .../engineering/sarif-issues-path-analysis.md | 12 +- docs/reference/cmd-sarif-issues.md | 18 +++ 5 files changed, 184 insertions(+), 3 deletions(-) diff --git a/.cursorrules b/.cursorrules index e0ddac32..70ccd441 100644 --- a/.cursorrules +++ b/.cursorrules @@ -21,3 +21,4 @@ - The plan may contain one or more phases. Each phase contains tasks. Write inputs and deliverables for each phase, task or group of tasks. - Ensure that new functionality has tasks related to having tests for that functionality. - Default plan file is `PLAN.md` in the root. +- Add documentation and help message update when necessary. diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index c7fc030f..2b3e309f 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -444,6 +444,36 @@ func buildKnownIssuesFromOpen(openIssues map[int]OpenIssueEntry, lg hclog.Logger return knownIssues } +// filterIssuesBySourceFolder filters open issues to only those within the current source folder scope. +// This enables independent issue management for different subfolders in monorepo CI workflows. +// Issues are filtered based on their FilePath metadata matching the normalized subfolder path. 
+func filterIssuesBySourceFolder(openIssues map[int]OpenIssueEntry, repoMetadata *git.RepositoryMetadata, lg hclog.Logger) map[int]OpenIssueEntry { + // Determine the subfolder scope from repo metadata + subfolder := normalisedSubfolder(repoMetadata) + + // If no subfolder (scanning from root), include all issues + if subfolder == "" { + lg.Debug("no subfolder scope, including all issues") + return openIssues + } + + // Filter issues: keep only those whose FilePath starts with subfolder + filtered := make(map[int]OpenIssueEntry) + for num, entry := range openIssues { + filePath := filepath.ToSlash(entry.OpenIssueReport.FilePath) + if strings.HasPrefix(filePath, subfolder+"/") || filePath == subfolder { + filtered[num] = entry + } else { + lg.Debug("excluding issue outside source folder scope", + "number", num, "file", filePath, "scope", subfolder) + } + } + + lg.Info("filtered issues by source folder scope", + "total", len(openIssues), "scoped", len(filtered), "subfolder", subfolder) + return filtered +} + // createUnmatchedIssues creates GitHub issues for new findings that don't correlate with existing issues. // Returns the number of successfully created issues. 
func createUnmatchedIssues(unmatchedNew []issuecorrelation.IssueMetadata, newIssues []issuecorrelation.IssueMetadata, newBodies, newTitles []string, options RunOptions, lg hclog.Logger) (int, error) { @@ -575,8 +605,11 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, source newTitles[i] = data.Title } - // Build list of known issues from the provided open issues data - knownIssues := buildKnownIssuesFromOpen(openIssues, lg) + // Filter open issues to only those within the current source folder scope + scopedOpenIssues := filterIssuesBySourceFolder(openIssues, repoMetadata, lg) + + // Build list of known issues from the filtered open issues data + knownIssues := buildKnownIssuesFromOpen(scopedOpenIssues, lg) // correlate corr := issuecorrelation.NewCorrelator(newIssues, knownIssues) diff --git a/cmd/sarif-issues/issue_processing_test.go b/cmd/sarif-issues/issue_processing_test.go index e7e0a0d4..b1928286 100644 --- a/cmd/sarif-issues/issue_processing_test.go +++ b/cmd/sarif-issues/issue_processing_test.go @@ -3,7 +3,9 @@ package sarifissues import ( "testing" + "github.com/hashicorp/go-hclog" "github.com/owenrumney/go-sarif/v2/sarif" + "github.com/scan-io-git/scan-io/internal/git" ) func TestParseIssueBodyUsesMetadataRuleID(t *testing.T) { @@ -145,3 +147,120 @@ func TestExtractRuleDetailEmptyWhenNoContent(t *testing.T) { t.Fatalf("expected empty detail and nil refs, got %q %#v", detail, refs) } } + +func TestFilterIssuesBySourceFolder(t *testing.T) { + // Create test logger + logger := hclog.NewNullLogger() + + // Test cases + tests := []struct { + name string + repoMetadata *git.RepositoryMetadata + openIssues map[int]OpenIssueEntry + expectedCount int + expectedIssues []int // issue numbers that should be included + }{ + { + name: "no subfolder - include all issues", + repoMetadata: &git.RepositoryMetadata{ + Subfolder: "", + }, + openIssues: map[int]OpenIssueEntry{ + 1: {OpenIssueReport: OpenIssueReport{FilePath: "apps/demo/main.py"}}, 
+ 2: {OpenIssueReport: OpenIssueReport{FilePath: "apps/another/main.py"}}, + 3: {OpenIssueReport: OpenIssueReport{FilePath: "root/file.py"}}, + }, + expectedCount: 3, + expectedIssues: []int{1, 2, 3}, + }, + { + name: "subfolder scope - filter correctly", + repoMetadata: &git.RepositoryMetadata{ + Subfolder: "apps/demo", + }, + openIssues: map[int]OpenIssueEntry{ + 1: {OpenIssueReport: OpenIssueReport{FilePath: "apps/demo/main.py"}}, + 2: {OpenIssueReport: OpenIssueReport{FilePath: "apps/another/main.py"}}, + 3: {OpenIssueReport: OpenIssueReport{FilePath: "apps/demo/utils.py"}}, + 4: {OpenIssueReport: OpenIssueReport{FilePath: "root/file.py"}}, + }, + expectedCount: 2, + expectedIssues: []int{1, 3}, + }, + { + name: "exact subfolder match", + repoMetadata: &git.RepositoryMetadata{ + Subfolder: "apps/demo", + }, + openIssues: map[int]OpenIssueEntry{ + 1: {OpenIssueReport: OpenIssueReport{FilePath: "apps/demo"}}, + 2: {OpenIssueReport: OpenIssueReport{FilePath: "apps/demo/main.py"}}, + 3: {OpenIssueReport: OpenIssueReport{FilePath: "apps/demo/subdir/file.py"}}, + }, + expectedCount: 3, + expectedIssues: []int{1, 2, 3}, + }, + { + name: "nil repo metadata - include all", + repoMetadata: nil, + openIssues: map[int]OpenIssueEntry{ + 1: {OpenIssueReport: OpenIssueReport{FilePath: "any/path/file.py"}}, + 2: {OpenIssueReport: OpenIssueReport{FilePath: "another/path/file.py"}}, + }, + expectedCount: 2, + expectedIssues: []int{1, 2}, + }, + { + name: "empty open issues", + repoMetadata: &git.RepositoryMetadata{ + Subfolder: "apps/demo", + }, + openIssues: map[int]OpenIssueEntry{}, + expectedCount: 0, + expectedIssues: []int{}, + }, + { + name: "subfolder with trailing slashes", + repoMetadata: &git.RepositoryMetadata{ + Subfolder: "/apps/demo/", + }, + openIssues: map[int]OpenIssueEntry{ + 1: {OpenIssueReport: OpenIssueReport{FilePath: "apps/demo/main.py"}}, + 2: {OpenIssueReport: OpenIssueReport{FilePath: "apps/another/main.py"}}, + }, + expectedCount: 1, + expectedIssues: 
[]int{1}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + filtered := filterIssuesBySourceFolder(tt.openIssues, tt.repoMetadata, logger) + + if len(filtered) != tt.expectedCount { + t.Fatalf("expected %d filtered issues, got %d", tt.expectedCount, len(filtered)) + } + + // Check that only expected issues are present + for _, expectedNum := range tt.expectedIssues { + if _, exists := filtered[expectedNum]; !exists { + t.Fatalf("expected issue %d to be included in filtered results", expectedNum) + } + } + + // Check that no unexpected issues are present + for num := range filtered { + found := false + for _, expectedNum := range tt.expectedIssues { + if num == expectedNum { + found = true + break + } + } + if !found { + t.Fatalf("unexpected issue %d found in filtered results", num) + } + } + }) + } +} diff --git a/docs/engineering/sarif-issues-path-analysis.md b/docs/engineering/sarif-issues-path-analysis.md index 9f2af69c..ff8fc454 100644 --- a/docs/engineering/sarif-issues-path-analysis.md +++ b/docs/engineering/sarif-issues-path-analysis.md @@ -136,4 +136,14 @@ snyk code test --sarif-file-output=snyk-another.sarif apps/another # create issues scanio sarif-issues --sarif semgrep-demo.sarif --source-folder apps/demo scanio sarif-issues --sarif snyk-another.sarif --source-folder apps/another -``` \ No newline at end of file +``` + +**Solution Implemented**: The `sarif-issues` command now filters open issues by source folder scope before correlation. Issues are scoped based on their file path metadata matching the normalized subfolder path. This enables independent issue management for different subfolders in monorepo CI workflows. 
+ +**Key Changes**: +- Added `filterIssuesBySourceFolder()` function that filters open issues to only those within the current `--source-folder` scope +- Issues are filtered before correlation, ensuring each subfolder's issues are managed independently +- When `--source-folder` points to a subfolder, only issues whose file paths start with that subfolder are considered +- When scanning from root (no subfolder), all issues are included as before + +**Expected Behavior**: Both sets of issues remain open and are managed independently. Issues from `apps/demo` won't be closed when running the second command for `apps/another`. \ No newline at end of file diff --git a/docs/reference/cmd-sarif-issues.md b/docs/reference/cmd-sarif-issues.md index 16f81d6a..c16a904c 100644 --- a/docs/reference/cmd-sarif-issues.md +++ b/docs/reference/cmd-sarif-issues.md @@ -165,6 +165,24 @@ Only specific types of open issues are considered for correlation: - **Scanio-managed issues**: Must contain the scanio-managed annotation - **Malformed issues are skipped**: Issues without proper metadata are ignored to prevent accidental closure of manually created issues +### Subfolder Scoping +The command supports independent issue management for different subfolders in monorepo workflows: + +- **Scoped Correlation**: When `--source-folder` points to a subfolder, only open issues whose file paths fall within that subfolder are considered for correlation +- **Independent Management**: Issues from different subfolders are managed independently, preventing cross-subfolder interference +- **Root Scope**: When scanning from repository root (no `--source-folder` or `--source-folder` points to root), all issues are considered + +**Example Monorepo Workflow**: +```bash +# Frontend CI job - manages issues in apps/frontend only +scanio sarif-issues --sarif frontend-results.sarif --source-folder apps/frontend + +# Backend CI job - manages issues in apps/backend only +scanio sarif-issues --sarif 
backend-results.sarif --source-folder apps/backend +``` + +This enables separate CI jobs for different parts of a monorepo without issues from one subfolder affecting the other. + ## Issue Format ### Issue Title Format From f86e6064704cb845226126c3702032c8528ecfda Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sat, 11 Oct 2025 12:07:24 +0200 Subject: [PATCH 36/52] feat: add code flow formatting to SARIF issue body and implement corresponding tests --- cmd/sarif-issues/issue_processing.go | 5 + cmd/sarif-issues/utils.go | 107 +++++++++ cmd/sarif-issues/utils_test.go | 314 +++++++++++++++++++++++++++ 3 files changed, 426 insertions(+) diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index 2b3e309f..ed7a6cb6 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -302,6 +302,11 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, s } } + // Add code flow section if available + if codeFlowSection := FormatCodeFlows(res, options, repoMetadata, sourceFolderAbs); codeFlowSection != "" { + body += "\n\n---\n\n" + codeFlowSection + "\n\n---\n\n" + } + // Append security identifier tags (CWE, OWASP) with links if available in rule properties if r, ok := rulesByID[ruleID]; ok && r != nil && r.Properties != nil { var tags []string diff --git a/cmd/sarif-issues/utils.go b/cmd/sarif-issues/utils.go index 74194184..b7520616 100644 --- a/cmd/sarif-issues/utils.go +++ b/cmd/sarif-issues/utils.go @@ -479,6 +479,113 @@ func ApplyEnvironmentFallbacks(opts *RunOptions) { } } +// FormatCodeFlows formats code flows from SARIF results into collapsible markdown sections. +// Each thread flow is displayed in a separate
block with numbered steps and GitHub permalinks. +func FormatCodeFlows(result *sarif.Result, options RunOptions, repoMetadata *git.RepositoryMetadata, sourceFolderAbs string) string { + if result == nil || len(result.CodeFlows) == 0 { + return "" + } + + var sections []string + threadFlowCounter := 0 + + for _, codeFlow := range result.CodeFlows { + if codeFlow == nil || len(codeFlow.ThreadFlows) == 0 { + continue + } + + for _, threadFlow := range codeFlow.ThreadFlows { + if threadFlow == nil || len(threadFlow.Locations) == 0 { + continue + } + + threadFlowCounter++ + var steps []string + seenSteps := make(map[string]bool) // Track seen permalink+message combinations + actualStepNum := 0 // Track actual step number for sequential numbering + + for _, threadFlowLocation := range threadFlow.Locations { + if threadFlowLocation == nil || threadFlowLocation.Location == nil { + continue + } + + location := threadFlowLocation.Location + if location.PhysicalLocation == nil || location.PhysicalLocation.ArtifactLocation == nil { + continue + } + + // Extract file path and line information + fileURI, _ := extractFileURIFromResult(&sarif.Result{ + Locations: []*sarif.Location{location}, + }, sourceFolderAbs, repoMetadata) + + if fileURI == "" { + continue + } + + // Extract line numbers + startLine := 0 + endLine := 0 + if location.PhysicalLocation.Region != nil { + if location.PhysicalLocation.Region.StartLine != nil { + startLine = *location.PhysicalLocation.Region.StartLine + } + if location.PhysicalLocation.Region.EndLine != nil { + endLine = *location.PhysicalLocation.Region.EndLine + } + } + + // Create GitHub permalink + permalink := buildGitHubPermalink(options, repoMetadata, fileURI, startLine, endLine) + + // Format step with optional message text + messageText := "" + if location.Message != nil && location.Message.Text != nil && strings.TrimSpace(*location.Message.Text) != "" { + messageText = strings.TrimSpace(*location.Message.Text) + } + + // Create unique key 
for deduplication (permalink + message text) + dedupKey := fmt.Sprintf("%s|%s", permalink, messageText) + + // Skip if we've already seen this exact combination + if seenSteps[dedupKey] { + continue + } + seenSteps[dedupKey] = true + + // Increment actual step number only when we add a step + actualStepNum++ + + // Format step with optional message text + stepText := fmt.Sprintf("Step %d:", actualStepNum) + if messageText != "" { + stepText = fmt.Sprintf("Step %d: %s", actualStepNum, messageText) + } + + // Add step text and permalink on separate lines + if permalink != "" { + steps = append(steps, stepText+"\n"+permalink) + } else { + steps = append(steps, stepText) + } + } + + if len(steps) > 0 { + summary := fmt.Sprintf("Code Flow %d", threadFlowCounter) + section := fmt.Sprintf("
\n%s\n\n%s\n
", + summary, strings.Join(steps, "\n\n")) + sections = append(sections, section) + } + } + } + + if len(sections) == 0 { + return "" + } + + return strings.Join(sections, "\n\n") +} + // ApplyGitMetadataFallbacks applies git metadata fallbacks to the run options. // It extracts namespace, repository, and ref from local git repository metadata // when the corresponding flags are not already provided. diff --git a/cmd/sarif-issues/utils_test.go b/cmd/sarif-issues/utils_test.go index 8f156011..40f826ef 100644 --- a/cmd/sarif-issues/utils_test.go +++ b/cmd/sarif-issues/utils_test.go @@ -1533,6 +1533,320 @@ func TestBuildIssueBodyWithSnykMessage(t *testing.T) { } } +func TestFormatCodeFlows(t *testing.T) { + tests := []struct { + name string + result *sarif.Result + expected string + }{ + { + name: "no code flows", + result: &sarif.Result{}, + expected: "", + }, + { + name: "nil code flows", + result: &sarif.Result{ + CodeFlows: nil, + }, + expected: "", + }, + { + name: "empty code flows", + result: &sarif.Result{ + CodeFlows: []*sarif.CodeFlow{}, + }, + expected: "", + }, + { + name: "single thread flow with message text", + result: &sarif.Result{ + CodeFlows: []*sarif.CodeFlow{ + { + ThreadFlows: []*sarif.ThreadFlow{ + { + Locations: []*sarif.ThreadFlowLocation{ + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(1), + EndLine: intPtr(1), + }, + }, + Message: &sarif.Message{ + Text: stringPtr("ControlFlowNode for ImportMember"), + }, + }, + }, + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(8), + EndLine: intPtr(8), + }, + }, + Message: &sarif.Message{ + Text: stringPtr("ControlFlowNode for request"), + }, + }, + }, + }, + }, + }, + }, + }, + }, + expected: `
+Code Flow 1 + +Step 1: ControlFlowNode for ImportMember +https://github.com/test-org/test-repo/blob/test-ref/main.py#L1 + +Step 2: ControlFlowNode for request +https://github.com/test-org/test-repo/blob/test-ref/main.py#L8 +
`, + }, + { + name: "single thread flow without message text", + result: &sarif.Result{ + CodeFlows: []*sarif.CodeFlow{ + { + ThreadFlows: []*sarif.ThreadFlow{ + { + Locations: []*sarif.ThreadFlowLocation{ + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("app.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(5), + EndLine: intPtr(5), + }, + }, + Message: &sarif.Message{ + Text: stringPtr(""), + }, + }, + }, + }, + }, + }, + }, + }, + }, + expected: `
+Code Flow 1 + +Step 1: +https://github.com/test-org/test-repo/blob/test-ref/app.py#L5 +
`, + }, + { + name: "multiple thread flows", + result: &sarif.Result{ + CodeFlows: []*sarif.CodeFlow{ + { + ThreadFlows: []*sarif.ThreadFlow{ + { + Locations: []*sarif.ThreadFlowLocation{ + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("file1.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(1), + EndLine: intPtr(1), + }, + }, + Message: &sarif.Message{ + Text: stringPtr("First flow step"), + }, + }, + }, + }, + }, + { + Locations: []*sarif.ThreadFlowLocation{ + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("file2.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(10), + EndLine: intPtr(10), + }, + }, + Message: &sarif.Message{ + Text: stringPtr("Second flow step"), + }, + }, + }, + }, + }, + }, + }, + }, + }, + expected: `
+Code Flow 1 + +Step 1: First flow step +https://github.com/test-org/test-repo/blob/test-ref/file1.py#L1 +
+ +
+Code Flow 2 + +Step 1: Second flow step +https://github.com/test-org/test-repo/blob/test-ref/file2.py#L10 +
`, + }, + { + name: "thread flow with line range", + result: &sarif.Result{ + CodeFlows: []*sarif.CodeFlow{ + { + ThreadFlows: []*sarif.ThreadFlow{ + { + Locations: []*sarif.ThreadFlowLocation{ + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(10), + EndLine: intPtr(15), + }, + }, + Message: &sarif.Message{ + Text: stringPtr("Multi-line location"), + }, + }, + }, + }, + }, + }, + }, + }, + }, + expected: `
+Code Flow 1 + +Step 1: Multi-line location +https://github.com/test-org/test-repo/blob/test-ref/main.py#L10-L15 +
`, + }, + { + name: "duplicate steps with same permalink and message", + result: &sarif.Result{ + CodeFlows: []*sarif.CodeFlow{ + { + ThreadFlows: []*sarif.ThreadFlow{ + { + Locations: []*sarif.ThreadFlowLocation{ + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(1), + EndLine: intPtr(1), + }, + }, + Message: &sarif.Message{ + Text: stringPtr("First step"), + }, + }, + }, + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(1), + EndLine: intPtr(1), + }, + }, + Message: &sarif.Message{ + Text: stringPtr("First step"), // Same message as previous + }, + }, + }, + { + Location: &sarif.Location{ + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: stringPtr("main.py"), + }, + Region: &sarif.Region{ + StartLine: intPtr(2), + EndLine: intPtr(2), + }, + }, + Message: &sarif.Message{ + Text: stringPtr("Second step"), // Different message + }, + }, + }, + }, + }, + }, + }, + }, + }, + expected: `
+Code Flow 1 + +Step 1: First step +https://github.com/test-org/test-repo/blob/test-ref/main.py#L1 + +Step 2: Second step +https://github.com/test-org/test-repo/blob/test-ref/main.py#L2 +
`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + options := RunOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: "test-ref", + } + + repoMetadata := &git.RepositoryMetadata{ + RepoRootFolder: "/test/repo", + } + + result := FormatCodeFlows(tt.result, options, repoMetadata, "/test/repo") + + if result != tt.expected { + t.Errorf("Expected:\n%s\nGot:\n%s", tt.expected, result) + } + }) + } +} + // Helper functions for creating test data func stringPtr(s string) *string { return &s From f287f9e28634521ab1d595660dae9bf8bbc81bf4 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sat, 11 Oct 2025 16:56:56 +0200 Subject: [PATCH 37/52] refactor: replace local subfolder normalization with internal package functions for improved path handling in SARIF issue processing --- cmd/sarif-issues/issue_processing.go | 2 +- cmd/sarif-issues/utils.go | 135 ++------ internal/sarif/message_formatter.go | 99 +----- internal/sarif/message_formatter_test.go | 25 +- internal/sarif/path_helpers.go | 198 ++++++++++++ internal/sarif/path_helpers_test.go | 390 +++++++++++++++++++++++ 6 files changed, 631 insertions(+), 218 deletions(-) create mode 100644 internal/sarif/path_helpers.go create mode 100644 internal/sarif/path_helpers_test.go diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index ed7a6cb6..5f2f4095 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -454,7 +454,7 @@ func buildKnownIssuesFromOpen(openIssues map[int]OpenIssueEntry, lg hclog.Logger // Issues are filtered based on their FilePath metadata matching the normalized subfolder path. 
func filterIssuesBySourceFolder(openIssues map[int]OpenIssueEntry, repoMetadata *git.RepositoryMetadata, lg hclog.Logger) map[int]OpenIssueEntry { // Determine the subfolder scope from repo metadata - subfolder := normalisedSubfolder(repoMetadata) + subfolder := internalsarif.NormalisedSubfolder(repoMetadata) // If no subfolder (scanning from root), include all issues if subfolder == "" { diff --git a/cmd/sarif-issues/utils.go b/cmd/sarif-issues/utils.go index b7520616..8ea1bf0e 100644 --- a/cmd/sarif-issues/utils.go +++ b/cmd/sarif-issues/utils.go @@ -14,6 +14,7 @@ import ( "github.com/hashicorp/go-hclog" "github.com/owenrumney/go-sarif/v2/sarif" "github.com/scan-io-git/scan-io/internal/git" + internalsarif "github.com/scan-io-git/scan-io/internal/sarif" "github.com/scan-io-git/scan-io/pkg/shared/files" "github.com/scan-io-git/scan-io/pkg/shared/vcsurl" ) @@ -269,9 +270,19 @@ func extractFileURIFromResult(res *sarif.Result, absSourceFolder string, repoMet return "", "" } - repoPath := "" - localPath := "" - subfolder := normalisedSubfolder(repoMetadata) + // Use shared function to get repo-relative path + repoPath := internalsarif.ConvertToRepoRelativePath(rawURI, repoMetadata, absSourceFolder) + + // Calculate local path for file operations (snippet hashing, etc.) + localPath := calculateLocalPath(rawURI, repoMetadata, absSourceFolder) + + return repoPath, localPath +} + +// calculateLocalPath determines the absolute local filesystem path for a SARIF URI. +// This is used for reading files for snippet hashing and other local file operations. 
+func calculateLocalPath(rawURI string, repoMetadata *git.RepositoryMetadata, absSourceFolder string) string { + subfolder := internalsarif.NormalisedSubfolder(repoMetadata) var repoRoot string if repoMetadata != nil && strings.TrimSpace(repoMetadata.RepoRootFolder) != "" { repoRoot = filepath.Clean(repoMetadata.RepoRootFolder) @@ -285,127 +296,17 @@ func extractFileURIFromResult(res *sarif.Result, absSourceFolder string, repoMet } } - // Normalise URI to the host OS path representation + // Normalize URI to the host OS path representation osURI := filepath.FromSlash(rawURI) osURI = strings.TrimPrefix(osURI, "file://") cleanURI := filepath.Clean(osURI) if filepath.IsAbs(cleanURI) { - localPath = cleanURI - if repoRoot != "" { - if rel, err := filepath.Rel(repoRoot, localPath); err == nil { - if rel != "." && !strings.HasPrefix(rel, "..") { - repoPath = filepath.ToSlash(rel) - } - } - } - if repoPath == "" && absSource != "" { - if rel, err := filepath.Rel(absSource, localPath); err == nil { - repoPath = filepath.ToSlash(rel) - } - } - if repoPath == "" { - repoPath = filepath.ToSlash(strings.TrimPrefix(localPath, string(filepath.Separator))) - } - } else { - localPath = resolveRelativeLocalPath(cleanURI, repoRoot, subfolder, absSource) - - if repoRoot != "" && localPath != "" && pathWithin(localPath, repoRoot) { - if rel, err := filepath.Rel(repoRoot, localPath); err == nil { - if rel != "." 
{ - repoPath = filepath.ToSlash(rel) - } - } - } - - if repoPath == "" { - normalised := strings.TrimLeft(filepath.ToSlash(cleanURI), "./") - if subfolder != "" && !strings.HasPrefix(normalised, subfolder+"/") && normalised != subfolder { - repoPath = filepath.ToSlash(filepath.Join(subfolder, normalised)) - } else { - repoPath = filepath.ToSlash(normalised) - } - } - } - - repoPath = strings.TrimLeft(repoPath, "/") - repoPath = filepath.ToSlash(repoPath) - return repoPath, localPath -} - -func resolveRelativeLocalPath(cleanURI, repoRoot, subfolder, absSource string) string { - candidateRel := cleanURI - var bases []string - seen := map[string]struct{}{} - - addBase := func(base string) { - if base == "" { - return - } - if abs, err := filepath.Abs(base); err == nil { - base = abs - } else { - base = filepath.Clean(base) - } - if _, ok := seen[base]; ok { - return - } - seen[base] = struct{}{} - bases = append(bases, base) - } - - addBase(repoRoot) - if repoRoot != "" && subfolder != "" { - addBase(filepath.Join(repoRoot, filepath.FromSlash(subfolder))) + return cleanURI } - addBase(absSource) - for _, base := range bases { - candidate := filepath.Clean(filepath.Join(base, candidateRel)) - if repoRoot != "" && !pathWithin(candidate, repoRoot) { - continue - } - if _, err := os.Stat(candidate); err == nil { - return candidate - } - } - - if len(bases) > 0 { - candidate := filepath.Clean(filepath.Join(bases[0], candidateRel)) - if repoRoot == "" || pathWithin(candidate, repoRoot) { - return candidate - } - } - - if absSource != "" { - return filepath.Clean(filepath.Join(absSource, candidateRel)) - } - return "" -} - -func pathWithin(path, root string) bool { - if root == "" { - return true - } - cleanPath, err1 := filepath.Abs(path) - cleanRoot, err2 := filepath.Abs(root) - if err1 != nil || err2 != nil { - cleanPath = filepath.Clean(path) - cleanRoot = filepath.Clean(root) - } - if cleanPath == cleanRoot { - return true - } - rootWithSep := cleanRoot + 
string(filepath.Separator) - return strings.HasPrefix(cleanPath, rootWithSep) -} - -func normalisedSubfolder(md *git.RepositoryMetadata) string { - if md == nil { - return "" - } - sub := strings.Trim(md.Subfolder, "/\\") - return filepath.ToSlash(sub) + // Relative path - resolve to absolute + return internalsarif.ResolveRelativeLocalPath(cleanURI, repoRoot, subfolder, absSource) } // extractRegionFromResult returns start and end line numbers (0 when not present) diff --git a/internal/sarif/message_formatter.go b/internal/sarif/message_formatter.go index afdc6a7b..68e98a23 100644 --- a/internal/sarif/message_formatter.go +++ b/internal/sarif/message_formatter.go @@ -2,7 +2,6 @@ package sarif import ( "fmt" - "path/filepath" "regexp" "strconv" "strings" @@ -250,7 +249,7 @@ func buildLocationLink(location *sarif.Location, repoMetadata *git.RepositoryMet // Get file path and convert to repository-relative path filePath := *artifact.URI - repoPath := convertToRepoRelativePath(filePath, repoMetadata, options.SourceFolder) + repoPath := ConvertToRepoRelativePath(filePath, repoMetadata, options.SourceFolder) // Get line information region := location.PhysicalLocation.Region @@ -281,99 +280,3 @@ func buildLocationLink(location *sarif.Location, repoMetadata *git.RepositoryMet return fmt.Sprintf("%s#L%d-L%d", baseURL, startLine, endLine) } } - -// convertToRepoRelativePath converts a SARIF artifact URI to a repository-relative path -// This mimics the logic from extractFileURIFromResult in cmd/sarif-issues/utils.go -func convertToRepoRelativePath(rawURI string, repoMetadata *git.RepositoryMetadata, sourceFolder string) string { - rawURI = strings.TrimSpace(rawURI) - if rawURI == "" { - return "" - } - - repoPath := "" - subfolder := normalisedSubfolder(repoMetadata) - var repoRoot string - if repoMetadata != nil && strings.TrimSpace(repoMetadata.RepoRootFolder) != "" { - repoRoot = filepath.Clean(repoMetadata.RepoRootFolder) - } - absSource := 
strings.TrimSpace(sourceFolder) - if absSource != "" { - if abs, err := filepath.Abs(absSource); err == nil { - absSource = abs - } else { - absSource = filepath.Clean(absSource) - } - } - - // Normalise URI to the host OS path representation - osURI := filepath.FromSlash(rawURI) - osURI = strings.TrimPrefix(osURI, "file://") - cleanURI := filepath.Clean(osURI) - - if filepath.IsAbs(cleanURI) { - localPath := cleanURI - if repoRoot != "" { - if rel, err := filepath.Rel(repoRoot, localPath); err == nil { - if rel != "." && !strings.HasPrefix(rel, "..") { - repoPath = filepath.ToSlash(rel) - } - } - } - if repoPath == "" && absSource != "" { - if rel, err := filepath.Rel(absSource, localPath); err == nil { - repoPath = filepath.ToSlash(rel) - } - } - if repoPath == "" { - repoPath = filepath.ToSlash(strings.TrimPrefix(localPath, string(filepath.Separator))) - } - } else { - localPath := resolveRelativeLocalPath(cleanURI, repoRoot, subfolder, absSource) - - if repoRoot != "" && localPath != "" && pathWithin(localPath, repoRoot) { - if rel, err := filepath.Rel(repoRoot, localPath); err == nil { - if rel != "." 
{ - repoPath = filepath.ToSlash(rel) - } - } - } - - if repoPath == "" && subfolder != "" { - repoPath = filepath.ToSlash(filepath.Join(subfolder, cleanURI)) - } - if repoPath == "" { - repoPath = filepath.ToSlash(cleanURI) - } - } - - return repoPath -} - -// normalisedSubfolder extracts and normalizes the subfolder from repository metadata -func normalisedSubfolder(md *git.RepositoryMetadata) string { - if md == nil { - return "" - } - sub := strings.Trim(md.Subfolder, "/\\") - return filepath.ToSlash(sub) -} - -// resolveRelativeLocalPath resolves a relative URI to a local filesystem path -func resolveRelativeLocalPath(cleanURI, repoRoot, subfolder, absSource string) string { - if repoRoot != "" { - if subfolder != "" { - return filepath.Join(repoRoot, subfolder, cleanURI) - } - return filepath.Join(repoRoot, cleanURI) - } - if absSource != "" { - return filepath.Join(absSource, cleanURI) - } - return cleanURI -} - -// pathWithin checks if a path is within another path -func pathWithin(path, within string) bool { - rel, err := filepath.Rel(within, path) - return err == nil && !strings.HasPrefix(rel, "..") -} diff --git a/internal/sarif/message_formatter_test.go b/internal/sarif/message_formatter_test.go index 90df32c0..01bbdaee 100644 --- a/internal/sarif/message_formatter_test.go +++ b/internal/sarif/message_formatter_test.go @@ -1,6 +1,8 @@ package sarif import ( + "os" + "path/filepath" "testing" "github.com/owenrumney/go-sarif/v2/sarif" @@ -357,10 +359,29 @@ func TestBuildLocationLinkAbsolutePath(t *testing.T) { func TestBuildLocationLinkWithSubfolder(t *testing.T) { // Test the specific case mentioned in the issue: subfolder path resolution + // Create a temporary directory structure to simulate the repository + tempDir, err := os.MkdirTemp("", "sarif_test") + if err != nil { + t.Fatalf("failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + repoRoot := filepath.Join(tempDir, "scanio-test") + subfolder := filepath.Join(repoRoot, 
"apps", "demo") + if err := os.MkdirAll(subfolder, 0755); err != nil { + t.Fatalf("failed to create subfolder: %v", err) + } + + // Create the file so path resolution works correctly + mainFile := filepath.Join(subfolder, "main.py") + if err := os.WriteFile(mainFile, []byte("test"), 0644); err != nil { + t.Fatalf("failed to create test file: %v", err) + } + location := createTestLocation("main.py", 34, 1, 34, 10) repoMetadata := &git.RepositoryMetadata{ - RepoRootFolder: "/tmp/scanio-test", + RepoRootFolder: repoRoot, Subfolder: "apps/demo", CommitHash: stringPtr("aec0b795c350ff53fe9ab01adf862408aa34c3fd"), } @@ -369,7 +390,7 @@ func TestBuildLocationLinkWithSubfolder(t *testing.T) { Namespace: "scan-io-git", Repository: "scanio-test", Ref: "aec0b795c350ff53fe9ab01adf862408aa34c3fd", - SourceFolder: "/tmp/scanio-test/apps/demo", + SourceFolder: subfolder, } result := buildLocationLink(location, repoMetadata, options) diff --git a/internal/sarif/path_helpers.go b/internal/sarif/path_helpers.go new file mode 100644 index 00000000..23e5d3d4 --- /dev/null +++ b/internal/sarif/path_helpers.go @@ -0,0 +1,198 @@ +package sarif + +import ( + "os" + "path/filepath" + "strings" + + "github.com/scan-io-git/scan-io/internal/git" +) + +// NormalisedSubfolder extracts and normalizes the subfolder from repository metadata. +// It returns the subfolder path with forward slashes and no leading/trailing slashes. +// Returns empty string if metadata is nil or subfolder is empty. +func NormalisedSubfolder(md *git.RepositoryMetadata) string { + if md == nil { + return "" + } + sub := strings.Trim(md.Subfolder, "/\\") + // Replace all backslashes with forward slashes for cross-platform compatibility + sub = strings.ReplaceAll(sub, "\\", "/") + return sub +} + +// PathWithin checks if a path is within another path (root). +// It handles both absolute and relative paths, attempting to resolve them first. +// Returns true if path is within root, or if root is empty. 
+func PathWithin(path, root string) bool { + if root == "" { + return true + } + cleanPath, err1 := filepath.Abs(path) + cleanRoot, err2 := filepath.Abs(root) + if err1 != nil || err2 != nil { + cleanPath = filepath.Clean(path) + cleanRoot = filepath.Clean(root) + } + if cleanPath == cleanRoot { + return true + } + rootWithSep := cleanRoot + string(filepath.Separator) + return strings.HasPrefix(cleanPath, rootWithSep) +} + +// ResolveRelativeLocalPath resolves a relative URI to a local filesystem path. +// It tries multiple base directories in order of preference: +// 1. repoRoot +// 2. repoRoot/subfolder (if subfolder is provided) +// 3. absSource +// +// For each base, it checks if the resolved path exists on the filesystem and is within repoRoot. +// If no path exists, it returns the first candidate that would be within repoRoot. +// Falls back to absSource-based path if all else fails. +// +// Parameters: +// - cleanURI: the relative URI path (already cleaned) +// - repoRoot: the repository root directory (optional) +// - subfolder: the subfolder within the repository (optional) +// - absSource: the absolute source folder path (optional) +func ResolveRelativeLocalPath(cleanURI, repoRoot, subfolder, absSource string) string { + candidateRel := cleanURI + var bases []string + seen := map[string]struct{}{} + + addBase := func(base string) { + if base == "" { + return + } + if abs, err := filepath.Abs(base); err == nil { + base = abs + } else { + base = filepath.Clean(base) + } + if _, ok := seen[base]; ok { + return + } + seen[base] = struct{}{} + bases = append(bases, base) + } + + addBase(repoRoot) + if repoRoot != "" && subfolder != "" { + addBase(filepath.Join(repoRoot, filepath.FromSlash(subfolder))) + } + addBase(absSource) + + // Try each base directory, checking if the file exists + for _, base := range bases { + candidate := filepath.Clean(filepath.Join(base, candidateRel)) + if repoRoot != "" && !PathWithin(candidate, repoRoot) { + continue + } + if _, 
err := os.Stat(candidate); err == nil { + return candidate + } + } + + // If no file exists, return the first valid candidate path + if len(bases) > 0 { + candidate := filepath.Clean(filepath.Join(bases[0], candidateRel)) + if repoRoot == "" || PathWithin(candidate, repoRoot) { + return candidate + } + } + + // Final fallback to absSource + if absSource != "" { + return filepath.Clean(filepath.Join(absSource, candidateRel)) + } + return "" +} + +// ConvertToRepoRelativePath converts a SARIF artifact URI to a repository-relative path. +// This function handles both absolute and relative URIs, normalizing them to repo-relative paths. +// +// The conversion process: +// 1. Normalizes the URI (removes file:// prefix, converts to OS path separators) +// 2. For absolute paths: calculates relative path from repoRoot or sourceFolder +// 3. For relative paths: resolves to absolute first, then calculates repo-relative path +// 4. Ensures subfolder prefix is included when scanning from a subdirectory +// +// Parameters: +// - rawURI: the artifact URI from SARIF (may be absolute or relative, may have file:// prefix) +// - repoMetadata: repository metadata containing RepoRootFolder and Subfolder (optional) +// - sourceFolder: the source folder provided by the user (optional) +// +// Returns: +// - A forward-slash separated path relative to the repository root +// - Empty string if the URI is invalid or empty +func ConvertToRepoRelativePath(rawURI string, repoMetadata *git.RepositoryMetadata, sourceFolder string) string { + rawURI = strings.TrimSpace(rawURI) + if rawURI == "" { + return "" + } + + repoPath := "" + subfolder := NormalisedSubfolder(repoMetadata) + var repoRoot string + if repoMetadata != nil && strings.TrimSpace(repoMetadata.RepoRootFolder) != "" { + repoRoot = filepath.Clean(repoMetadata.RepoRootFolder) + } + absSource := strings.TrimSpace(sourceFolder) + if absSource != "" { + if abs, err := filepath.Abs(absSource); err == nil { + absSource = abs + } else { + 
absSource = filepath.Clean(absSource) + } + } + + // Normalize URI to the host OS path representation + osURI := filepath.FromSlash(rawURI) + osURI = strings.TrimPrefix(osURI, "file://") + cleanURI := filepath.Clean(osURI) + + if filepath.IsAbs(cleanURI) { + // Absolute path: calculate repo-relative path + localPath := cleanURI + if repoRoot != "" { + if rel, err := filepath.Rel(repoRoot, localPath); err == nil { + if rel != "." && !strings.HasPrefix(rel, "..") { + repoPath = filepath.ToSlash(rel) + } + } + } + if repoPath == "" && absSource != "" { + if rel, err := filepath.Rel(absSource, localPath); err == nil { + repoPath = filepath.ToSlash(rel) + } + } + if repoPath == "" { + repoPath = filepath.ToSlash(strings.TrimPrefix(localPath, string(filepath.Separator))) + } + } else { + // Relative path: resolve to absolute first + localPath := ResolveRelativeLocalPath(cleanURI, repoRoot, subfolder, absSource) + + if repoRoot != "" && localPath != "" && PathWithin(localPath, repoRoot) { + if rel, err := filepath.Rel(repoRoot, localPath); err == nil { + if rel != "." 
{ + repoPath = filepath.ToSlash(rel) + } + } + } + + if repoPath == "" { + normalised := strings.TrimLeft(filepath.ToSlash(cleanURI), "./") + if subfolder != "" && !strings.HasPrefix(normalised, subfolder+"/") && normalised != subfolder { + repoPath = filepath.ToSlash(filepath.Join(subfolder, normalised)) + } else { + repoPath = filepath.ToSlash(normalised) + } + } + } + + repoPath = strings.TrimLeft(repoPath, "/") + repoPath = filepath.ToSlash(repoPath) + return repoPath +} diff --git a/internal/sarif/path_helpers_test.go b/internal/sarif/path_helpers_test.go new file mode 100644 index 00000000..8a7c4fa0 --- /dev/null +++ b/internal/sarif/path_helpers_test.go @@ -0,0 +1,390 @@ +package sarif + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/scan-io-git/scan-io/internal/git" +) + +func TestNormalisedSubfolder(t *testing.T) { + tests := []struct { + name string + metadata *git.RepositoryMetadata + expected string + }{ + { + name: "nil metadata", + metadata: nil, + expected: "", + }, + { + name: "empty subfolder", + metadata: &git.RepositoryMetadata{ + Subfolder: "", + }, + expected: "", + }, + { + name: "subfolder with forward slash", + metadata: &git.RepositoryMetadata{ + Subfolder: "apps/demo", + }, + expected: "apps/demo", + }, + { + name: "subfolder with leading slash", + metadata: &git.RepositoryMetadata{ + Subfolder: "/apps/demo", + }, + expected: "apps/demo", + }, + { + name: "subfolder with trailing slash", + metadata: &git.RepositoryMetadata{ + Subfolder: "apps/demo/", + }, + expected: "apps/demo", + }, + { + name: "subfolder with backslash", + metadata: &git.RepositoryMetadata{ + Subfolder: "apps\\demo", + }, + expected: "apps/demo", + }, + { + name: "subfolder with both slashes", + metadata: &git.RepositoryMetadata{ + Subfolder: "/apps/demo\\", + }, + expected: "apps/demo", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := NormalisedSubfolder(tt.metadata) + if result != tt.expected { + 
t.Errorf("expected %q, got %q", tt.expected, result) + } + }) + } +} + +func TestPathWithin(t *testing.T) { + tempDir, err := os.MkdirTemp("", "path_within_test") + if err != nil { + t.Fatalf("failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + subdir := filepath.Join(tempDir, "subdir") + if err := os.Mkdir(subdir, 0755); err != nil { + t.Fatalf("failed to create subdirectory: %v", err) + } + + tests := []struct { + name string + path string + root string + expected bool + }{ + { + name: "empty root always returns true", + path: "/any/path", + root: "", + expected: true, + }, + { + name: "path equals root", + path: tempDir, + root: tempDir, + expected: true, + }, + { + name: "path within root", + path: subdir, + root: tempDir, + expected: true, + }, + { + name: "path outside root", + path: tempDir, + root: subdir, + expected: false, + }, + { + name: "relative path within root", + path: filepath.Join(tempDir, ".", "subdir"), + root: tempDir, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := PathWithin(tt.path, tt.root) + if result != tt.expected { + t.Errorf("PathWithin(%q, %q) = %v, expected %v", tt.path, tt.root, result, tt.expected) + } + }) + } +} + +func TestResolveRelativeLocalPath(t *testing.T) { + tempDir, err := os.MkdirTemp("", "resolve_path_test") + if err != nil { + t.Fatalf("failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create a repository structure + repoRoot := filepath.Join(tempDir, "repo") + subfolder := filepath.Join(repoRoot, "apps", "demo") + if err := os.MkdirAll(subfolder, 0755); err != nil { + t.Fatalf("failed to create subfolder: %v", err) + } + + // Create a test file + testFile := filepath.Join(subfolder, "main.py") + if err := os.WriteFile(testFile, []byte("test"), 0644); err != nil { + t.Fatalf("failed to create test file: %v", err) + } + + tests := []struct { + name string + cleanURI string + repoRoot string + 
subfolder string + absSource string + expected string + }{ + { + name: "relative file exists in subfolder", + cleanURI: "main.py", + repoRoot: repoRoot, + subfolder: "apps/demo", + absSource: subfolder, + expected: testFile, + }, + { + name: "relative file with repo root only", + cleanURI: filepath.Join("apps", "demo", "main.py"), + repoRoot: repoRoot, + subfolder: "", + absSource: "", + expected: testFile, + }, + { + name: "fallback to absSource", + cleanURI: "main.py", + repoRoot: "", + subfolder: "", + absSource: subfolder, + expected: testFile, + }, + { + name: "non-existent file returns constructed path", + cleanURI: "nonexistent.py", + repoRoot: repoRoot, + subfolder: "", + absSource: subfolder, + expected: filepath.Join(repoRoot, "nonexistent.py"), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ResolveRelativeLocalPath(tt.cleanURI, tt.repoRoot, tt.subfolder, tt.absSource) + if result != tt.expected { + t.Errorf("expected %q, got %q", tt.expected, result) + } + }) + } +} + +func TestConvertToRepoRelativePath(t *testing.T) { + tempDir, err := os.MkdirTemp("", "convert_path_test") + if err != nil { + t.Fatalf("failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + repoRoot := filepath.Join(tempDir, "scanio-test") + subfolder := filepath.Join(repoRoot, "apps", "demo") + if err := os.MkdirAll(subfolder, 0755); err != nil { + t.Fatalf("failed to create subfolder: %v", err) + } + + absoluteFile := filepath.Join(subfolder, "main.py") + if err := os.WriteFile(absoluteFile, []byte("test"), 0644); err != nil { + t.Fatalf("failed to create test file: %v", err) + } + + metadata := &git.RepositoryMetadata{ + RepoRootFolder: repoRoot, + Subfolder: "apps/demo", + } + + tests := []struct { + name string + rawURI string + metadata *git.RepositoryMetadata + sourceFolder string + expected string + }{ + { + name: "empty URI", + rawURI: "", + metadata: metadata, + sourceFolder: subfolder, + expected: "", + }, + 
{ + name: "absolute URI with metadata", + rawURI: absoluteFile, + metadata: metadata, + sourceFolder: subfolder, + expected: "apps/demo/main.py", + }, + { + name: "relative URI with metadata", + rawURI: "main.py", + metadata: metadata, + sourceFolder: subfolder, + expected: "apps/demo/main.py", + }, + { + name: "relative URI with file:// prefix", + rawURI: "file://main.py", + metadata: metadata, + sourceFolder: subfolder, + expected: "apps/demo/main.py", + }, + { + name: "absolute URI without metadata", + rawURI: absoluteFile, + metadata: nil, + sourceFolder: subfolder, + expected: "main.py", + }, + { + name: "relative URI with parent path", + rawURI: filepath.ToSlash(filepath.Join("..", "scanio-test", "apps", "demo", "main.py")), + metadata: metadata, + sourceFolder: subfolder, + expected: "apps/demo/main.py", + }, + { + name: "URI already with subfolder prefix", + rawURI: "apps/demo/main.py", + metadata: metadata, + sourceFolder: subfolder, + expected: "apps/demo/main.py", + }, + { + name: "relative URI without metadata or source folder", + rawURI: "src/main.py", + metadata: nil, + sourceFolder: "", + expected: "src/main.py", + }, + { + name: "whitespace in URI", + rawURI: " main.py ", + metadata: metadata, + sourceFolder: subfolder, + expected: "apps/demo/main.py", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ConvertToRepoRelativePath(tt.rawURI, tt.metadata, tt.sourceFolder) + if result != tt.expected { + t.Errorf("expected %q, got %q", tt.expected, result) + } + }) + } +} + +func TestConvertToRepoRelativePathWithoutRepoRoot(t *testing.T) { + // Test scenarios where we don't have repository metadata + tests := []struct { + name string + rawURI string + sourceFolder string + expected string + }{ + { + name: "relative path without metadata", + rawURI: "src/main.py", + sourceFolder: "/tmp/project", + expected: "src/main.py", + }, + { + name: "absolute path without metadata falls back to source folder", + rawURI: 
"/tmp/project/src/main.py", + sourceFolder: "/tmp/project", + expected: "src/main.py", + }, + { + name: "absolute path with no context", + rawURI: "/home/user/project/main.py", + sourceFolder: "", + expected: "home/user/project/main.py", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ConvertToRepoRelativePath(tt.rawURI, nil, tt.sourceFolder) + if result != tt.expected { + t.Errorf("expected %q, got %q", tt.expected, result) + } + }) + } +} + +func TestConvertToRepoRelativePathCrossPlatform(t *testing.T) { + // Test that paths are always normalized to forward slashes + tempDir, err := os.MkdirTemp("", "cross_platform_test") + if err != nil { + t.Fatalf("failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + repoRoot := filepath.Join(tempDir, "repo") + subfolder := filepath.Join(repoRoot, "apps", "demo") + if err := os.MkdirAll(subfolder, 0755); err != nil { + t.Fatalf("failed to create subfolder: %v", err) + } + + // Create the file + mainFile := filepath.Join(subfolder, "main.py") + if err := os.WriteFile(mainFile, []byte("test"), 0644); err != nil { + t.Fatalf("failed to create test file: %v", err) + } + + metadata := &git.RepositoryMetadata{ + RepoRootFolder: repoRoot, + Subfolder: "apps/demo", + } + + // Test with a relative path - the internal logic will normalize it + rawURI := "main.py" + result := ConvertToRepoRelativePath(rawURI, metadata, subfolder) + + // Result should always use forward slashes + if strings.Contains(result, "\\") { + t.Errorf("expected forward slashes only, got %q", result) + } + + expected := "apps/demo/main.py" + if result != expected { + t.Errorf("expected %q, got %q", expected, result) + } +} From b2c7bd0aeffbd2415c99045f95d1e255953c48b8 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sat, 11 Oct 2025 17:08:38 +0200 Subject: [PATCH 38/52] feat: implement BuildGitHubPermalink function for constructing GitHub permalinks and update related usages in SARIF 
processing --- cmd/sarif-issues/utils.go | 11 +-- internal/sarif/message_formatter.go | 12 +-- internal/sarif/path_helpers.go | 37 ++++++++ internal/sarif/path_helpers_test.go | 134 ++++++++++++++++++++++++++++ 4 files changed, 174 insertions(+), 20 deletions(-) diff --git a/cmd/sarif-issues/utils.go b/cmd/sarif-issues/utils.go index 8ea1bf0e..19304d99 100644 --- a/cmd/sarif-issues/utils.go +++ b/cmd/sarif-issues/utils.go @@ -220,7 +220,6 @@ func getScannerName(run *sarif.Run) string { // directly when metadata is not provided). Returns empty string when any // critical component is missing. func buildGitHubPermalink(options RunOptions, repoMetadata *git.RepositoryMetadata, fileURI string, start, end int) string { - base := fmt.Sprintf("https://github.com/%s/%s", options.Namespace, options.Repository) ref := strings.TrimSpace(options.Ref) if ref == "" { @@ -238,15 +237,7 @@ func buildGitHubPermalink(options RunOptions, repoMetadata *git.RepositoryMetada } path := filepath.ToSlash(fileURI) - anchor := "" - if start > 0 { - anchor = fmt.Sprintf("#L%d", start) - if end > start { - anchor = fmt.Sprintf("%s-L%d", anchor, end) - } - } - - return fmt.Sprintf("%s/blob/%s/%s%s", base, ref, path, anchor) + return internalsarif.BuildGitHubPermalink(options.Namespace, options.Repository, ref, path, start, end) } // extractFileURIFromResult derives both the repository-relative path and local filesystem path diff --git a/internal/sarif/message_formatter.go b/internal/sarif/message_formatter.go index 68e98a23..049dab33 100644 --- a/internal/sarif/message_formatter.go +++ b/internal/sarif/message_formatter.go @@ -269,14 +269,6 @@ func buildLocationLink(location *sarif.Location, repoMetadata *git.RepositoryMet endLine = startLine } - // Build GitHub permalink - // Format: https://github.com/{namespace}/{repo}/blob/{ref}/{file}#L{start}-L{end} - baseURL := fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s", - options.Namespace, options.Repository, options.Ref, repoPath) - - if 
startLine == endLine { - return fmt.Sprintf("%s#L%d", baseURL, startLine) - } else { - return fmt.Sprintf("%s#L%d-L%d", baseURL, startLine, endLine) - } + // Build GitHub permalink using shared helper + return BuildGitHubPermalink(options.Namespace, options.Repository, options.Ref, repoPath, startLine, endLine) } diff --git a/internal/sarif/path_helpers.go b/internal/sarif/path_helpers.go index 23e5d3d4..17b0671e 100644 --- a/internal/sarif/path_helpers.go +++ b/internal/sarif/path_helpers.go @@ -1,6 +1,7 @@ package sarif import ( + "fmt" "os" "path/filepath" "strings" @@ -196,3 +197,39 @@ func ConvertToRepoRelativePath(rawURI string, repoMetadata *git.RepositoryMetada repoPath = filepath.ToSlash(repoPath) return repoPath } + +// BuildGitHubPermalink constructs a GitHub permalink for a file and line range. +// It takes the core components needed for URL construction and handles the anchor format. +// Returns empty string if any critical component is missing. +// +// Parameters: +// - namespace: GitHub namespace/organization +// - repository: GitHub repository name +// - ref: Git reference (commit hash, branch, or tag) +// - repoRelativePath: file path relative to repository root (forward slashes) +// - startLine: starting line number (1-based) +// - endLine: ending line number (1-based, defaults to startLine if 0) +// +// Returns: +// - GitHub permalink string in format: https://github.com/{namespace}/{repo}/blob/{ref}/{file}#L{start}-L{end} +// - Empty string if any required parameter is missing +func BuildGitHubPermalink(namespace, repository, ref, repoRelativePath string, startLine, endLine int) string { + // Validate required parameters + if namespace == "" || repository == "" || ref == "" || repoRelativePath == "" { + return "" + } + + // Build base URL + baseURL := fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s", namespace, repository, ref, repoRelativePath) + + // Handle line anchor + if startLine <= 0 { + return baseURL + } + + if endLine <= 0 || endLine 
== startLine || endLine < startLine { + return fmt.Sprintf("%s#L%d", baseURL, startLine) + } + + return fmt.Sprintf("%s#L%d-L%d", baseURL, startLine, endLine) +} diff --git a/internal/sarif/path_helpers_test.go b/internal/sarif/path_helpers_test.go index 8a7c4fa0..09af5688 100644 --- a/internal/sarif/path_helpers_test.go +++ b/internal/sarif/path_helpers_test.go @@ -388,3 +388,137 @@ func TestConvertToRepoRelativePathCrossPlatform(t *testing.T) { t.Errorf("expected %q, got %q", expected, result) } } + +func TestBuildGitHubPermalink(t *testing.T) { + tests := []struct { + name string + namespace string + repository string + ref string + repoRelativePath string + startLine int + endLine int + expected string + }{ + { + name: "single line", + namespace: "scan-io-git", + repository: "scan-io", + ref: "main", + repoRelativePath: "src/main.go", + startLine: 42, + endLine: 0, + expected: "https://github.com/scan-io-git/scan-io/blob/main/src/main.go#L42", + }, + { + name: "line range", + namespace: "scan-io-git", + repository: "scan-io", + ref: "abc123", + repoRelativePath: "internal/sarif/path_helpers.go", + startLine: 10, + endLine: 15, + expected: "https://github.com/scan-io-git/scan-io/blob/abc123/internal/sarif/path_helpers.go#L10-L15", + }, + { + name: "same start and end line", + namespace: "scan-io-git", + repository: "scan-io", + ref: "main", + repoRelativePath: "cmd/sarif-issues/utils.go", + startLine: 5, + endLine: 5, + expected: "https://github.com/scan-io-git/scan-io/blob/main/cmd/sarif-issues/utils.go#L5", + }, + { + name: "no line numbers", + namespace: "scan-io-git", + repository: "scan-io", + ref: "main", + repoRelativePath: "README.md", + startLine: 0, + endLine: 0, + expected: "https://github.com/scan-io-git/scan-io/blob/main/README.md", + }, + { + name: "negative start line", + namespace: "scan-io-git", + repository: "scan-io", + ref: "main", + repoRelativePath: "src/main.go", + startLine: -1, + endLine: 0, + expected: 
"https://github.com/scan-io-git/scan-io/blob/main/src/main.go", + }, + { + name: "empty namespace", + namespace: "", + repository: "scan-io", + ref: "main", + repoRelativePath: "src/main.go", + startLine: 1, + endLine: 0, + expected: "", + }, + { + name: "empty repository", + namespace: "scan-io-git", + repository: "", + ref: "main", + repoRelativePath: "src/main.go", + startLine: 1, + endLine: 0, + expected: "", + }, + { + name: "empty ref", + namespace: "scan-io-git", + repository: "scan-io", + ref: "", + repoRelativePath: "src/main.go", + startLine: 1, + endLine: 0, + expected: "", + }, + { + name: "empty file path", + namespace: "scan-io-git", + repository: "scan-io", + ref: "main", + repoRelativePath: "", + startLine: 1, + endLine: 0, + expected: "", + }, + { + name: "file with subdirectory", + namespace: "scan-io-git", + repository: "scan-io", + ref: "feature-branch", + repoRelativePath: "internal/sarif/message_formatter.go", + startLine: 25, + endLine: 30, + expected: "https://github.com/scan-io-git/scan-io/blob/feature-branch/internal/sarif/message_formatter.go#L25-L30", + }, + { + name: "end line less than start line", + namespace: "scan-io-git", + repository: "scan-io", + ref: "main", + repoRelativePath: "src/main.go", + startLine: 10, + endLine: 5, + expected: "https://github.com/scan-io-git/scan-io/blob/main/src/main.go#L10", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := BuildGitHubPermalink(tt.namespace, tt.repository, tt.ref, tt.repoRelativePath, tt.startLine, tt.endLine) + if result != tt.expected { + t.Errorf("BuildGitHubPermalink(%q, %q, %q, %q, %d, %d) = %q, expected %q", + tt.namespace, tt.repository, tt.ref, tt.repoRelativePath, tt.startLine, tt.endLine, result, tt.expected) + } + }) + } +} From 4981fde56d2f0f1b20afd8f214b107facd629b49 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sun, 12 Oct 2025 09:00:01 +0200 Subject: [PATCH 39/52] refactor: migrate extractFileURIFromResult and 
extractRegionFromResult to internalsarif package and update usages in SARIF issue processing --- cmd/sarif-issues/issue_processing.go | 4 +- cmd/sarif-issues/utils.go | 83 +--------------------- cmd/sarif-issues/utils_test.go | 99 -------------------------- internal/sarif/path_helpers.go | 82 ++++++++++++++++++++++ internal/sarif/path_helpers_test.go | 100 +++++++++++++++++++++++++++ 5 files changed, 185 insertions(+), 183 deletions(-) diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index 5f2f4095..10c02a94 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -209,13 +209,13 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, s continue } - fileURI, localPath := extractFileURIFromResult(res, sourceFolderAbs, repoMetadata) + fileURI, localPath := internalsarif.ExtractFileURIFromResult(res, sourceFolderAbs, repoMetadata) fileURI = filepath.ToSlash(strings.TrimSpace(fileURI)) if fileURI == "" { fileURI = "" lg.Warn("SARIF result missing file URI, using placeholder", "rule_id", ruleID) } - line, endLine := extractRegionFromResult(res) + line, endLine := internalsarif.ExtractRegionFromResult(res) // Warn about missing location information if line <= 0 { diff --git a/cmd/sarif-issues/utils.go b/cmd/sarif-issues/utils.go index 19304d99..bdcf279b 100644 --- a/cmd/sarif-issues/utils.go +++ b/cmd/sarif-issues/utils.go @@ -240,87 +240,6 @@ func buildGitHubPermalink(options RunOptions, repoMetadata *git.RepositoryMetada return internalsarif.BuildGitHubPermalink(options.Namespace, options.Repository, ref, path, start, end) } -// extractFileURIFromResult derives both the repository-relative path and local filesystem path -// for the first location in a SARIF result. 
When repository metadata is available the repo-relative -// path is anchored at the repository root; otherwise the function falls back to trimming the -// provided source folder (preserving legacy behaviour). -func extractFileURIFromResult(res *sarif.Result, absSourceFolder string, repoMetadata *git.RepositoryMetadata) (string, string) { - if res == nil || len(res.Locations) == 0 { - return "", "" - } - loc := res.Locations[0] - if loc.PhysicalLocation == nil { - return "", "" - } - art := loc.PhysicalLocation.ArtifactLocation - if art == nil || art.URI == nil { - return "", "" - } - rawURI := strings.TrimSpace(*art.URI) - if rawURI == "" { - return "", "" - } - - // Use shared function to get repo-relative path - repoPath := internalsarif.ConvertToRepoRelativePath(rawURI, repoMetadata, absSourceFolder) - - // Calculate local path for file operations (snippet hashing, etc.) - localPath := calculateLocalPath(rawURI, repoMetadata, absSourceFolder) - - return repoPath, localPath -} - -// calculateLocalPath determines the absolute local filesystem path for a SARIF URI. -// This is used for reading files for snippet hashing and other local file operations. 
-func calculateLocalPath(rawURI string, repoMetadata *git.RepositoryMetadata, absSourceFolder string) string { - subfolder := internalsarif.NormalisedSubfolder(repoMetadata) - var repoRoot string - if repoMetadata != nil && strings.TrimSpace(repoMetadata.RepoRootFolder) != "" { - repoRoot = filepath.Clean(repoMetadata.RepoRootFolder) - } - absSource := strings.TrimSpace(absSourceFolder) - if absSource != "" { - if abs, err := filepath.Abs(absSource); err == nil { - absSource = abs - } else { - absSource = filepath.Clean(absSource) - } - } - - // Normalize URI to the host OS path representation - osURI := filepath.FromSlash(rawURI) - osURI = strings.TrimPrefix(osURI, "file://") - cleanURI := filepath.Clean(osURI) - - if filepath.IsAbs(cleanURI) { - return cleanURI - } - - // Relative path - resolve to absolute - return internalsarif.ResolveRelativeLocalPath(cleanURI, repoRoot, subfolder, absSource) -} - -// extractRegionFromResult returns start and end line numbers (0 when not present) -// taken from the SARIF result's first location region. -func extractRegionFromResult(res *sarif.Result) (int, int) { - if res == nil || len(res.Locations) == 0 { - return 0, 0 - } - loc := res.Locations[0] - if loc.PhysicalLocation == nil || loc.PhysicalLocation.Region == nil { - return 0, 0 - } - start := 0 - end := 0 - if loc.PhysicalLocation.Region.StartLine != nil { - start = *loc.PhysicalLocation.Region.StartLine - } - if loc.PhysicalLocation.Region.EndLine != nil { - end = *loc.PhysicalLocation.Region.EndLine - } - return start, end -} - // ResolveSourceFolder resolves a source folder path to its absolute form for path calculations. // It handles path expansion (e.g., ~) and absolute path resolution with graceful fallbacks. // Returns an empty string if the input folder is empty or whitespace-only. 
@@ -407,7 +326,7 @@ func FormatCodeFlows(result *sarif.Result, options RunOptions, repoMetadata *git } // Extract file path and line information - fileURI, _ := extractFileURIFromResult(&sarif.Result{ + fileURI, _ := internalsarif.ExtractFileURIFromResult(&sarif.Result{ Locations: []*sarif.Location{location}, }, sourceFolderAbs, repoMetadata) diff --git a/cmd/sarif-issues/utils_test.go b/cmd/sarif-issues/utils_test.go index 40f826ef..2aa72fee 100644 --- a/cmd/sarif-issues/utils_test.go +++ b/cmd/sarif-issues/utils_test.go @@ -392,105 +392,6 @@ func TestComputeSnippetHash_SameContentSameHash(t *testing.T) { } } -func TestExtractFileURIFromResult(t *testing.T) { - tempDir, err := os.MkdirTemp("", "sarif_extract") - if err != nil { - t.Fatalf("Failed to create temp directory: %v", err) - } - defer os.RemoveAll(tempDir) - - repoRoot := filepath.Join(tempDir, "scanio-test") - subfolder := filepath.Join(repoRoot, "apps", "demo") - if err := os.MkdirAll(subfolder, 0o755); err != nil { - t.Fatalf("Failed to create subfolder: %v", err) - } - - absoluteFile := filepath.Join(subfolder, "main.py") - metadata := &git.RepositoryMetadata{ - RepoRootFolder: repoRoot, - Subfolder: filepath.ToSlash(filepath.Join("apps", "demo")), - } - - if err := os.WriteFile(absoluteFile, []byte("print('demo')\n"), 0o644); err != nil { - t.Fatalf("Failed to create test file: %v", err) - } - - tests := []struct { - name string - uri string - meta *git.RepositoryMetadata - expectedRepo string - expectedLocal string - sourceFolder string - }{ - { - name: "absolute URI with metadata", - uri: absoluteFile, - meta: metadata, - expectedRepo: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), - expectedLocal: absoluteFile, - sourceFolder: subfolder, - }, - { - name: "relative URI with metadata", - uri: "main.py", - meta: metadata, - expectedRepo: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), - expectedLocal: filepath.Join(repoRoot, "apps", "demo", "main.py"), - sourceFolder: 
subfolder, - }, - { - name: "relative URI with parent segments", - uri: filepath.ToSlash(filepath.Join("..", "scanio-test", "apps", "demo", "main.py")), - meta: metadata, - expectedRepo: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), - expectedLocal: filepath.Join(repoRoot, "apps", "demo", "main.py"), - sourceFolder: subfolder, - }, - { - name: "relative URI already prefixed", - uri: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), - meta: metadata, - expectedRepo: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), - expectedLocal: filepath.Join(repoRoot, "apps", "demo", "main.py"), - sourceFolder: subfolder, - }, - { - name: "absolute URI without metadata falls back to source folder", - uri: absoluteFile, - meta: nil, - expectedRepo: "main.py", - expectedLocal: absoluteFile, - sourceFolder: subfolder, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - uri := tt.uri - result := &sarif.Result{ - Locations: []*sarif.Location{ - { - PhysicalLocation: &sarif.PhysicalLocation{ - ArtifactLocation: &sarif.ArtifactLocation{ - URI: &uri, - }, - }, - }, - }, - } - - repoPath, localPath := extractFileURIFromResult(result, tt.sourceFolder, tt.meta) - if repoPath != tt.expectedRepo { - t.Fatalf("expected repo path %q, got %q", tt.expectedRepo, repoPath) - } - if localPath != tt.expectedLocal { - t.Fatalf("expected local path %q, got %q", tt.expectedLocal, localPath) - } - }) - } -} - func TestBuildGitHubPermalink(t *testing.T) { fileURI := filepath.ToSlash(filepath.Join("apps", "demo", "main.py")) options := RunOptions{ diff --git a/internal/sarif/path_helpers.go b/internal/sarif/path_helpers.go index 17b0671e..4f138b29 100644 --- a/internal/sarif/path_helpers.go +++ b/internal/sarif/path_helpers.go @@ -6,6 +6,7 @@ import ( "path/filepath" "strings" + "github.com/owenrumney/go-sarif/v2/sarif" "github.com/scan-io-git/scan-io/internal/git" ) @@ -233,3 +234,84 @@ func BuildGitHubPermalink(namespace, repository, ref, 
repoRelativePath string, s return fmt.Sprintf("%s#L%d-L%d", baseURL, startLine, endLine) } + +// ExtractFileURIFromResult derives both the repository-relative path and local filesystem path +// for the first location in a SARIF result. When repository metadata is available the repo-relative +// path is anchored at the repository root; otherwise the function falls back to trimming the +// provided source folder (preserving legacy behaviour). +func ExtractFileURIFromResult(res *sarif.Result, absSourceFolder string, repoMetadata *git.RepositoryMetadata) (string, string) { + if res == nil || len(res.Locations) == 0 { + return "", "" + } + loc := res.Locations[0] + if loc.PhysicalLocation == nil { + return "", "" + } + art := loc.PhysicalLocation.ArtifactLocation + if art == nil || art.URI == nil { + return "", "" + } + rawURI := strings.TrimSpace(*art.URI) + if rawURI == "" { + return "", "" + } + + // Use shared function to get repo-relative path + repoPath := ConvertToRepoRelativePath(rawURI, repoMetadata, absSourceFolder) + + // Calculate local path for file operations (snippet hashing, etc.) + localPath := CalculateLocalPath(rawURI, repoMetadata, absSourceFolder) + + return repoPath, localPath +} + +// CalculateLocalPath determines the absolute local filesystem path for a SARIF URI. +// This is used for reading files for snippet hashing and other local file operations. 
+func CalculateLocalPath(rawURI string, repoMetadata *git.RepositoryMetadata, absSourceFolder string) string { + subfolder := NormalisedSubfolder(repoMetadata) + var repoRoot string + if repoMetadata != nil && strings.TrimSpace(repoMetadata.RepoRootFolder) != "" { + repoRoot = filepath.Clean(repoMetadata.RepoRootFolder) + } + absSource := strings.TrimSpace(absSourceFolder) + if absSource != "" { + if abs, err := filepath.Abs(absSource); err == nil { + absSource = abs + } else { + absSource = filepath.Clean(absSource) + } + } + + // Normalize URI to the host OS path representation + osURI := filepath.FromSlash(rawURI) + osURI = strings.TrimPrefix(osURI, "file://") + cleanURI := filepath.Clean(osURI) + + if filepath.IsAbs(cleanURI) { + return cleanURI + } + + // Relative path - resolve to absolute + return ResolveRelativeLocalPath(cleanURI, repoRoot, subfolder, absSource) +} + +// ExtractRegionFromResult returns start and end line numbers (0 when not present) +// taken from the SARIF result's first location region. 
+func ExtractRegionFromResult(res *sarif.Result) (int, int) { + if res == nil || len(res.Locations) == 0 { + return 0, 0 + } + loc := res.Locations[0] + if loc.PhysicalLocation == nil || loc.PhysicalLocation.Region == nil { + return 0, 0 + } + start := 0 + end := 0 + if loc.PhysicalLocation.Region.StartLine != nil { + start = *loc.PhysicalLocation.Region.StartLine + } + if loc.PhysicalLocation.Region.EndLine != nil { + end = *loc.PhysicalLocation.Region.EndLine + } + return start, end +} diff --git a/internal/sarif/path_helpers_test.go b/internal/sarif/path_helpers_test.go index 09af5688..bbd9e073 100644 --- a/internal/sarif/path_helpers_test.go +++ b/internal/sarif/path_helpers_test.go @@ -6,6 +6,7 @@ import ( "strings" "testing" + "github.com/owenrumney/go-sarif/v2/sarif" "github.com/scan-io-git/scan-io/internal/git" ) @@ -522,3 +523,102 @@ func TestBuildGitHubPermalink(t *testing.T) { }) } } + +func TestExtractFileURIFromResult(t *testing.T) { + tempDir, err := os.MkdirTemp("", "sarif_extract") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + repoRoot := filepath.Join(tempDir, "scanio-test") + subfolder := filepath.Join(repoRoot, "apps", "demo") + if err := os.MkdirAll(subfolder, 0o755); err != nil { + t.Fatalf("Failed to create subfolder: %v", err) + } + + absoluteFile := filepath.Join(subfolder, "main.py") + metadata := &git.RepositoryMetadata{ + RepoRootFolder: repoRoot, + Subfolder: filepath.ToSlash(filepath.Join("apps", "demo")), + } + + if err := os.WriteFile(absoluteFile, []byte("print('demo')\n"), 0o644); err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + tests := []struct { + name string + uri string + meta *git.RepositoryMetadata + expectedRepo string + expectedLocal string + sourceFolder string + }{ + { + name: "absolute URI with metadata", + uri: absoluteFile, + meta: metadata, + expectedRepo: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + expectedLocal: 
absoluteFile, + sourceFolder: subfolder, + }, + { + name: "relative URI with metadata", + uri: "main.py", + meta: metadata, + expectedRepo: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + expectedLocal: filepath.Join(repoRoot, "apps", "demo", "main.py"), + sourceFolder: subfolder, + }, + { + name: "relative URI with parent segments", + uri: filepath.ToSlash(filepath.Join("..", "scanio-test", "apps", "demo", "main.py")), + meta: metadata, + expectedRepo: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + expectedLocal: filepath.Join(repoRoot, "apps", "demo", "main.py"), + sourceFolder: subfolder, + }, + { + name: "relative URI already prefixed", + uri: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + meta: metadata, + expectedRepo: filepath.ToSlash(filepath.Join("apps", "demo", "main.py")), + expectedLocal: filepath.Join(repoRoot, "apps", "demo", "main.py"), + sourceFolder: subfolder, + }, + { + name: "absolute URI without metadata falls back to source folder", + uri: absoluteFile, + meta: nil, + expectedRepo: "main.py", + expectedLocal: absoluteFile, + sourceFolder: subfolder, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + uri := tt.uri + result := &sarif.Result{ + Locations: []*sarif.Location{ + { + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: &uri, + }, + }, + }, + }, + } + + repoPath, localPath := ExtractFileURIFromResult(result, tt.sourceFolder, tt.meta) + if repoPath != tt.expectedRepo { + t.Fatalf("expected repo path %q, got %q", tt.expectedRepo, repoPath) + } + if localPath != tt.expectedLocal { + t.Fatalf("expected local path %q, got %q", tt.expectedLocal, localPath) + } + }) + } +} From f99bfdbafc64d0dd0d6203e74f19e5a73cc8461d Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sun, 12 Oct 2025 09:19:25 +0200 Subject: [PATCH 40/52] refactor: move computeSnippetHash function to issuecorrelation package --- 
cmd/sarif-issues/issue_processing.go | 2 +- cmd/sarif-issues/utils.go | 32 ---- cmd/sarif-issues/utils_test.go | 246 -------------------------- pkg/issuecorrelation/snippet.go | 39 ++++ pkg/issuecorrelation/snippet_test.go | 254 +++++++++++++++++++++++++++ 5 files changed, 294 insertions(+), 279 deletions(-) create mode 100644 pkg/issuecorrelation/snippet.go create mode 100644 pkg/issuecorrelation/snippet_test.go diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index 10c02a94..dc8db977 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -222,7 +222,7 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, s lg.Warn("SARIF result missing line information", "rule_id", ruleID, "file", fileURI) } - snippetHash := computeSnippetHash(localPath, line, endLine) + snippetHash := issuecorrelation.ComputeSnippetHash(localPath, line, endLine) if snippetHash == "" && fileURI != "" && line > 0 { lg.Warn("failed to compute snippet hash", "rule_id", ruleID, "file", fileURI, "line", line, "local_path", localPath) } diff --git a/cmd/sarif-issues/utils.go b/cmd/sarif-issues/utils.go index bdcf279b..64e43f95 100644 --- a/cmd/sarif-issues/utils.go +++ b/cmd/sarif-issues/utils.go @@ -1,7 +1,6 @@ package sarifissues import ( - "crypto/sha256" "fmt" "os" "path/filepath" @@ -129,37 +128,6 @@ func buildIssueTitle(scannerName, severity, ruleID, fileURI string, line, endLin return fmt.Sprintf("%s at %s", title, fileURI) } -// computeSnippetHash reads the snippet (single line or range) from a local filesystem path -// and returns its SHA256 hex string. Returns empty string on any error or if inputs are invalid. 
-func computeSnippetHash(localPath string, line, endLine int) string { - if strings.TrimSpace(localPath) == "" || line <= 0 { - return "" - } - data, err := os.ReadFile(localPath) - if err != nil { - return "" - } - lines := strings.Split(string(data), "\n") - start := line - end := line - if endLine > line { - end = endLine - } - // Validate bounds (1-based line numbers) - if start < 1 || start > len(lines) { - return "" - } - if end > len(lines) { - end = len(lines) - } - if end < start { - return "" - } - snippet := strings.Join(lines[start-1:end], "\n") - sum := sha256.Sum256([]byte(snippet)) - return fmt.Sprintf("%x", sum[:]) -} - // parseRuleHelpMarkdown removes promotional content from help markdown and splits // it into the descriptive details and a list of reference bullet points. func parseRuleHelpMarkdown(markdown string) (string, []string) { diff --git a/cmd/sarif-issues/utils_test.go b/cmd/sarif-issues/utils_test.go index 2aa72fee..4dc597c2 100644 --- a/cmd/sarif-issues/utils_test.go +++ b/cmd/sarif-issues/utils_test.go @@ -1,7 +1,6 @@ package sarifissues import ( - "crypto/sha256" "fmt" "os" "path/filepath" @@ -147,251 +146,6 @@ func TestDisplaySeverity(t *testing.T) { } } -func TestComputeSnippetHash(t *testing.T) { - // Create a temporary directory for test files - tempDir, err := os.MkdirTemp("", "snippet_hash_test") - if err != nil { - t.Fatalf("Failed to create temp directory: %v", err) - } - defer os.RemoveAll(tempDir) - - // Create test files with known content - testFileContent := `line 1 -line 2 -line 3 -line 4 -line 5` - - testFilePath := filepath.Join(tempDir, "test.txt") - err = os.WriteFile(testFilePath, []byte(testFileContent), 0644) - if err != nil { - t.Fatalf("Failed to create test file: %v", err) - } - - // Create another test file with different content - singleLineContent := "single line content" - singleLineFilePath := filepath.Join(tempDir, "single.txt") - err = os.WriteFile(singleLineFilePath, []byte(singleLineContent), 0644) - 
if err != nil { - t.Fatalf("Failed to create single line test file: %v", err) - } - - // Create empty file - emptyFilePath := filepath.Join(tempDir, "empty.txt") - err = os.WriteFile(emptyFilePath, []byte(""), 0644) - if err != nil { - t.Fatalf("Failed to create empty test file: %v", err) - } - - // Helper function to compute expected hash - computeExpectedHash := func(content string) string { - sum := sha256.Sum256([]byte(content)) - return fmt.Sprintf("%x", sum[:]) - } - - tests := []struct { - name string - localPath string - line int - endLine int - expected string - }{ - // Valid cases - { - name: "single line from middle", - localPath: testFilePath, - line: 2, - endLine: 2, - expected: computeExpectedHash("line 2"), - }, - { - name: "multiple lines range", - localPath: testFilePath, - line: 2, - endLine: 4, - expected: computeExpectedHash("line 2\nline 3\nline 4"), - }, - { - name: "first line only", - localPath: testFilePath, - line: 1, - endLine: 1, - expected: computeExpectedHash("line 1"), - }, - { - name: "last line only", - localPath: testFilePath, - line: 5, - endLine: 5, - expected: computeExpectedHash("line 5"), - }, - { - name: "entire file", - localPath: testFilePath, - line: 1, - endLine: 5, - expected: computeExpectedHash(testFileContent), - }, - { - name: "single line file", - localPath: singleLineFilePath, - line: 1, - endLine: 1, - expected: computeExpectedHash(singleLineContent), - }, - { - name: "endLine same as line (no range)", - localPath: testFilePath, - line: 3, - endLine: 3, - expected: computeExpectedHash("line 3"), - }, - { - name: "endLine less than line (should use single line)", - localPath: testFilePath, - line: 3, - endLine: 2, - expected: computeExpectedHash("line 3"), - }, - - // Edge cases that should return empty string - { - name: "empty path", - localPath: "", - line: 1, - endLine: 1, - expected: "", - }, - { - name: "zero line number", - localPath: testFilePath, - line: 0, - endLine: 1, - expected: "", - }, - { - name: 
"negative line number", - localPath: testFilePath, - line: -1, - endLine: 1, - expected: "", - }, - { - name: "line number beyond file length", - localPath: testFilePath, - line: 10, - endLine: 10, - expected: "", - }, - { - name: "file does not exist", - localPath: filepath.Join(tempDir, "nonexistent.txt"), - line: 1, - endLine: 1, - expected: "", - }, - - // Boundary cases - { - name: "endLine beyond file length (should clamp)", - localPath: testFilePath, - line: 4, - endLine: 10, - expected: computeExpectedHash("line 4\nline 5"), - }, - { - name: "empty file", - localPath: emptyFilePath, - line: 1, - endLine: 1, - expected: computeExpectedHash(""), // Empty file has one empty line - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := computeSnippetHash(tt.localPath, tt.line, tt.endLine) - if result != tt.expected { - t.Errorf("computeSnippetHash(%q, %d, %d) = %q, want %q", - tt.localPath, tt.line, tt.endLine, result, tt.expected) - } - }) - } -} - -// TestComputeSnippetHash_DifferentContentDifferentHash tests that different -// content produces different hashes -func TestComputeSnippetHash_DifferentContentDifferentHash(t *testing.T) { - // Create a temporary directory for test files - tempDir, err := os.MkdirTemp("", "snippet_hash_different_test") - if err != nil { - t.Fatalf("Failed to create temp directory: %v", err) - } - defer os.RemoveAll(tempDir) - - // Create two files with different content - file1Path := filepath.Join(tempDir, "file1.txt") - file2Path := filepath.Join(tempDir, "file2.txt") - - err = os.WriteFile(file1Path, []byte("content A"), 0644) - if err != nil { - t.Fatalf("Failed to create file1: %v", err) - } - - err = os.WriteFile(file2Path, []byte("content B"), 0644) - if err != nil { - t.Fatalf("Failed to create file2: %v", err) - } - - hash1 := computeSnippetHash(file1Path, 1, 1) - hash2 := computeSnippetHash(file2Path, 1, 1) - - if hash1 == hash2 { - t.Errorf("Different content produced same hash: %q", 
hash1) - } - - if hash1 == "" || hash2 == "" { - t.Error("One or both hashes were empty") - } -} - -// TestComputeSnippetHash_SameContentSameHash tests that identical content -// produces identical hashes regardless of file name -func TestComputeSnippetHash_SameContentSameHash(t *testing.T) { - // Create a temporary directory for test files - tempDir, err := os.MkdirTemp("", "snippet_hash_same_test") - if err != nil { - t.Fatalf("Failed to create temp directory: %v", err) - } - defer os.RemoveAll(tempDir) - - // Create two files with identical content but different names - content := "identical content\nline 2\nline 3" - file1Path := filepath.Join(tempDir, "identical1.txt") - file2Path := filepath.Join(tempDir, "identical2.txt") - - err = os.WriteFile(file1Path, []byte(content), 0644) - if err != nil { - t.Fatalf("Failed to create file1: %v", err) - } - - err = os.WriteFile(file2Path, []byte(content), 0644) - if err != nil { - t.Fatalf("Failed to create file2: %v", err) - } - - hash1 := computeSnippetHash(file1Path, 1, 2) - hash2 := computeSnippetHash(file2Path, 1, 2) - - if hash1 != hash2 { - t.Errorf("Identical content produced different hashes: %q vs %q", hash1, hash2) - } - - if hash1 == "" { - t.Error("Hash was empty for valid content") - } -} - func TestBuildGitHubPermalink(t *testing.T) { fileURI := filepath.ToSlash(filepath.Join("apps", "demo", "main.py")) options := RunOptions{ diff --git a/pkg/issuecorrelation/snippet.go b/pkg/issuecorrelation/snippet.go new file mode 100644 index 00000000..a0e11d28 --- /dev/null +++ b/pkg/issuecorrelation/snippet.go @@ -0,0 +1,39 @@ +package issuecorrelation + +import ( + "crypto/sha256" + "fmt" + "os" + "strings" +) + +// ComputeSnippetHash reads the snippet (single line or range) from a local filesystem path +// and returns its SHA256 hex string. Returns empty string on any error or if inputs are invalid. 
+func ComputeSnippetHash(localPath string, line, endLine int) string { + if strings.TrimSpace(localPath) == "" || line <= 0 { + return "" + } + data, err := os.ReadFile(localPath) + if err != nil { + return "" + } + lines := strings.Split(string(data), "\n") + start := line + end := line + if endLine > line { + end = endLine + } + // Validate bounds (1-based line numbers) + if start < 1 || start > len(lines) { + return "" + } + if end > len(lines) { + end = len(lines) + } + if end < start { + return "" + } + snippet := strings.Join(lines[start-1:end], "\n") + sum := sha256.Sum256([]byte(snippet)) + return fmt.Sprintf("%x", sum[:]) +} diff --git a/pkg/issuecorrelation/snippet_test.go b/pkg/issuecorrelation/snippet_test.go new file mode 100644 index 00000000..4486970f --- /dev/null +++ b/pkg/issuecorrelation/snippet_test.go @@ -0,0 +1,254 @@ +package issuecorrelation + +import ( + "crypto/sha256" + "fmt" + "os" + "path/filepath" + "testing" +) + +func TestComputeSnippetHash(t *testing.T) { + // Create a temporary directory for test files + tempDir, err := os.MkdirTemp("", "snippet_hash_test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create test files with known content + testFileContent := `line 1 +line 2 +line 3 +line 4 +line 5` + + testFilePath := filepath.Join(tempDir, "test.txt") + err = os.WriteFile(testFilePath, []byte(testFileContent), 0644) + if err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + // Create another test file with different content + singleLineContent := "single line content" + singleLineFilePath := filepath.Join(tempDir, "single.txt") + err = os.WriteFile(singleLineFilePath, []byte(singleLineContent), 0644) + if err != nil { + t.Fatalf("Failed to create single line test file: %v", err) + } + + // Create empty file + emptyFilePath := filepath.Join(tempDir, "empty.txt") + err = os.WriteFile(emptyFilePath, []byte(""), 0644) + if err != nil { + 
t.Fatalf("Failed to create empty test file: %v", err) + } + + // Helper function to compute expected hash + computeExpectedHash := func(content string) string { + sum := sha256.Sum256([]byte(content)) + return fmt.Sprintf("%x", sum[:]) + } + + tests := []struct { + name string + localPath string + line int + endLine int + expected string + }{ + // Valid cases + { + name: "single line from middle", + localPath: testFilePath, + line: 2, + endLine: 2, + expected: computeExpectedHash("line 2"), + }, + { + name: "multiple lines range", + localPath: testFilePath, + line: 2, + endLine: 4, + expected: computeExpectedHash("line 2\nline 3\nline 4"), + }, + { + name: "first line only", + localPath: testFilePath, + line: 1, + endLine: 1, + expected: computeExpectedHash("line 1"), + }, + { + name: "last line only", + localPath: testFilePath, + line: 5, + endLine: 5, + expected: computeExpectedHash("line 5"), + }, + { + name: "entire file", + localPath: testFilePath, + line: 1, + endLine: 5, + expected: computeExpectedHash(testFileContent), + }, + { + name: "single line file", + localPath: singleLineFilePath, + line: 1, + endLine: 1, + expected: computeExpectedHash(singleLineContent), + }, + { + name: "endLine same as line (no range)", + localPath: testFilePath, + line: 3, + endLine: 3, + expected: computeExpectedHash("line 3"), + }, + { + name: "endLine less than line (should use single line)", + localPath: testFilePath, + line: 3, + endLine: 2, + expected: computeExpectedHash("line 3"), + }, + + // Edge cases that should return empty string + { + name: "empty path", + localPath: "", + line: 1, + endLine: 1, + expected: "", + }, + { + name: "zero line number", + localPath: testFilePath, + line: 0, + endLine: 1, + expected: "", + }, + { + name: "negative line number", + localPath: testFilePath, + line: -1, + endLine: 1, + expected: "", + }, + { + name: "line number beyond file length", + localPath: testFilePath, + line: 10, + endLine: 10, + expected: "", + }, + { + name: "file 
does not exist", + localPath: filepath.Join(tempDir, "nonexistent.txt"), + line: 1, + endLine: 1, + expected: "", + }, + + // Boundary cases + { + name: "endLine beyond file length (should clamp)", + localPath: testFilePath, + line: 4, + endLine: 10, + expected: computeExpectedHash("line 4\nline 5"), + }, + { + name: "empty file", + localPath: emptyFilePath, + line: 1, + endLine: 1, + expected: computeExpectedHash(""), // Empty file has one empty line + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ComputeSnippetHash(tt.localPath, tt.line, tt.endLine) + if result != tt.expected { + t.Errorf("ComputeSnippetHash(%q, %d, %d) = %q, want %q", + tt.localPath, tt.line, tt.endLine, result, tt.expected) + } + }) + } +} + +// TestComputeSnippetHash_DifferentContentDifferentHash tests that different +// content produces different hashes +func TestComputeSnippetHash_DifferentContentDifferentHash(t *testing.T) { + // Create a temporary directory for test files + tempDir, err := os.MkdirTemp("", "snippet_hash_different_test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create two files with different content + file1Path := filepath.Join(tempDir, "file1.txt") + file2Path := filepath.Join(tempDir, "file2.txt") + + err = os.WriteFile(file1Path, []byte("content A"), 0644) + if err != nil { + t.Fatalf("Failed to create file1: %v", err) + } + + err = os.WriteFile(file2Path, []byte("content B"), 0644) + if err != nil { + t.Fatalf("Failed to create file2: %v", err) + } + + hash1 := ComputeSnippetHash(file1Path, 1, 1) + hash2 := ComputeSnippetHash(file2Path, 1, 1) + + if hash1 == hash2 { + t.Errorf("Different content produced same hash: %q", hash1) + } + + if hash1 == "" || hash2 == "" { + t.Error("One or both hashes were empty") + } +} + +// TestComputeSnippetHash_SameContentSameHash tests that identical content +// produces identical hashes regardless of file name +func 
TestComputeSnippetHash_SameContentSameHash(t *testing.T) { + // Create a temporary directory for test files + tempDir, err := os.MkdirTemp("", "snippet_hash_same_test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create two files with identical content but different names + content := "identical content\nline 2\nline 3" + file1Path := filepath.Join(tempDir, "identical1.txt") + file2Path := filepath.Join(tempDir, "identical2.txt") + + err = os.WriteFile(file1Path, []byte(content), 0644) + if err != nil { + t.Fatalf("Failed to create file1: %v", err) + } + + err = os.WriteFile(file2Path, []byte(content), 0644) + if err != nil { + t.Fatalf("Failed to create file2: %v", err) + } + + hash1 := ComputeSnippetHash(file1Path, 1, 2) + hash2 := ComputeSnippetHash(file2Path, 1, 2) + + if hash1 != hash2 { + t.Errorf("Identical content produced different hashes: %q vs %q", hash1, hash2) + } + + if hash1 == "" { + t.Error("Hash was empty for valid content") + } +} From f4bc9267516890b2be70552895fc0b5439ce15b3 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sun, 12 Oct 2025 09:44:24 +0200 Subject: [PATCH 41/52] refactor: consolidate displayRuleHeading logic into a single function and update tests accordingly --- cmd/sarif-issues/issue_processing.go | 22 +---------- cmd/sarif-issues/issue_processing_test.go | 35 ----------------- internal/sarif/path_helpers.go | 22 +++++++++++ internal/sarif/path_helpers_test.go | 47 +++++++++++++++++++++++ 4 files changed, 70 insertions(+), 56 deletions(-) diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index dc8db977..47fa5506 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -245,7 +245,7 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, s // New body header and compact metadata blockquote header := "" - if h := displayRuleHeading(ruleID, 
ruleDescriptor); strings.TrimSpace(h) != "" { + if h := internalsarif.DisplayRuleHeading(ruleDescriptor); strings.TrimSpace(h) != "" { header = fmt.Sprintf("## 🐞 %s\n\n", h) } scannerDisp := scannerName @@ -360,26 +360,6 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, s return newIssueData } -// displayRuleHeading returns the preferred human-friendly rule heading for the issue body: -// 1. rule.ShortDescription.Text when available. -// 2. rule.Name when available. -// 3. ruleID as a fallback. -func displayRuleHeading(ruleID string, rule *sarif.ReportingDescriptor) string { - if rule != nil { - if rule.ShortDescription != nil && rule.ShortDescription.Text != nil { - if heading := strings.TrimSpace(*rule.ShortDescription.Text); heading != "" { - return heading - } - } - if rule.Name != nil { - if heading := strings.TrimSpace(*rule.Name); heading != "" { - return heading - } - } - } - return strings.TrimSpace(ruleID) -} - // displayRuleTitleComponent returns the identifier segment to embed in the GitHub issue title. // Prefers rule.Name when available; falls back to ruleID. 
func displayRuleTitleComponent(ruleID string, rule *sarif.ReportingDescriptor) string { diff --git a/cmd/sarif-issues/issue_processing_test.go b/cmd/sarif-issues/issue_processing_test.go index b1928286..967ed712 100644 --- a/cmd/sarif-issues/issue_processing_test.go +++ b/cmd/sarif-issues/issue_processing_test.go @@ -54,41 +54,6 @@ func TestParseIssueBodyFallsBackToHeaderRuleID(t *testing.T) { } } -func TestDisplayRuleHeadingPrefersShortDescription(t *testing.T) { - text := "Short desc" - name := "Rule Name" - rule := &sarif.ReportingDescriptor{ - ShortDescription: &sarif.MultiformatMessageString{ - Text: &text, - }, - Name: &name, - } - - got := displayRuleHeading("rule.id", rule) - if got != "Short desc" { - t.Fatalf("expected short description heading, got %q", got) - } -} - -func TestDisplayRuleHeadingFallsBackToName(t *testing.T) { - name := "Rule Name" - rule := &sarif.ReportingDescriptor{ - Name: &name, - } - - got := displayRuleHeading("rule.id", rule) - if got != "Rule Name" { - t.Fatalf("expected name fallback heading, got %q", got) - } -} - -func TestDisplayRuleHeadingFallsBackToID(t *testing.T) { - got := displayRuleHeading("rule.id", nil) - if got != "rule.id" { - t.Fatalf("expected rule id fallback heading, got %q", got) - } -} - func TestDisplayRuleTitleComponentPrefersName(t *testing.T) { name := "Descriptive Rule" rule := &sarif.ReportingDescriptor{ diff --git a/internal/sarif/path_helpers.go b/internal/sarif/path_helpers.go index 4f138b29..ca62af3f 100644 --- a/internal/sarif/path_helpers.go +++ b/internal/sarif/path_helpers.go @@ -315,3 +315,25 @@ func ExtractRegionFromResult(res *sarif.Result) (int, int) { } return start, end } + +// DisplayRuleHeading returns the preferred human-friendly rule heading for the issue body: +// 1. rule.ShortDescription.Text when available. +// 2. rule.Name when available. +// 3. rule.ID as a fallback. 
+func DisplayRuleHeading(rule *sarif.ReportingDescriptor) string { + if rule != nil { + if rule.ShortDescription != nil && rule.ShortDescription.Text != nil { + if heading := strings.TrimSpace(*rule.ShortDescription.Text); heading != "" { + return heading + } + } + if rule.Name != nil { + if heading := strings.TrimSpace(*rule.Name); heading != "" { + return heading + } + } + // Parse ruleId from rule.ID instead of separate parameter + return strings.TrimSpace(rule.ID) + } + return "" +} diff --git a/internal/sarif/path_helpers_test.go b/internal/sarif/path_helpers_test.go index bbd9e073..8383b597 100644 --- a/internal/sarif/path_helpers_test.go +++ b/internal/sarif/path_helpers_test.go @@ -622,3 +622,50 @@ func TestExtractFileURIFromResult(t *testing.T) { }) } } + +func TestDisplayRuleHeadingPrefersShortDescription(t *testing.T) { + text := "Short desc" + name := "Rule Name" + rule := &sarif.ReportingDescriptor{ + ShortDescription: &sarif.MultiformatMessageString{ + Text: &text, + }, + Name: &name, + } + + got := DisplayRuleHeading(rule) + if got != "Short desc" { + t.Fatalf("expected short description heading, got %q", got) + } +} + +func TestDisplayRuleHeadingFallsBackToName(t *testing.T) { + name := "Rule Name" + rule := &sarif.ReportingDescriptor{ + Name: &name, + } + + got := DisplayRuleHeading(rule) + if got != "Rule Name" { + t.Fatalf("expected name fallback heading, got %q", got) + } +} + +func TestDisplayRuleHeadingFallsBackToID(t *testing.T) { + id := "rule.id" + rule := &sarif.ReportingDescriptor{ + ID: id, + } + + got := DisplayRuleHeading(rule) + if got != "rule.id" { + t.Fatalf("expected rule id fallback heading, got %q", got) + } +} + +func TestDisplayRuleHeadingReturnsEmptyForNil(t *testing.T) { + got := DisplayRuleHeading(nil) + if got != "" { + t.Fatalf("expected empty string for nil rule, got %q", got) + } +} From 87c94a738a58b1c4db72aec35786a2f4dc9ec8df Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sun, 12 Oct 2025 17:21:18 +0200 
Subject: [PATCH 42/52] feat: add default source-folder handling for SARIF issues command --- cmd/sarif-issues/sarif-issues.go | 14 +- cmd/sarif-issues/utils_test.go | 184 ++++++++++++++++++ .../engineering/sarif-issues-path-analysis.md | 13 +- 3 files changed, 209 insertions(+), 2 deletions(-) diff --git a/cmd/sarif-issues/sarif-issues.go b/cmd/sarif-issues/sarif-issues.go index bc898ffe..32b9d4db 100644 --- a/cmd/sarif-issues/sarif-issues.go +++ b/cmd/sarif-issues/sarif-issues.go @@ -93,6 +93,12 @@ func runSarifIssues(cmd *cobra.Command, args []string) error { // 4. Handle git metadata fallbacks ApplyGitMetadataFallbacks(&opts, lg) + // 4.5. Default source-folder to current directory when empty + if strings.TrimSpace(opts.SourceFolder) == "" { + opts.SourceFolder = "." + lg.Debug("defaulted source-folder to current directory") + } + // 5. Validate arguments if err := validate(&opts); err != nil { lg.Error("invalid arguments", "error", err) @@ -158,7 +164,13 @@ func resolveRepositoryMetadata(sourceFolderAbs string, lg hclog.Logger) *git.Rep md, err := git.CollectRepositoryMetadata(sourceFolderAbs) if err != nil { - lg.Debug("unable to collect repository metadata", "error", err) + // If we defaulted to current directory and git metadata collection fails, + // log a concise warning but don't fail hard (preserve existing error guidance) + if sourceFolderAbs == "." 
{ + lg.Warn("unable to collect git metadata from current directory - snippet hashes may not be computed") + } else { + lg.Debug("unable to collect repository metadata", "error", err) + } } return md } diff --git a/cmd/sarif-issues/utils_test.go b/cmd/sarif-issues/utils_test.go index 4dc597c2..db13f6de 100644 --- a/cmd/sarif-issues/utils_test.go +++ b/cmd/sarif-issues/utils_test.go @@ -1502,6 +1502,190 @@ https://github.com/test-org/test-repo/blob/test-ref/main.py#L2 } } +// TestDefaultSourceFolderRegression tests the fix for the issue where omitting --source-folder +// causes snippet hash computation to fail. This validates the documented workflow: +// "Run inside git repository (auto-detects namespace, repository, ref)" +func TestDefaultSourceFolderRegression(t *testing.T) { + // Create a temporary directory structure that mimics a git repository + tempDir, err := os.MkdirTemp("", "sarif-test-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create a git repository structure + repoRoot := filepath.Join(tempDir, "test-repo") + if err := os.Mkdir(repoRoot, 0755); err != nil { + t.Fatalf("failed to create repo root: %v", err) + } + + // Create a test file with content for snippet hashing + testFile := filepath.Join(repoRoot, "main.py") + var builder strings.Builder + for i := 1; i <= 30; i++ { + builder.WriteString(fmt.Sprintf("line %d\n", i)) + } + if err := os.WriteFile(testFile, []byte(builder.String()), 0o644); err != nil { + t.Fatalf("failed to write test file: %v", err) + } + + // Initialize git repository + gitDir := filepath.Join(repoRoot, ".git") + if err := os.Mkdir(gitDir, 0755); err != nil { + t.Fatalf("failed to create .git dir: %v", err) + } + + // Create a mock git config file + configFile := filepath.Join(gitDir, "config") + configContent := `[remote "origin"] + url = https://github.com/test-org/test-repo.git` + if err := os.WriteFile(configFile, []byte(configContent), 0o644); err != nil { 
+ t.Fatalf("failed to write git config: %v", err) + } + + // Create a mock HEAD file + headFile := filepath.Join(gitDir, "HEAD") + if err := os.WriteFile(headFile, []byte("ref: refs/heads/main\n"), 0o644); err != nil { + t.Fatalf("failed to write HEAD file: %v", err) + } + + // Create refs/heads directory and main branch file + refsDir := filepath.Join(gitDir, "refs", "heads") + if err := os.MkdirAll(refsDir, 0755); err != nil { + t.Fatalf("failed to create refs dir: %v", err) + } + mainBranchFile := filepath.Join(refsDir, "main") + commitHash := "aec0b795c350ff53fe9ab01adf862408aa34c3fd" + if err := os.WriteFile(mainBranchFile, []byte(commitHash+"\n"), 0o644); err != nil { + t.Fatalf("failed to write main branch file: %v", err) + } + + // Change to the repository directory to simulate running from inside the repo + originalDir, err := os.Getwd() + if err != nil { + t.Fatalf("failed to get current directory: %v", err) + } + defer os.Chdir(originalDir) + + if err := os.Chdir(repoRoot); err != nil { + t.Fatalf("failed to change to repo directory: %v", err) + } + + // Create test logger + logger := hclog.NewNullLogger() + + // Create repository metadata (simulating successful git metadata collection) + metadata := &git.RepositoryMetadata{ + RepoRootFolder: repoRoot, + Subfolder: "", // No subfolder when running from root + CommitHash: &commitHash, + RepositoryFullName: stringPtr("test-org/test-repo"), + } + + // Create SARIF result pointing to our test file + ruleID := "test.rule" + uriValue := "main.py" // Relative URI as would be in SARIF when scanning from repo root + startLine := 11 + endLine := 29 + message := "Test finding" + + result := &sarif.Result{ + RuleID: &ruleID, + Message: sarif.Message{ + Text: &message, + }, + Locations: []*sarif.Location{ + { + PhysicalLocation: &sarif.PhysicalLocation{ + ArtifactLocation: &sarif.ArtifactLocation{ + URI: &uriValue, + }, + Region: &sarif.Region{ + StartLine: &startLine, + EndLine: &endLine, + }, + }, + }, + }, + } + 
result.PropertyBag = *sarif.NewPropertyBag() + result.Add("Level", "error") + + // Create SARIF report + report := &internalsarif.Report{ + Report: &sarif.Report{ + Runs: []*sarif.Run{ + { + Tool: sarif.Tool{ + Driver: &sarif.ToolComponent{ + Name: "Test Scanner", + Rules: []*sarif.ReportingDescriptor{ + {ID: ruleID}, + }, + }, + }, + Results: []*sarif.Result{result}, + }, + }, + }, + } + + // Test with empty source folder (simulating omitted --source-folder flag) + options := RunOptions{ + Namespace: "test-org", + Repository: "test-repo", + Ref: commitHash, + SourceFolder: "", // Empty source folder - should default to current directory + } + + // Build issues from SARIF + issues := buildNewIssuesFromSARIF(report, options, repoRoot, metadata, logger) + + // Verify results + if len(issues) == 0 { + t.Fatal("expected at least one issue to be created") + } + + issue := issues[0] + + // Verify snippet hash was computed successfully + if issue.Metadata.SnippetHash == "" { + t.Fatal("expected snippet hash to be computed when source-folder defaults to current directory") + } + + // Verify snippet hash is included in issue body + if !strings.Contains(issue.Body, "Snippet SHA256") { + t.Fatal("expected issue body to contain Snippet SHA256 block") + } + + // Verify the snippet hash in the body matches the metadata + expectedHash := issue.Metadata.SnippetHash + if !strings.Contains(issue.Body, expectedHash) { + t.Fatalf("expected issue body to contain snippet hash %q", expectedHash) + } + + // Verify file path is correctly resolved + expectedRepoPath := "main.py" + if issue.Metadata.Filename != expectedRepoPath { + t.Fatalf("expected repo path %q, got %q", expectedRepoPath, issue.Metadata.Filename) + } + + // Verify permalink is generated correctly + expectedPermalink := fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s#L%d-L%d", + options.Namespace, + options.Repository, + commitHash, + expectedRepoPath, + startLine, + endLine, + ) + if !strings.Contains(issue.Body, 
expectedPermalink) { + t.Fatalf("expected issue body to contain permalink %q", expectedPermalink) + } + + t.Logf("Successfully computed snippet hash: %s", issue.Metadata.SnippetHash) +} + // Helper functions for creating test data func stringPtr(s string) *string { return &s diff --git a/docs/engineering/sarif-issues-path-analysis.md b/docs/engineering/sarif-issues-path-analysis.md index ff8fc454..534eb11b 100644 --- a/docs/engineering/sarif-issues-path-analysis.md +++ b/docs/engineering/sarif-issues-path-analysis.md @@ -146,4 +146,15 @@ scanio sarif-issues --sarif snyk-another.sarif --source-folder apps/another - When `--source-folder` points to a subfolder, only issues whose file paths start with that subfolder are considered - When scanning from root (no subfolder), all issues are included as before -**Expected Behavior**: Both sets of issues remain open and are managed independently. Issues from `apps/demo` won't be closed when running the second command for `apps/another`. \ No newline at end of file +**Expected Behavior**: Both sets of issues remain open and are managed independently. Issues from `apps/demo` won't be closed when running the second command for `apps/another`. + +## empty source-folder test +```sh +cd /home/jekos/ghq/github.com/scan-io-git/scanio-test + +snyk code test --sarif-file-output=snyk.sarif + +# create issues +scanio sarif-issues --sarif snyk.sarif --source-folder . 
+scanio sarif-issues --sarif snyk.sarif +``` \ No newline at end of file From 1fa1eedec3041ea5f2c55e48727a325adcd9299f Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sun, 12 Oct 2025 17:28:25 +0200 Subject: [PATCH 43/52] fix: update variable names from opts to options for consistency in issue processing --- cmd/sarif-issues/issue_processing.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index 47fa5506..826237b3 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -487,8 +487,8 @@ func createUnmatchedIssues(unmatchedNew []issuecorrelation.IssueMetadata, newIss }, Title: newTitles[idx], Body: newBodies[idx], - Labels: opts.Labels, - Assignees: opts.Assignees, + Labels: options.Labels, + Assignees: options.Assignees, } err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { From a421b60189dfd420b38a12c645858c94023dc64d Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sun, 12 Oct 2025 17:41:30 +0200 Subject: [PATCH 44/52] refactor: update closeUnmatchedIssues and processSARIFReport functions to return closed issue count and handle logging improvements --- cmd/sarif-issues/issue_processing.go | 19 +++++++++++-------- cmd/sarif-issues/sarif-issues.go | 11 +++++++---- docs/reference/cmd-sarif-issues.md | 9 +++++---- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index 826237b3..55a708a1 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -510,7 +510,8 @@ func createUnmatchedIssues(unmatchedNew []issuecorrelation.IssueMetadata, newIss // closeUnmatchedIssues closes GitHub issues for known findings that don't correlate with current scan results. // Returns an error if any issue closure fails. 
-func closeUnmatchedIssues(unmatchedKnown []issuecorrelation.IssueMetadata, options RunOptions, lg hclog.Logger) error { +func closeUnmatchedIssues(unmatchedKnown []issuecorrelation.IssueMetadata, options RunOptions, lg hclog.Logger) (int, error) { + closed := 0 for _, k := range unmatchedKnown { // known IssueID contains the number as string num, err := strconv.Atoi(k.IssueID) @@ -567,15 +568,16 @@ func closeUnmatchedIssues(unmatchedKnown []issuecorrelation.IssueMetadata, optio if err != nil { lg.Error("failed to close issue via plugin", "error", err, "number", num) // continue closing others but report an error at end - return errors.NewCommandError(options, nil, fmt.Errorf("close issue failed: %w", err), 2) + return closed, errors.NewCommandError(options, nil, fmt.Errorf("close issue failed: %w", err), 2) } + closed++ } - return nil + return closed, nil } // processSARIFReport iterates runs/results in the SARIF report and creates VCS issues for // high severity findings. Returns number of created issues or an error. 
-func processSARIFReport(report *internalsarif.Report, options RunOptions, sourceFolderAbs string, repoMetadata *git.RepositoryMetadata, lg hclog.Logger, openIssues map[int]OpenIssueEntry) (int, error) { +func processSARIFReport(report *internalsarif.Report, options RunOptions, sourceFolderAbs string, repoMetadata *git.RepositoryMetadata, lg hclog.Logger, openIssues map[int]OpenIssueEntry) (int, int, error) { // Build list of new issues from SARIF using extracted function newIssueData := buildNewIssuesFromSARIF(report, options, sourceFolderAbs, repoMetadata, lg) @@ -604,14 +606,15 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, source unmatchedNew := corr.UnmatchedNew() created, err := createUnmatchedIssues(unmatchedNew, newIssues, newBodies, newTitles, options, lg) if err != nil { - return created, err + return created, 0, err } // Close unmatched known issues (open issues that did not correlate) unmatchedKnown := corr.UnmatchedKnown() - if err := closeUnmatchedIssues(unmatchedKnown, options, lg); err != nil { - return created, err + closed, err := closeUnmatchedIssues(unmatchedKnown, options, lg) + if err != nil { + return created, closed, err } - return created, nil + return created, closed, nil } diff --git a/cmd/sarif-issues/sarif-issues.go b/cmd/sarif-issues/sarif-issues.go index 32b9d4db..4009be7d 100644 --- a/cmd/sarif-issues/sarif-issues.go +++ b/cmd/sarif-issues/sarif-issues.go @@ -96,7 +96,7 @@ func runSarifIssues(cmd *cobra.Command, args []string) error { // 4.5. Default source-folder to current directory when empty if strings.TrimSpace(opts.SourceFolder) == "" { opts.SourceFolder = "." - lg.Debug("defaulted source-folder to current directory") + lg.Info("no --source-folder provided; defaulting to current directory", "source_folder", opts.SourceFolder) } // 5. Validate arguments @@ -117,6 +117,9 @@ func runSarifIssues(cmd *cobra.Command, args []string) error { // Collect repository metadata to understand repo root vs. 
subfolder layout repoMetadata := resolveRepositoryMetadata(sourceFolderAbs, lg) + if repoMetadata == nil { + lg.Warn("git metadata unavailable; permalinks and snippet hashing may be degraded", "source_folder", sourceFolderAbs) + } // Enrich to ensure Levels and Titles are present report.EnrichResultsLevelProperty() @@ -131,15 +134,15 @@ func runSarifIssues(cmd *cobra.Command, args []string) error { lg.Info("fetched open issues from repository", "count", len(openIssues)) // 8. Process SARIF report and create/close issues - created, err := processSARIFReport(report, opts, sourceFolderAbs, repoMetadata, lg, openIssues) + created, closed, err := processSARIFReport(report, opts, sourceFolderAbs, repoMetadata, lg, openIssues) if err != nil { lg.Error("failed to process SARIF report", "error", err) return err } // 9. Log success and handle output - lg.Info("issues created from SARIF high severity findings", "count", created) - fmt.Printf("Created %d issue(s) from SARIF high severity findings\n", created) + lg.Info("sarif-issues run completed", "created", created, "closed", closed) + fmt.Printf("Created %d issue(s); closed %d resolved issue(s)\n", created, closed) return nil } diff --git a/docs/reference/cmd-sarif-issues.md b/docs/reference/cmd-sarif-issues.md index c16a904c..c9bfc6b5 100644 --- a/docs/reference/cmd-sarif-issues.md +++ b/docs/reference/cmd-sarif-issues.md @@ -36,7 +36,7 @@ scanio sarif-issues --sarif PATH [--namespace NAMESPACE] [--repository REPO] [-- | `--sarif` | string | Yes | `none` | Path to SARIF report file containing security findings. | | `--namespace` | string | Conditional | `$GITHUB_REPOSITORY_OWNER` | GitHub organization or user name. Required if environment variable not set. | | `--repository` | string | Conditional | `${GITHUB_REPOSITORY#*/}` | Repository name. Required if environment variable not set. | -| `--source-folder` | string | No | `none` | Path to source code folder for improved file path resolution and snippets. 
| +| `--source-folder` | string | No | `.` | Path to source code folder for improved file path resolution and snippets. | | `--ref` | string | No | `$GITHUB_SHA` | Git ref (branch or commit SHA) for building permalinks to vulnerable code. | | `--labels` | strings | No | `none` | Labels to assign to created GitHub issues (comma-separated or repeat flag). | | `--assignees` | strings | No | `none` | GitHub usernames to assign to created issues (comma-separated or repeat flag). | @@ -84,7 +84,7 @@ For detailed GitHub plugin configuration, refer to [GitHub Plugin Documentation] ## Usage Examples -> **Recommendation:** Run the command from your repository root and pass `--source-folder` as repo-relative paths (for example `--source-folder apps/demo`). This keeps permalinks and snippet hashing consistent across environments. Even when .git repo is corrupted or is missing. +> **Recommendation:** Run the command from your repository root and pass `--source-folder` as repo-relative paths (for example `--source-folder apps/demo`). The flag defaults to `.` when omitted; if git metadata cannot be detected, permalinks and snippet hashing may be incomplete. ### Basic Usage in GitHub Actions Create issues from SARIF report using environment variables: @@ -114,7 +114,7 @@ scanio sarif-issues --sarif results/semgrep.sarif --source-folder . --ref featur ### User Mode Output ``` -Created 3 issue(s) from SARIF high severity findings +Created 3 issue(s); closed 1 resolved issue(s) ``` ### Logging Information @@ -135,7 +135,7 @@ The command implements intelligent issue correlation to manage the lifecycle of ### Automatic Issue Closure - **Resolved Findings**: Automatically closes open issues that don't correlate with current scan results -- **Comment Before Closure**: Adds brief explanatory comment. +- **Comment Before Closure**: Adds comment `Recent scan didn't see the issue; closing this as resolved.`. 
- **Managed Issues Only**: Only closes issues containing the scanio-managed annotation to avoid affecting manually created issues ### Correlation Criteria @@ -237,6 +237,7 @@ Scanio prefers the SARIF rule's short description for the heading; if that is mi ```markdown ## 🐞 javascript.express.security.audit.express-check-csurf-middleware-usage.express-check-csurf-middleware-usage +> **Rule ID**: javascript.express.security.audit.express-check-csurf-middleware-usage.express-check-csurf-middleware-usage > **Severity**: High, **Scanner**: Semgrep OSS > **File**: app.js, **Lines**: 42-45 From 41c2a59e885b883121543c0596e80f91ab8ef03f Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sun, 12 Oct 2025 18:19:19 +0200 Subject: [PATCH 45/52] feat: add configurable severity levels for SARIF issues command and update documentation --- cmd/sarif-issues/issue_processing.go | 17 ++++++- cmd/sarif-issues/sarif-issues.go | 23 +++++++-- cmd/sarif-issues/utils.go | 74 ++++++++++++++++++++++++++++ docs/reference/README.md | 2 +- docs/reference/cmd-sarif-issues.md | 68 +++++++++++++++++++++++-- 5 files changed, 174 insertions(+), 10 deletions(-) diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index 55a708a1..32d44eb1 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -194,7 +194,7 @@ func buildNewIssuesFromSARIF(report *internalsarif.Report, options RunOptions, s for _, res := range run.Results { level, _ := res.Properties["Level"].(string) - if strings.ToLower(level) != "error" { + if !isLevelAllowed(strings.ToLower(level), options.Levels) { continue } @@ -618,3 +618,18 @@ func processSARIFReport(report *internalsarif.Report, options RunOptions, source return created, closed, nil } + +// isLevelAllowed checks if a SARIF level is in the allowed levels list +func isLevelAllowed(level string, allowedLevels []string) bool { + // If no levels specified, default to "error" for backward compatibility + 
if allowedLevels == nil || len(allowedLevels) == 0 { + return strings.ToLower(level) == "error" + } + + for _, allowed := range allowedLevels { + if strings.ToLower(level) == strings.ToLower(allowed) { + return true + } + } + return false +} diff --git a/cmd/sarif-issues/sarif-issues.go b/cmd/sarif-issues/sarif-issues.go index 4009be7d..7176adad 100644 --- a/cmd/sarif-issues/sarif-issues.go +++ b/cmd/sarif-issues/sarif-issues.go @@ -32,6 +32,7 @@ type RunOptions struct { Ref string `json:"ref,omitempty"` Labels []string `json:"labels,omitempty"` Assignees []string `json:"assignees,omitempty"` + Levels []string `json:"levels,omitempty"` } var ( @@ -45,9 +46,15 @@ var ( # Run inside git repository (auto-detects namespace, repository, ref) scanio sarif-issues --sarif semgrep-demo.sarif --source-folder apps/demo - # Create issues from SARIF report with basic configuration + # Create issues from SARIF report with basic configuration (default: error level only) scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif + # Create issues for multiple severity levels using SARIF levels + scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --levels error,warning + + # Create issues for multiple severity levels using display levels + scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --levels High,Medium + # Create issues with labels and assignees scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --labels bug,security --assignees alice,bob @@ -62,8 +69,8 @@ var ( // SarifIssuesCmd represents the command to create GitHub issues from a SARIF file. 
SarifIssuesCmd = &cobra.Command{ - Use: "sarif-issues --sarif PATH [--namespace NAMESPACE] [--repository REPO] [--source-folder PATH] [--ref REF] [--labels label[,label...]] [--assignees user[,user...]]", - Short: "Create GitHub issues for high severity SARIF findings", + Use: "sarif-issues --sarif PATH [--namespace NAMESPACE] [--repository REPO] [--source-folder PATH] [--ref REF] [--labels label[,label...]] [--assignees user[,user...]] [--levels level[,level...]]", + Short: "Create GitHub issues for SARIF findings with configurable severity levels", Example: exampleSarifIssuesUsage, SilenceUsage: false, Hidden: false, @@ -99,6 +106,15 @@ func runSarifIssues(cmd *cobra.Command, args []string) error { lg.Info("no --source-folder provided; defaulting to current directory", "source_folder", opts.SourceFolder) } + // 4.6. Validate and normalize severity levels + normalizedLevels, err := normalizeAndValidateLevels(opts.Levels) + if err != nil { + lg.Error("invalid severity levels", "error", err) + return errors.NewCommandError(opts, nil, fmt.Errorf("invalid severity levels: %w", err), 1) + } + opts.Levels = normalizedLevels + lg.Debug("normalized severity levels", "levels", opts.Levels) + // 5. Validate arguments if err := validate(&opts); err != nil { lg.Error("invalid arguments", "error", err) @@ -157,6 +173,7 @@ func init() { SarifIssuesCmd.Flags().StringSliceVar(&opts.Labels, "labels", nil, "Optional: labels to assign to created GitHub issues (repeat flag or use comma-separated values)") // --assignees supports multiple usages or comma-separated values SarifIssuesCmd.Flags().StringSliceVar(&opts.Assignees, "assignees", nil, "Optional: assignees (GitHub logins) to assign to created issues (repeat flag or use comma-separated values)") + SarifIssuesCmd.Flags().StringSliceVar(&opts.Levels, "levels", []string{"error"}, "SARIF severity levels to process: SARIF levels (error, warning, note, none) or display levels (High, Medium, Low, Info). Cannot mix formats. 
(repeat flag or use comma-separated values)") SarifIssuesCmd.Flags().BoolP("help", "h", false, "Show help for sarif-issues command.") } diff --git a/cmd/sarif-issues/utils.go b/cmd/sarif-issues/utils.go index 64e43f95..77ad9bfb 100644 --- a/cmd/sarif-issues/utils.go +++ b/cmd/sarif-issues/utils.go @@ -59,6 +59,80 @@ func displaySeverity(level string) string { } } +// normalizeAndValidateLevels validates and normalizes severity levels input. +// Accepts both SARIF levels (error, warning, note, none) and display levels (High, Medium, Low, Info). +// Returns normalized SARIF levels and an error if mixing formats is detected. +func normalizeAndValidateLevels(levels []string) ([]string, error) { + if len(levels) == 0 { + return []string{"error"}, nil + } + + var sarifLevels []string + var displayLevels []string + var normalized []string + + // Check each level and categorize + for _, level := range levels { + normalizedLevel := strings.ToLower(strings.TrimSpace(level)) + + // Check if it's a SARIF level + if isSARIFLevel(normalizedLevel) { + sarifLevels = append(sarifLevels, normalizedLevel) + normalized = append(normalized, normalizedLevel) + } else if isDisplayLevel(normalizedLevel) { + displayLevels = append(displayLevels, normalizedLevel) + // Convert display level to SARIF level + sarifLevel := displayToSARIFLevel(normalizedLevel) + normalized = append(normalized, sarifLevel) + } else { + return nil, fmt.Errorf("invalid severity level '%s'. Valid SARIF levels: error, warning, note, none. 
Valid display levels: high, medium, low, info", level) + } + } + + // Check for mixing formats + if len(sarifLevels) > 0 && len(displayLevels) > 0 { + return nil, fmt.Errorf("cannot mix SARIF levels (error, warning, note, none) with display levels (High, Medium, Low, Info)") + } + + return normalized, nil +} + +// isSARIFLevel checks if the normalized level is a valid SARIF level +func isSARIFLevel(level string) bool { + switch level { + case "error", "warning", "note", "none": + return true + default: + return false + } +} + +// isDisplayLevel checks if the normalized level is a valid display level +func isDisplayLevel(level string) bool { + switch level { + case "high", "medium", "low", "info": + return true + default: + return false + } +} + +// displayToSARIFLevel converts a display level to its corresponding SARIF level +func displayToSARIFLevel(displayLevel string) string { + switch displayLevel { + case "high": + return "error" + case "medium": + return "warning" + case "low": + return "note" + case "info": + return "none" + default: + return displayLevel + } +} + // generateOWASPSlug creates a URL-safe slug from OWASP title text. // Converts spaces to underscores and removes non-alphanumeric characters except hyphens and underscores. func generateOWASPSlug(title string) string { diff --git a/docs/reference/README.md b/docs/reference/README.md index 95bb9a19..773e6ac2 100644 --- a/docs/reference/README.md +++ b/docs/reference/README.md @@ -11,7 +11,7 @@ This section provides detailed technical documentation for Scanio’s commands, - [List Command](cmd-list.md): Describes repository discovery functionality across supported VCS platforms, available filtering options, and command output structure. - [Fetch Command](cmd-fetch.md): Explains repository fetching logic, supported authentication types, URL formats, and command output structure. 
- [Analyse Command](cmd-analyse.md): Provides details on running security scanners, handling input data, configuring output formats, and command output structure. -- [SARIF Issues Command](cmd-sarif-issues.md): Explains how to create GitHub issues from high severity SARIF findings, with automated lifecycle management. +- [SARIF Issues Command](cmd-sarif-issues.md): Explains how to create GitHub issues from SARIF findings with configurable severity levels, with automated lifecycle management. - [To-HTML Command](cmd-to-html.md): Explains conversion of SARIF reports to human-friendly HTML format, code snippet inclusion, and template customization options. - [Report Patch Command](cmd-report-patch.md): Details how to make structured modifications to SARIF reports, including different filtering capabilities and actions. diff --git a/docs/reference/cmd-sarif-issues.md b/docs/reference/cmd-sarif-issues.md index c9bfc6b5..656ffc5a 100644 --- a/docs/reference/cmd-sarif-issues.md +++ b/docs/reference/cmd-sarif-issues.md @@ -1,5 +1,5 @@ # SARIF Issues Command -The `sarif-issues` command creates GitHub issues from high severity findings in SARIF reports. It implements intelligent issue correlation to avoid duplicates and automatically closes issues that are no longer present in recent scans. +The `sarif-issues` command creates GitHub issues from SARIF findings with configurable severity levels. It implements intelligent issue correlation to avoid duplicates and automatically closes issues that are no longer present in recent scans. This command is designed and recommended for CI/CD integration and automated security issue management, enabling teams to track and manage security findings directly in their GitHub repositories. 
@@ -8,6 +8,7 @@ This command is designed and recommended for CI/CD integration and automated sec - [Key Features](#key-features) - [Syntax](#syntax) - [Options](#options) +- [Severity Level Configuration](#severity-level-configuration) - [Core Validation](#core-validation) - [GitHub Authentication Setup](#github-authentication-setup) - [Usage Examples](#usage-examples) @@ -19,14 +20,14 @@ This command is designed and recommended for CI/CD integration and automated sec | Feature | Description | |-------------------------------------------|----------------------------------------------------------| -| Create issues from high severity findings | Automatically creates GitHub issues for SARIF findings with "error" level | +| Create issues from configurable severity levels | Automatically creates GitHub issues for SARIF findings with specified severity levels (default: "error") | | Correlate with existing issues | Matches new findings against open issues to prevent duplicates | | Auto-close resolved issues | Closes open issues that are no longer present in current scan results | | Add metadata and permalinks | Enriches issues with file links, severity, scanner info, and code snippets | ## Syntax ```bash -scanio sarif-issues --sarif PATH [--namespace NAMESPACE] [--repository REPO] [--source-folder PATH] [--ref REF] [--labels label[,label...]] [--assignees user[,user...]] +scanio sarif-issues --sarif PATH [--namespace NAMESPACE] [--repository REPO] [--source-folder PATH] [--ref REF] [--labels label[,label...]] [--assignees user[,user...]] [--levels level[,level...]] ``` ## Options @@ -40,6 +41,7 @@ scanio sarif-issues --sarif PATH [--namespace NAMESPACE] [--repository REPO] [-- | `--ref` | string | No | `$GITHUB_SHA` | Git ref (branch or commit SHA) for building permalinks to vulnerable code. | | `--labels` | strings | No | `none` | Labels to assign to created GitHub issues (comma-separated or repeat flag). 
| | `--assignees` | strings | No | `none` | GitHub usernames to assign to created issues (comma-separated or repeat flag). | +| `--levels` | strings | No | `["error"]` | SARIF severity levels to process. Accepts SARIF levels (error, warning, note, none) or display levels (High, Medium, Low, Info). Cannot mix formats. Case-insensitive. | | `--help`, `-h` | flag | No | `false` | Displays help for the `sarif-issues` command. | **Environment Variable Fallbacks**
@@ -50,12 +52,57 @@ The command automatically uses GitHub Actions environment variables when flags a This enables seamless integration with GitHub Actions workflows without explicit configuration. +## Severity Level Configuration + +The `--levels` flag allows you to specify which SARIF severity levels should trigger issue creation. This provides flexibility in managing different types of security findings based on your team's priorities. + +### Supported Level Formats + +**SARIF Levels** (native SARIF format): +- `error` - High severity findings (default) +- `warning` - Medium severity findings +- `note` - Low severity findings +- `none` - Informational findings + +**Display Levels** (human-readable format): +- `High` - Maps to SARIF `error` +- `Medium` - Maps to SARIF `warning` +- `Low` - Maps to SARIF `note` +- `Info` - Maps to SARIF `none` + +### Usage Rules + +- **Case-insensitive**: All level comparisons are case-insensitive +- **Format consistency**: Cannot mix SARIF and display levels in the same command +- **Multiple values**: Use comma-separated values or repeat the flag +- **Default behavior**: When `--levels` is not specified, only `error` level findings create issues + +### Examples + +```bash +# Default behavior (error level only) +scanio sarif-issues --sarif report.sarif + +# Multiple SARIF levels +scanio sarif-issues --sarif report.sarif --levels error,warning + +# Multiple display levels +scanio sarif-issues --sarif report.sarif --levels High,Medium + +# All severity levels using SARIF format +scanio sarif-issues --sarif report.sarif --levels error,warning,note,none + +# Invalid mixing (will error) +scanio sarif-issues --sarif report.sarif --levels error,High +``` + ## Core Validation The `sarif-issues` command includes several validation layers to ensure robust execution: - **Required Parameters**: Validates that `--sarif`, `--namespace`, and `--repository` are provided either via flags or environment variables. 
- **SARIF File Validation**: Ensures the SARIF file exists and can be parsed successfully. - **GitHub Authentication**: Requires valid GitHub credentials configured through the GitHub plugin. -- **High Severity Filtering**: Only processes SARIF results with `Level: "error"` to focus on critical findings. +- **Severity Level Validation**: Validates and normalizes severity levels, preventing mixing of SARIF and display level formats. +- **Configurable Severity Filtering**: Processes SARIF results based on specified severity levels (default: "error" only). ## GitHub Authentication Setup @@ -104,6 +151,17 @@ Create issues with source code snippets, labels, and assignees: scanio sarif-issues --sarif results/semgrep.sarif --source-folder . --labels bug,security --assignees alice,bob ``` +### Configurable Severity Levels +Create issues for multiple severity levels using SARIF levels: +```bash +scanio sarif-issues --sarif results/semgrep.sarif --levels error,warning +``` + +Create issues for multiple severity levels using display levels: +```bash +scanio sarif-issues --sarif results/semgrep.sarif --levels High,Medium +``` + ### With Custom Git Reference Create issues with specific commit reference for permalinks: ```bash @@ -129,7 +187,7 @@ The command provides some logging information including: The command implements intelligent issue correlation to manage the lifecycle of security findings: ### New Issue Creation -- **High Severity Only**: Only creates issues for SARIF findings with `Level: "error"` +- **Configurable Severity Levels**: Creates issues for SARIF findings with specified severity levels (default: "error" only) - **Duplicate Prevention**: Uses hierarchical correlation to match new findings against existing open issues - **Unmatched Findings**: Creates GitHub issues only for findings that don't match existing open issues through any correlation stage From d312ef28dd61716df0aec667bc873c2ecc9494d3 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sun, 12 
Oct 2025 18:29:15 +0200 Subject: [PATCH 46/52] feat: implement dry-run mode for SARIF issues command --- cmd/sarif-issues/issue_processing.go | 35 +++++ cmd/sarif-issues/issue_processing_test.go | 175 ++++++++++++++++++++++ cmd/sarif-issues/sarif-issues.go | 13 +- docs/reference/cmd-sarif-issues.md | 88 ++++++++++- 4 files changed, 307 insertions(+), 4 deletions(-) diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index 32d44eb1..5121e0dc 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -477,6 +477,24 @@ func createUnmatchedIssues(unmatchedNew []issuecorrelation.IssueMetadata, newIss continue } + if options.DryRun { + // Print dry-run information instead of making API call + fmt.Printf("[DRY RUN] Would create issue:\n") + fmt.Printf(" Title: %s\n", newTitles[idx]) + fmt.Printf(" File: %s\n", u.Filename) + fmt.Printf(" Lines: %d", u.StartLine) + if u.EndLine > u.StartLine { + fmt.Printf("-%d", u.EndLine) + } + fmt.Printf("\n") + fmt.Printf(" Severity: %s\n", u.Severity) + fmt.Printf(" Scanner: %s\n", u.Scanner) + fmt.Printf(" Rule ID: %s\n", u.RuleID) + fmt.Printf("\n") + created++ + continue + } + req := shared.VCSIssueCreationRequest{ VCSRequestBase: shared.VCSRequestBase{ RepoParam: shared.RepositoryParams{ @@ -519,6 +537,23 @@ func closeUnmatchedIssues(unmatchedKnown []issuecorrelation.IssueMetadata, optio // skip if we can't parse number continue } + + if options.DryRun { + // Print dry-run information instead of making API calls + fmt.Printf("[DRY RUN] Would close issue #%d:\n", num) + fmt.Printf(" File: %s\n", k.Filename) + fmt.Printf(" Lines: %d", k.StartLine) + if k.EndLine > k.StartLine { + fmt.Printf("-%d", k.EndLine) + } + fmt.Printf("\n") + fmt.Printf(" Rule ID: %s\n", k.RuleID) + fmt.Printf(" Reason: Not found in current scan\n") + fmt.Printf("\n") + closed++ + continue + } + // Leave a comment before closing the issue to explain why it is being closed commentReq := 
shared.VCSCreateIssueCommentRequest{ VCSRequestBase: shared.VCSRequestBase{ diff --git a/cmd/sarif-issues/issue_processing_test.go b/cmd/sarif-issues/issue_processing_test.go index 967ed712..85b55526 100644 --- a/cmd/sarif-issues/issue_processing_test.go +++ b/cmd/sarif-issues/issue_processing_test.go @@ -1,11 +1,14 @@ package sarifissues import ( + "bytes" + "os" "testing" "github.com/hashicorp/go-hclog" "github.com/owenrumney/go-sarif/v2/sarif" "github.com/scan-io-git/scan-io/internal/git" + issuecorrelation "github.com/scan-io-git/scan-io/pkg/issuecorrelation" ) func TestParseIssueBodyUsesMetadataRuleID(t *testing.T) { @@ -229,3 +232,175 @@ func TestFilterIssuesBySourceFolder(t *testing.T) { }) } } + +func TestCreateUnmatchedIssuesDryRun(t *testing.T) { + // Capture stdout + oldStdout := os.Stdout + r, w, _ := os.Pipe() + os.Stdout = w + + // Test data + unmatchedNew := []issuecorrelation.IssueMetadata{ + { + IssueID: "test-1", + Scanner: "Semgrep", + RuleID: "sql-injection", + Severity: "error", + Filename: "app.py", + StartLine: 11, + EndLine: 29, + SnippetHash: "abc123", + }, + { + IssueID: "test-2", + Scanner: "Snyk", + RuleID: "xss-vulnerability", + Severity: "warning", + Filename: "main.js", + StartLine: 5, + EndLine: 5, + SnippetHash: "def456", + }, + } + + newIssues := []issuecorrelation.IssueMetadata{ + unmatchedNew[0], unmatchedNew[1], + } + + newBodies := []string{ + "Test body for issue 1", + "Test body for issue 2", + } + + newTitles := []string{ + "[Semgrep][High][sql-injection] at app.py:11-29", + "[Snyk][Medium][xss-vulnerability] at main.js:5", + } + + options := RunOptions{ + DryRun: true, + } + + logger := hclog.NewNullLogger() + + // Test dry-run mode + created, err := createUnmatchedIssues(unmatchedNew, newIssues, newBodies, newTitles, options, logger) + + // Restore stdout + w.Close() + os.Stdout = oldStdout + + // Read captured output + var buf bytes.Buffer + buf.ReadFrom(r) + output := buf.String() + + // Verify results + if err != nil { 
+ t.Fatalf("unexpected error: %v", err) + } + + if created != 2 { + t.Fatalf("expected 2 created issues, got %d", created) + } + + // Check output contains expected dry-run information + expectedOutputs := []string{ + "[DRY RUN] Would create issue:", + "Title: [Semgrep][High][sql-injection] at app.py:11-29", + "File: app.py", + "Lines: 11-29", + "Severity: error", + "Scanner: Semgrep", + "Rule ID: sql-injection", + "Title: [Snyk][Medium][xss-vulnerability] at main.js:5", + "File: main.js", + "Lines: 5", + "Severity: warning", + "Scanner: Snyk", + "Rule ID: xss-vulnerability", + } + + for _, expected := range expectedOutputs { + if !bytes.Contains(buf.Bytes(), []byte(expected)) { + t.Fatalf("expected output to contain %q, got:\n%s", expected, output) + } + } +} + +func TestCloseUnmatchedIssuesDryRun(t *testing.T) { + // Capture stdout + oldStdout := os.Stdout + r, w, _ := os.Pipe() + os.Stdout = w + + // Test data + unmatchedKnown := []issuecorrelation.IssueMetadata{ + { + IssueID: "42", + Scanner: "Semgrep", + RuleID: "deprecated-rule", + Severity: "error", + Filename: "old-file.py", + StartLine: 5, + EndLine: 10, + SnippetHash: "xyz789", + }, + { + IssueID: "123", + Scanner: "Snyk", + RuleID: "old-vulnerability", + Severity: "warning", + Filename: "legacy.js", + StartLine: 15, + EndLine: 15, + SnippetHash: "abc123", + }, + } + + options := RunOptions{ + DryRun: true, + } + + logger := hclog.NewNullLogger() + + // Test dry-run mode + closed, err := closeUnmatchedIssues(unmatchedKnown, options, logger) + + // Restore stdout + w.Close() + os.Stdout = oldStdout + + // Read captured output + var buf bytes.Buffer + buf.ReadFrom(r) + output := buf.String() + + // Verify results + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if closed != 2 { + t.Fatalf("expected 2 closed issues, got %d", closed) + } + + // Check output contains expected dry-run information + expectedOutputs := []string{ + "[DRY RUN] Would close issue #42:", + "File: old-file.py", + 
"Lines: 5-10", + "Rule ID: deprecated-rule", + "Reason: Not found in current scan", + "[DRY RUN] Would close issue #123:", + "File: legacy.js", + "Lines: 15", + "Rule ID: old-vulnerability", + } + + for _, expected := range expectedOutputs { + if !bytes.Contains(buf.Bytes(), []byte(expected)) { + t.Fatalf("expected output to contain %q, got:\n%s", expected, output) + } + } +} diff --git a/cmd/sarif-issues/sarif-issues.go b/cmd/sarif-issues/sarif-issues.go index 7176adad..ef64df92 100644 --- a/cmd/sarif-issues/sarif-issues.go +++ b/cmd/sarif-issues/sarif-issues.go @@ -33,6 +33,7 @@ type RunOptions struct { Labels []string `json:"labels,omitempty"` Assignees []string `json:"assignees,omitempty"` Levels []string `json:"levels,omitempty"` + DryRun bool `json:"dry_run,omitempty"` } var ( @@ -65,7 +66,10 @@ var ( scanio sarif-issues --namespace scan-io-git --repository scan-io --sarif /path/to/report.sarif --ref feature-branch # Using environment variables (GitHub Actions) - GITHUB_REPOSITORY_OWNER=scan-io-git GITHUB_REPOSITORY=scan-io-git/scan-io GITHUB_SHA=abc123 scanio sarif-issues --sarif /path/to/report.sarif` + GITHUB_REPOSITORY_OWNER=scan-io-git GITHUB_REPOSITORY=scan-io-git/scan-io GITHUB_SHA=abc123 scanio sarif-issues --sarif /path/to/report.sarif + + # Preview what issues would be created/closed without making actual GitHub calls + scanio sarif-issues --sarif /path/to/report.sarif --dry-run` // SarifIssuesCmd represents the command to create GitHub issues from a SARIF file. SarifIssuesCmd = &cobra.Command{ @@ -158,7 +162,11 @@ func runSarifIssues(cmd *cobra.Command, args []string) error { // 9. 
Log success and handle output lg.Info("sarif-issues run completed", "created", created, "closed", closed) - fmt.Printf("Created %d issue(s); closed %d resolved issue(s)\n", created, closed) + if opts.DryRun { + fmt.Printf("[DRY RUN] Would create %d issue(s); would close %d resolved issue(s)\n", created, closed) + } else { + fmt.Printf("Created %d issue(s); closed %d resolved issue(s)\n", created, closed) + } return nil } @@ -174,6 +182,7 @@ func init() { // --assignees supports multiple usages or comma-separated values SarifIssuesCmd.Flags().StringSliceVar(&opts.Assignees, "assignees", nil, "Optional: assignees (GitHub logins) to assign to created issues (repeat flag or use comma-separated values)") SarifIssuesCmd.Flags().StringSliceVar(&opts.Levels, "levels", []string{"error"}, "SARIF severity levels to process: SARIF levels (error, warning, note, none) or display levels (High, Medium, Low, Info). Cannot mix formats. (repeat flag or use comma-separated values)") + SarifIssuesCmd.Flags().BoolVar(&opts.DryRun, "dry-run", false, "Show what issues would be created/closed without making actual GitHub API calls") SarifIssuesCmd.Flags().BoolP("help", "h", false, "Show help for sarif-issues command.") } diff --git a/docs/reference/cmd-sarif-issues.md b/docs/reference/cmd-sarif-issues.md index 656ffc5a..d57a45c5 100644 --- a/docs/reference/cmd-sarif-issues.md +++ b/docs/reference/cmd-sarif-issues.md @@ -10,6 +10,7 @@ This command is designed and recommended for CI/CD integration and automated sec - [Options](#options) - [Severity Level Configuration](#severity-level-configuration) - [Core Validation](#core-validation) +- [Dry Run Mode](#dry-run-mode) - [GitHub Authentication Setup](#github-authentication-setup) - [Usage Examples](#usage-examples) - [Command Output Format](#command-output-format) @@ -27,7 +28,7 @@ This command is designed and recommended for CI/CD integration and automated sec ## Syntax ```bash -scanio sarif-issues --sarif PATH [--namespace NAMESPACE] 
[--repository REPO] [--source-folder PATH] [--ref REF] [--labels label[,label...]] [--assignees user[,user...]] [--levels level[,level...]] +scanio sarif-issues --sarif PATH [--namespace NAMESPACE] [--repository REPO] [--source-folder PATH] [--ref REF] [--labels label[,label...]] [--assignees user[,user...]] [--levels level[,level...]] [--dry-run] ``` ## Options @@ -104,6 +105,49 @@ The `sarif-issues` command includes several validation layers to ensure robust e - **Severity Level Validation**: Validates and normalizes severity levels, preventing mixing of SARIF and display level formats. - **Configurable Severity Filtering**: Processes SARIF results based on specified severity levels (default: "error" only). +## Dry Run Mode + +The `--dry-run` flag allows you to preview what the command would do without making actual GitHub API calls. This is particularly useful for: + +- **Testing and Validation**: Verify the command behavior before running in production +- **Understanding Impact**: See exactly what issues would be created or closed +- **Debugging**: Troubleshoot issue correlation logic and SARIF processing +- **CI/CD Integration**: Validate SARIF files and command configuration + +### Dry Run Output Format + +When using `--dry-run`, the command provides detailed preview information: + +**For issues to be created:** +``` +[DRY RUN] Would create issue: + Title: [Semgrep][High][sql-injection] at app.py:11-29 + File: apps/demo/main.py + Lines: 11-29 + Severity: High + Scanner: Semgrep + Rule ID: sql-injection +``` + +**For issues to be closed:** +``` +[DRY RUN] Would close issue #42: + File: apps/demo/old-file.py + Lines: 5-10 + Rule ID: deprecated-rule + Reason: Not found in current scan +``` + +**Final summary:** +``` +[DRY RUN] Would create 3 issue(s); would close 1 resolved issue(s) +``` + +### Usage Example +```bash +scanio sarif-issues --sarif results/semgrep.sarif --dry-run +``` + ## GitHub Authentication Setup The `sarif-issues` command requires GitHub 
authentication to create and manage issues. Configure authentication using one of the following methods: @@ -168,13 +212,53 @@ Create issues with specific commit reference for permalinks: scanio sarif-issues --sarif results/semgrep.sarif --source-folder . --ref feature-branch ``` +### Dry Run Mode +Preview what issues would be created/closed without making actual GitHub API calls: +```bash +scanio sarif-issues --sarif results/semgrep.sarif --dry-run +``` + +This is useful for: +- Testing and validation before running in production +- Understanding what the command would do without making changes +- Debugging issue correlation logic +- Verifying SARIF file processing + ## Command Output Format -### User Mode Output +### Normal Mode Output ``` Created 3 issue(s); closed 1 resolved issue(s) ``` +### Dry Run Mode Output +When using `--dry-run`, the command shows detailed preview information: + +**For issues to be created:** +``` +[DRY RUN] Would create issue: + Title: [Semgrep][High][sql-injection] at app.py:11-29 + File: apps/demo/main.py + Lines: 11-29 + Severity: High + Scanner: Semgrep + Rule ID: sql-injection +``` + +**For issues to be closed:** +``` +[DRY RUN] Would close issue #42: + File: apps/demo/old-file.py + Lines: 5-10 + Rule ID: deprecated-rule + Reason: Not found in current scan +``` + +**Final summary:** +``` +[DRY RUN] Would create 3 issue(s); would close 1 resolved issue(s) +``` + ### Logging Information The command provides some logging information including: - Number of open issues fetched from the repository From e20b3abcf8b6348bce6093392d8f1a2b794f8c3d Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sun, 12 Oct 2025 18:55:27 +0200 Subject: [PATCH 47/52] docs: update cmd-sarif-issues documentation to reflect changes in metadata enrichment and required parameters validation --- docs/reference/cmd-sarif-issues.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/reference/cmd-sarif-issues.md 
b/docs/reference/cmd-sarif-issues.md index d57a45c5..db17a678 100644 --- a/docs/reference/cmd-sarif-issues.md +++ b/docs/reference/cmd-sarif-issues.md @@ -24,7 +24,7 @@ This command is designed and recommended for CI/CD integration and automated sec | Create issues from configurable severity levels | Automatically creates GitHub issues for SARIF findings with specified severity levels (default: "error") | | Correlate with existing issues | Matches new findings against open issues to prevent duplicates | | Auto-close resolved issues | Closes open issues that are no longer present in current scan results | -| Add metadata and permalinks | Enriches issues with file links, severity, scanner info, and code snippets | +| Add metadata and permalinks | Enriches issues with file links, severity, scanner info, and snippet hashes | ## Syntax ```bash @@ -99,7 +99,7 @@ scanio sarif-issues --sarif report.sarif --levels error,High ## Core Validation The `sarif-issues` command includes several validation layers to ensure robust execution: -- **Required Parameters**: Validates that `--sarif`, `--namespace`, and `--repository` are provided either via flags or environment variables. +- **Required Parameters**: Validates that `--sarif`, `--namespace`, and `--repository` are provided via flags, environment variables, or auto-detected git metadata. - **SARIF File Validation**: Ensures the SARIF file exists and can be parsed successfully. - **GitHub Authentication**: Requires valid GitHub credentials configured through the GitHub plugin. - **Severity Level Validation**: Validates and normalizes severity levels, preventing mixing of SARIF and display level formats. 
@@ -356,7 +356,7 @@ Scanio prefers the SARIF rule's short description for the heading; if that is mi **GitHub Permalink** - Direct link to vulnerable code in repository -- Uses commit SHA for permanent links +- Uses the `--ref` value when supplied (branch or SHA), falling back to the current commit hash from git metadata for stable links - Includes line number anchors: `#L42-L45` **Security References** From 75005707923f8bd6f894e4d7b6fa9835d3fce074 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sun, 12 Oct 2025 19:06:47 +0200 Subject: [PATCH 48/52] refactor: update sarifissues command to utilize a global logger and adjust function signatures for improved logging --- cmd/root.go | 2 +- cmd/sarif-issues/issue_processing.go | 10 +++++----- cmd/sarif-issues/sarif-issues.go | 11 ++++++----- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 854093b5..1fe282ef 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -88,7 +88,7 @@ func initConfig() { fetch.Init(AppConfig, Logger.Named("fetch")) analyse.Init(AppConfig, Logger.Named("analyse")) integrationvcs.Init(AppConfig, Logger.Named("integration-vcs")) - sarifissues.Init(AppConfig) + sarifissues.Init(AppConfig, Logger.Named("sarif-issues")) version.Init(AppConfig, Logger.Named("version")) tohtml.Init(AppConfig, Logger.Named("to-html")) upload.Init(AppConfig, Logger.Named("upload")) diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index 5121e0dc..09e31fac 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -131,7 +131,7 @@ func extractRuleIDFromBody(body string) string { // listOpenIssues calls the VCS plugin to list open issues for the configured repo // and parses their bodies into OpenIssueReport structures. 
-func listOpenIssues(options RunOptions) (map[int]OpenIssueEntry, error) { +func listOpenIssues(options RunOptions, lg hclog.Logger) (map[int]OpenIssueEntry, error) { req := shared.VCSListIssuesRequest{ VCSRequestBase: shared.VCSRequestBase{ RepoParam: shared.RepositoryParams{ @@ -144,7 +144,7 @@ func listOpenIssues(options RunOptions) (map[int]OpenIssueEntry, error) { } var issues []shared.IssueParams - err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { + err := shared.WithPlugin(AppConfig, lg, shared.PluginTypeVCS, "github", func(raw interface{}) error { vcs, ok := raw.(shared.VCS) if !ok { return fmt.Errorf("invalid VCS plugin type") @@ -509,7 +509,7 @@ func createUnmatchedIssues(unmatchedNew []issuecorrelation.IssueMetadata, newIss Assignees: options.Assignees, } - err := shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { + err := shared.WithPlugin(AppConfig, lg, shared.PluginTypeVCS, "github", func(raw interface{}) error { vcs, ok := raw.(shared.VCS) if !ok { return fmt.Errorf("invalid VCS plugin type") @@ -567,7 +567,7 @@ func closeUnmatchedIssues(unmatchedKnown []issuecorrelation.IssueMetadata, optio Body: "Recent scan didn't see the issue; closing this as resolved.", } - err = shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { + err = shared.WithPlugin(AppConfig, lg, shared.PluginTypeVCS, "github", func(raw interface{}) error { vcs, ok := raw.(shared.VCS) if !ok { return fmt.Errorf("invalid VCS plugin type") @@ -592,7 +592,7 @@ func closeUnmatchedIssues(unmatchedKnown []issuecorrelation.IssueMetadata, optio State: "closed", } - err = shared.WithPlugin(AppConfig, "plugin-vcs", shared.PluginTypeVCS, "github", func(raw interface{}) error { + err = shared.WithPlugin(AppConfig, lg, shared.PluginTypeVCS, "github", func(raw interface{}) error { vcs, ok := raw.(shared.VCS) if !ok { return 
fmt.Errorf("invalid VCS plugin type") diff --git a/cmd/sarif-issues/sarif-issues.go b/cmd/sarif-issues/sarif-issues.go index ef64df92..4beb4c31 100644 --- a/cmd/sarif-issues/sarif-issues.go +++ b/cmd/sarif-issues/sarif-issues.go @@ -12,7 +12,6 @@ import ( "github.com/scan-io-git/scan-io/pkg/shared" "github.com/scan-io-git/scan-io/pkg/shared/config" "github.com/scan-io-git/scan-io/pkg/shared/errors" - "github.com/scan-io-git/scan-io/pkg/shared/logger" ) // scanioManagedAnnotation is appended to issue bodies created by this command @@ -38,6 +37,7 @@ type RunOptions struct { var ( AppConfig *config.Config + cmdLogger hclog.Logger opts RunOptions // Example usage for the sarif-issues command @@ -84,8 +84,9 @@ var ( ) // Init wires config into this command. -func Init(cfg *config.Config) { +func Init(cfg *config.Config, l hclog.Logger) { AppConfig = cfg + cmdLogger = l } // runSarifIssues is the main execution function for the sarif-issues command. @@ -95,8 +96,8 @@ func runSarifIssues(cmd *cobra.Command, args []string) error { return cmd.Help() } - // 2. Initialize logger - lg := logger.NewLogger(AppConfig, "sarif-issues") + // 2. Use global logger + lg := cmdLogger // 3. Handle environment variable fallbacks ApplyEnvironmentFallbacks(&opts) @@ -146,7 +147,7 @@ func runSarifIssues(cmd *cobra.Command, args []string) error { report.EnrichResultsTitleProperty() // 7. 
Get all open GitHub issues - openIssues, err := listOpenIssues(opts) + openIssues, err := listOpenIssues(opts, lg) if err != nil { lg.Error("failed to list open issues", "error", err) return errors.NewCommandError(opts, nil, fmt.Errorf("failed to list open issues: %w", err), 2) From e38c4591b74fe8eb0234fda9ae8f670fecbb41b3 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sun, 12 Oct 2025 19:26:05 +0200 Subject: [PATCH 49/52] refactor: enhance SARIF result level enrichment to support multiple runs and improve severity handling --- cmd/sarif-issues/issue_processing.go | 6 +- cmd/sarif-issues/issue_processing_test.go | 4 +- internal/sarif/sarif.go | 86 +++++++++++++----- internal/sarif/sarif_test.go | 101 ++++++++++++++++++++++ 4 files changed, 171 insertions(+), 26 deletions(-) diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index 09e31fac..524739ce 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -487,7 +487,11 @@ func createUnmatchedIssues(unmatchedNew []issuecorrelation.IssueMetadata, newIss fmt.Printf("-%d", u.EndLine) } fmt.Printf("\n") - fmt.Printf(" Severity: %s\n", u.Severity) + severityDisplay := displaySeverity(u.Severity) + if strings.TrimSpace(severityDisplay) == "" { + severityDisplay = u.Severity + } + fmt.Printf(" Severity: %s\n", severityDisplay) fmt.Printf(" Scanner: %s\n", u.Scanner) fmt.Printf(" Rule ID: %s\n", u.RuleID) fmt.Printf("\n") diff --git a/cmd/sarif-issues/issue_processing_test.go b/cmd/sarif-issues/issue_processing_test.go index 85b55526..fb00a214 100644 --- a/cmd/sarif-issues/issue_processing_test.go +++ b/cmd/sarif-issues/issue_processing_test.go @@ -310,13 +310,13 @@ func TestCreateUnmatchedIssuesDryRun(t *testing.T) { "Title: [Semgrep][High][sql-injection] at app.py:11-29", "File: app.py", "Lines: 11-29", - "Severity: error", + "Severity: High", "Scanner: Semgrep", "Rule ID: sql-injection", "Title: [Snyk][Medium][xss-vulnerability] at 
main.js:5", "File: main.js", "Lines: 5", - "Severity: warning", + "Severity: Medium", "Scanner: Snyk", "Rule ID: xss-vulnerability", } diff --git a/internal/sarif/sarif.go b/internal/sarif/sarif.go index f1958937..76530379 100644 --- a/internal/sarif/sarif.go +++ b/internal/sarif/sarif.go @@ -10,6 +10,7 @@ import ( "os" "path/filepath" "sort" + "strings" "github.com/hashicorp/go-hclog" "github.com/owenrumney/go-sarif/v2/sarif" @@ -283,33 +284,72 @@ func (r Report) EnrichResultsCodeFlowProperty(locationWebURLCallback func(artifa } } -// EnrichResultsLevelProperty function to enrich results properties with level taken from corersponding rules propertiues "problem.severity" field +// EnrichResultsLevelProperty enriches result properties with level information taken from either +// the result itself or the corresponding rule metadata. Supports multi-run SARIF reports. func (r Report) EnrichResultsLevelProperty() { - rulesMap := map[string]*sarif.ReportingDescriptor{} - for _, rule := range r.Runs[0].Tool.Driver.Rules { - rulesMap[rule.ID] = rule - } - - for _, result := range r.Runs[0].Results { - if result.Properties == nil { - result.Properties = make(map[string]interface{}) + for _, run := range r.Runs { + rulesMap := map[string]*sarif.ReportingDescriptor{} + if run.Tool.Driver != nil { + for _, rule := range run.Tool.Driver.Rules { + if rule == nil { + continue + } + rulesMap[rule.ID] = rule + } } - if rule, ok := rulesMap[*result.RuleID]; ok { - if result.Properties["Level"] == nil { - if result.Level != nil { - // used by snyk - result.Properties["Level"] = *result.Level - } else if rule.Properties["problem.severity"] != nil { - // used by codeql - result.Properties["Level"] = rule.Properties["problem.severity"] - } else if rule.DefaultConfiguration != nil { - // used by all tools? 
- result.Properties["Level"] = rule.DefaultConfiguration.Level - } else { - // just a fallback, should never happen - result.Properties["Level"] = "unknown" + + for _, result := range run.Results { + if result == nil { + continue + } + + if result.Properties == nil { + result.Properties = make(map[string]interface{}) + } + + if result.Properties["Level"] != nil { + continue + } + + // Prefer explicit level on the result when available. + if result.Level != nil { + if lvl := strings.TrimSpace(*result.Level); lvl != "" { + result.Properties["Level"] = lvl + continue + } + } + + var ruleDescriptor *sarif.ReportingDescriptor + if result.RuleID != nil { + if rule, ok := rulesMap[*result.RuleID]; ok { + ruleDescriptor = rule + } + } + + if ruleDescriptor != nil && ruleDescriptor.Properties != nil { + if level, ok := ruleDescriptor.Properties["problem.severity"]; ok { + if str, ok := level.(string); ok { + if trimmed := strings.TrimSpace(str); trimmed != "" { + result.Properties["Level"] = trimmed + continue + } + } else if level != nil { + // Preserve non-string values (legacy behaviour) if provided. + result.Properties["Level"] = level + continue + } } } + + if ruleDescriptor != nil && ruleDescriptor.DefaultConfiguration != nil { + if lvl := strings.TrimSpace(ruleDescriptor.DefaultConfiguration.Level); lvl != "" { + result.Properties["Level"] = lvl + continue + } + } + + // Fallback when no metadata provides a level. 
+ result.Properties["Level"] = "unknown" } } } diff --git a/internal/sarif/sarif_test.go b/internal/sarif/sarif_test.go index 76871111..21febd6c 100644 --- a/internal/sarif/sarif_test.go +++ b/internal/sarif/sarif_test.go @@ -52,3 +52,104 @@ func TestEnrichResultsLevelPropertyInitialisesResultProperties(t *testing.T) { t.Fatalf("expected Level property to be %q, got %v", "warning", level) } } + +func TestEnrichResultsLevelPropertyHandlesMultipleRuns(t *testing.T) { + ruleIDOne := "RULE-ONE" + ruleIDTwo := "RULE-TWO" + resultLevel := "note" + + runOneRule := gosarif.NewRule(ruleIDOne).WithProperties(gosarif.Properties{ + "problem.severity": "warning", + }) + runTwoRule := gosarif.NewRule(ruleIDTwo) + + runOneResult := &gosarif.Result{ + RuleID: &ruleIDOne, + } + runTwoResult := &gosarif.Result{ + RuleID: &ruleIDTwo, + Level: &resultLevel, + } + + report := Report{ + Report: &gosarif.Report{ + Version: string(gosarif.Version210), + Runs: []*gosarif.Run{ + { + Tool: gosarif.Tool{ + Driver: &gosarif.ToolComponent{ + Name: "ToolOne", + Rules: []*gosarif.ReportingDescriptor{runOneRule}, + }, + }, + Results: []*gosarif.Result{runOneResult}, + }, + { + Tool: gosarif.Tool{ + Driver: &gosarif.ToolComponent{ + Name: "ToolTwo", + Rules: []*gosarif.ReportingDescriptor{runTwoRule}, + }, + }, + Results: []*gosarif.Result{runTwoResult}, + }, + }, + }, + } + + report.EnrichResultsLevelProperty() + + if runOneResult.Properties == nil { + t.Fatalf("expected runOneResult properties to be initialised") + } + if lvl := runOneResult.Properties["Level"]; lvl != "warning" { + t.Fatalf("expected runOneResult level to be %q, got %v", "warning", lvl) + } + + if runTwoResult.Properties == nil { + t.Fatalf("expected runTwoResult properties to be initialised") + } + if lvl := runTwoResult.Properties["Level"]; lvl != "note" { + t.Fatalf("expected runTwoResult level to be %q, got %v", "note", lvl) + } +} + +func TestEnrichResultsLevelPropertyUsesDefaultConfigurationLevel(t *testing.T) { + ruleID 
:= "RULE-DEFAULT" + rule := gosarif.NewRule(ruleID) + rule.DefaultConfiguration = gosarif.NewReportingConfiguration().WithLevel("error") + + result := &gosarif.Result{ + RuleID: &ruleID, + } + + report := Report{ + Report: &gosarif.Report{ + Version: string(gosarif.Version210), + Runs: []*gosarif.Run{ + { + Tool: gosarif.Tool{ + Driver: &gosarif.ToolComponent{ + Name: "Tool", + Rules: []*gosarif.ReportingDescriptor{rule}, + }, + }, + Results: []*gosarif.Result{result}, + }, + }, + }, + } + + report.EnrichResultsLevelProperty() + + if result.Properties == nil { + t.Fatalf("expected result properties to be initialised") + } + level, ok := result.Properties["Level"].(string) + if !ok { + t.Fatalf("expected Level property to be a string, got %T", result.Properties["Level"]) + } + if level != "error" { + t.Fatalf("expected Level property to be %q, got %q", "error", level) + } +} From f26af7b8bb68ae88351e194bb7d0e8f9b63df5f2 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sun, 12 Oct 2025 19:41:25 +0200 Subject: [PATCH 50/52] chore: remove outdated SARIF issues path analysis documentation --- .../engineering/sarif-issues-path-analysis.md | 160 ------------------ 1 file changed, 160 deletions(-) delete mode 100644 docs/engineering/sarif-issues-path-analysis.md diff --git a/docs/engineering/sarif-issues-path-analysis.md b/docs/engineering/sarif-issues-path-analysis.md deleted file mode 100644 index 534eb11b..00000000 --- a/docs/engineering/sarif-issues-path-analysis.md +++ /dev/null @@ -1,160 +0,0 @@ -# SARIF Issues Path Handling Analysis - -## Reproduction Context -- Command sequence: - 1. `scanio analyse --scanner semgrep /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo --format sarif --output outside-project.sarif` - 2. 
`scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif from-subfolder.sarif --source-folder /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo` -- Expected permalink: `.../blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py#L11-L29` -- Actual permalink (incorrect): `.../blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/main.py#L11-L29` - -## Key Observations -- `data/outside-project.sarif` contains absolute URIs such as `/home/.../scanio-test/apps/demo/main.py`. -- `data/from-subfolder.sarif` contains relative URIs (`main.py`) because Semgrep ran from the subfolder. -- In both cases the SARIF report points to the file under `apps/demo/main.py`, yet the CLI emits `main.py` in issue bodies and permalinks. - -## Code Flow Review -- `cmd/sarif-issues/issue_processing.go` calls `extractFileURIFromResult` to determine the file path recorded in `NewIssueData` (`buildNewIssuesFromSARIF`, line references around `fileURI` usage). -- `extractFileURIFromResult` (`cmd/sarif-issues/utils.go:173-212`) trims the `--source-folder` prefix from absolute URIs and returns the remainder; for relative URIs it simply returns the raw value. - - When `--source-folder` is `/.../scanio-test/apps/demo`, absolute URIs reduce to `main.py`, losing the repository subpath. -- `buildGitHubPermalink` (`utils.go:125-170`) expects `fileURI` to be repository-relative when constructing `https://github.com/{namespace}/{repo}/blob/{ref}/{fileURI}#L...`. -- `computeSnippetHash` (`utils.go:104-121`) relies on joining `sourceFolder` with the same `fileURI` to re-read the local file. If we change `fileURI` to be repo-relative (`apps/demo/main.py`), the current join logic will point at `/.../apps/demo/apps/demo/main.py` and fail. 
-- `internal/sarif.Report.EnrichResultsLocationProperty` and `EnrichResultsLocationURIProperty` perform similar prefix stripping using `sourceFolder`, so the HTML report path logic (`cmd/to-html.go`) inherits the same limitation. -- `internal/git.CollectRepositoryMetadata` already derives `RepoRootFolder` and the `Subfolder` path segment when `--source-folder` is nested within the repo. - -## Root Cause -The CLI assumes `--source-folder` equals the repository root. When the user points it to a subdirectory, the helper trims that prefix and drops intermediate path segments. Consequently: -- Issue metadata (`File` field) loses the directory context. -- GitHub permalinks omit the subfolder and land on the wrong file. -- Correlation metadata (`Metadata.Filename`) no longer matches the path stored in GitHub issues, risking mismatches if/when we fix the permalink logic without updating correlation. - -## Fix Considerations -1. **Determine repository root & subfolder once.** `internal/git.CollectRepositoryMetadata` gives us both `RepoRootFolder` and `Subfolder` for any path inside the repo. Reusing this keeps CLI logic consistent with the HTML report command. -2. **Produce dual path representations.** - - Repo-relative path (e.g. `apps/demo/main.py`) for GitHub URLs and issue bodies. - - Source-folder-relative path (e.g. `main.py`) or absolute path for reading files/snippet hashing. -3. **Avoid regressions in existing flows.** After changing `fileURI`, ensure: - - `computeSnippetHash` receives the correct on-disk path. - - Issue correlation (`Metadata.Filename`) uses the same representation that is stored in GitHub issue bodies to preserve matching. -4. **Consider harmonising SARIF helpers.** Updating `internal/sarif` enrichment to use repo metadata would fix both CLI commands (`sarif-issues`, `to-html`) and reduce duplicated path trimming logic. - -## Proposed Fix Plan -1. 
Enhance the `sarif-issues` command to collect repository metadata: - - Call `git.CollectRepositoryMetadata(opts.SourceFolder)` early (guard for errors). - - Derive helper closures that can translate between repo-relative and local paths. -2. Update `extractFileURIFromResult` (or an adjacent helper) to: - - Resolve the SARIF URI to an absolute path (using `uriBaseId` and `sourceFolder` when necessary). - - Emit the repo-relative path (using metadata.RepoRootFolder) for issue content and permalinks. - - Return both repo-relative and local paths, or store them in a small struct to avoid repeated conversions. -3. Adjust `computeSnippetHash` and correlation metadata to consume the correct local path while storing repo-relative filenames in issue metadata. -4. Reuse the new path helper in `buildGitHubPermalink` so the permalink path stays in sync. -5. Add regression tests: - - Extend `cmd/sarif-issues/utils_test.go` (or introduce new tests) covering absolute and relative SARIF URIs when `sourceFolder` points to a subdirectory. - - Include permalink assertions using `data/from-subfolder.sarif` / `data/outside-project.sarif`. -6. Evaluate whether `internal/sarif`’s enrichment should adopt the same metadata-aware logic; if so, share the helper to keep `to-html` and future commands consistent. - -# Manual testing -## Scans from root, subfolder, outside, with abs and relative paths -### Semgrep scan of subfolder (monorepo like use case) -```sh -# 1. 
Outside folder absolute paths -cd /home/jekos/ghq/github.com/scan-io-git/scan-io -scanio analyse --scanner semgrep /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/outside-project-abs.sarif -scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif data/outside-project-abs.sarif --source-folder /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo -# validate here: 2 issues with correct permalinks -# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py - -# 2. Outside folder relative paths -cd /home/jekos/ghq/github.com/scan-io-git/scan-io -scanio analyse --scanner semgrep ../scanio-test/apps/demo --format sarif --output data/outside-project-rel.sarif -scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif data/outside-project-rel.sarif --source-folder ../scanio-test/apps/demo -# validate here: 2 issues with correct permalinks -# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py - -# 3. From root absolute path -cd /home/jekos/ghq/github.com/scan-io-git/scanio-test -scanio analyse --scanner semgrep /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-asb.sarif -scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-asb.sarif --source-folder /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo -# validate here: 2 issues with correct permalinks -# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py - -# 4. 
From root relative paths -cd /home/jekos/ghq/github.com/scan-io-git/scanio-test -scanio analyse --scanner semgrep apps/demo --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-rel.sarif -scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-rel.sarif --source-folder apps/demo -# validate here: 2 issues with correct permalinks -# correct https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py -# correct even when .git folder is not there - -# 5. From subfolder absolute paths -cd /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo -scanio analyse --scanner semgrep /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-subfolder-abs.sarif -scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-subfolder-abs.sarif --source-folder /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo -# validate here: 2 issues with correct permalinks -# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py - -# 6. From subfolder relative paths -cd /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo -scanio analyse --scanner semgrep . --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-subfolder-rel.sarif -scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-subfolder-rel.sarif --source-folder . 
-# validate here: 2 issues with correct permalinks -# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py -``` -### snyk -```sh -cd /home/jekos/ghq/github.com/scan-io-git/scanio-test - -# 1. scan root -snyk code test --sarif-file-output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-root.sarif -scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-root.sarif --source-folder . - -# 2. scan subfolder from root -snyk code test --sarif-file-output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-subfolder-from-root.sarif apps/demo -scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-subfolder-from-root.sarif --source-folder apps/demo -``` -### codeql -```sh -cd /home/jekos/ghq/github.com/scan-io-git/scanio-test - -# 1. scan root -/tmp/codeql/codeql database create /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-root --language=python --source-root=. -/tmp/codeql/codeql database analyze /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-root --format=sarif-latest --output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-root.sarif -scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-root.sarif --source-folder . - -# 1. 
scan subfolder -/tmp/codeql/codeql database create /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-subfolder --language=python --source-root=apps/demo -/tmp/codeql/codeql database analyze /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-subfolder --format=sarif-latest --output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-subfolder.sarif -scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-subfolder.sarif --source-folder apps/demo -``` -## How to handle 2 subfolders with 2 separate scans -```sh -cd /home/jekos/ghq/github.com/scan-io-git/scanio-test - -# scan projects -scanio analyse --scanner semgrep apps/demo --format sarif --output semgrep-demo.sarif -snyk code test --sarif-file-output=snyk-another.sarif apps/another - -# create issues -scanio sarif-issues --sarif semgrep-demo.sarif --source-folder apps/demo -scanio sarif-issues --sarif snyk-another.sarif --source-folder apps/another -``` - -**Solution Implemented**: The `sarif-issues` command now filters open issues by source folder scope before correlation. Issues are scoped based on their file path metadata matching the normalized subfolder path. This enables independent issue management for different subfolders in monorepo CI workflows. - -**Key Changes**: -- Added `filterIssuesBySourceFolder()` function that filters open issues to only those within the current `--source-folder` scope -- Issues are filtered before correlation, ensuring each subfolder's issues are managed independently -- When `--source-folder` points to a subfolder, only issues whose file paths start with that subfolder are considered -- When scanning from root (no subfolder), all issues are included as before - -**Expected Behavior**: Both sets of issues remain open and are managed independently. 
Issues from `apps/demo` won't be closed when running the second command for `apps/another`. - -## empty source-folder test -```sh -cd /home/jekos/ghq/github.com/scan-io-git/scanio-test - -snyk code test --sarif-file-output=snyk.sarif - -# create issues -scanio sarif-issues --sarif snyk.sarif --source-folder . -scanio sarif-issues --sarif snyk.sarif -``` \ No newline at end of file From 2a56acbc95793a9130ac094eb3b8a8e0d8ae5599 Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Wed, 22 Oct 2025 19:03:11 +0200 Subject: [PATCH 51/52] feat: add body filtering capability to list issues command on plugin side - to filter relevant issues by body-filter, and avoid sending irrelevant items over RPC - Introduced `BodyFilter` parameter in `VCSListIssuesRequest` to filter issues based on body content. - Implemented `filterIssuesByBody` function to handle substring matching for issue bodies. - Updated `ListIssues` method to apply the body filter if provided, enhancing issue retrieval based on specific criteria. - Added unit tests for body filtering functionality to ensure correct behavior. --- .cursorrules | 1 + cmd/sarif-issues/issue_processing.go | 3 +- .../engineering/sarif-issues-path-analysis.md | 160 ++++++++++++++++++ pkg/shared/ivcs.go | 3 +- plugins/github/github.go | 67 +++++--- plugins/github/github_test.go | 95 +++++++++++ 6 files changed, 304 insertions(+), 25 deletions(-) create mode 100644 docs/engineering/sarif-issues-path-analysis.md create mode 100644 plugins/github/github_test.go diff --git a/.cursorrules b/.cursorrules index 70ccd441..06ca8bc1 100644 --- a/.cursorrules +++ b/.cursorrules @@ -5,6 +5,7 @@ - When in doubt about implementation details, always refer to the engineering documentation first, then examine similar existing implementations in the codebase. - Try to reuse internal packages if relevant. Extend if required functionality does not exist. - Don't use `data` folder in tests, it will not be available in other environment.
But feel free to read content to make proper mocks. +- Try to use `make build-cli` or `make build-plugins` or `make build` instead of `go build ...` ## Commands - Build cli with: `make build-cli` diff --git a/cmd/sarif-issues/issue_processing.go b/cmd/sarif-issues/issue_processing.go index 524739ce..e601fc2d 100644 --- a/cmd/sarif-issues/issue_processing.go +++ b/cmd/sarif-issues/issue_processing.go @@ -140,7 +140,8 @@ func listOpenIssues(options RunOptions, lg hclog.Logger) (map[int]OpenIssueEntry }, Action: "listIssues", }, - State: "open", + State: "open", + BodyFilter: scanioManagedAnnotation, // Filter for scanio-managed issues } var issues []shared.IssueParams diff --git a/docs/engineering/sarif-issues-path-analysis.md b/docs/engineering/sarif-issues-path-analysis.md new file mode 100644 index 00000000..534eb11b --- /dev/null +++ b/docs/engineering/sarif-issues-path-analysis.md @@ -0,0 +1,160 @@ +# SARIF Issues Path Handling Analysis + +## Reproduction Context +- Command sequence: + 1. `scanio analyse --scanner semgrep /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo --format sarif --output outside-project.sarif` + 2. `scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif from-subfolder.sarif --source-folder /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo` +- Expected permalink: `.../blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py#L11-L29` +- Actual permalink (incorrect): `.../blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/main.py#L11-L29` + +## Key Observations +- `data/outside-project.sarif` contains absolute URIs such as `/home/.../scanio-test/apps/demo/main.py`. +- `data/from-subfolder.sarif` contains relative URIs (`main.py`) because Semgrep ran from the subfolder. +- In both cases the SARIF report points to the file under `apps/demo/main.py`, yet the CLI emits `main.py` in issue bodies and permalinks. 
+ +## Code Flow Review +- `cmd/sarif-issues/issue_processing.go` calls `extractFileURIFromResult` to determine the file path recorded in `NewIssueData` (`buildNewIssuesFromSARIF`, line references around `fileURI` usage). +- `extractFileURIFromResult` (`cmd/sarif-issues/utils.go:173-212`) trims the `--source-folder` prefix from absolute URIs and returns the remainder; for relative URIs it simply returns the raw value. + - When `--source-folder` is `/.../scanio-test/apps/demo`, absolute URIs reduce to `main.py`, losing the repository subpath. +- `buildGitHubPermalink` (`utils.go:125-170`) expects `fileURI` to be repository-relative when constructing `https://github.com/{namespace}/{repo}/blob/{ref}/{fileURI}#L...`. +- `computeSnippetHash` (`utils.go:104-121`) relies on joining `sourceFolder` with the same `fileURI` to re-read the local file. If we change `fileURI` to be repo-relative (`apps/demo/main.py`), the current join logic will point at `/.../apps/demo/apps/demo/main.py` and fail. +- `internal/sarif.Report.EnrichResultsLocationProperty` and `EnrichResultsLocationURIProperty` perform similar prefix stripping using `sourceFolder`, so the HTML report path logic (`cmd/to-html.go`) inherits the same limitation. +- `internal/git.CollectRepositoryMetadata` already derives `RepoRootFolder` and the `Subfolder` path segment when `--source-folder` is nested within the repo. + +## Root Cause +The CLI assumes `--source-folder` equals the repository root. When the user points it to a subdirectory, the helper trims that prefix and drops intermediate path segments. Consequently: +- Issue metadata (`File` field) loses the directory context. +- GitHub permalinks omit the subfolder and land on the wrong file. +- Correlation metadata (`Metadata.Filename`) no longer matches the path stored in GitHub issues, risking mismatches if/when we fix the permalink logic without updating correlation. + +## Fix Considerations +1. 
**Determine repository root & subfolder once.** `internal/git.CollectRepositoryMetadata` gives us both `RepoRootFolder` and `Subfolder` for any path inside the repo. Reusing this keeps CLI logic consistent with the HTML report command. +2. **Produce dual path representations.** + - Repo-relative path (e.g. `apps/demo/main.py`) for GitHub URLs and issue bodies. + - Source-folder-relative path (e.g. `main.py`) or absolute path for reading files/snippet hashing. +3. **Avoid regressions in existing flows.** After changing `fileURI`, ensure: + - `computeSnippetHash` receives the correct on-disk path. + - Issue correlation (`Metadata.Filename`) uses the same representation that is stored in GitHub issue bodies to preserve matching. +4. **Consider harmonising SARIF helpers.** Updating `internal/sarif` enrichment to use repo metadata would fix both CLI commands (`sarif-issues`, `to-html`) and reduce duplicated path trimming logic. + +## Proposed Fix Plan +1. Enhance the `sarif-issues` command to collect repository metadata: + - Call `git.CollectRepositoryMetadata(opts.SourceFolder)` early (guard for errors). + - Derive helper closures that can translate between repo-relative and local paths. +2. Update `extractFileURIFromResult` (or an adjacent helper) to: + - Resolve the SARIF URI to an absolute path (using `uriBaseId` and `sourceFolder` when necessary). + - Emit the repo-relative path (using metadata.RepoRootFolder) for issue content and permalinks. + - Return both repo-relative and local paths, or store them in a small struct to avoid repeated conversions. +3. Adjust `computeSnippetHash` and correlation metadata to consume the correct local path while storing repo-relative filenames in issue metadata. +4. Reuse the new path helper in `buildGitHubPermalink` so the permalink path stays in sync. +5. 
Add regression tests: + - Extend `cmd/sarif-issues/utils_test.go` (or introduce new tests) covering absolute and relative SARIF URIs when `sourceFolder` points to a subdirectory. + - Include permalink assertions using `data/from-subfolder.sarif` / `data/outside-project.sarif`. +6. Evaluate whether `internal/sarif`’s enrichment should adopt the same metadata-aware logic; if so, share the helper to keep `to-html` and future commands consistent. + +# Manual testing +## Scans from root, subfolder, outside, with abs and relative paths +### Semgrep scan of subfolder (monorepo like use case) +```sh +# 1. Outside folder absolute paths +cd /home/jekos/ghq/github.com/scan-io-git/scan-io +scanio analyse --scanner semgrep /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/outside-project-abs.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif data/outside-project-abs.sarif --source-folder /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo +# validate here: 2 issues with correct permalinks +# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py + +# 2. Outside folder relative paths +cd /home/jekos/ghq/github.com/scan-io-git/scan-io +scanio analyse --scanner semgrep ../scanio-test/apps/demo --format sarif --output data/outside-project-rel.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif data/outside-project-rel.sarif --source-folder ../scanio-test/apps/demo +# validate here: 2 issues with correct permalinks +# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py + +# 3. 
From root absolute path
+cd /home/jekos/ghq/github.com/scan-io-git/scanio-test
+scanio analyse --scanner semgrep /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-abs.sarif
+scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-abs.sarif --source-folder /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo
+# validate here: 2 issues with correct permalinks
+# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py
+
+# 4. From root relative paths
+cd /home/jekos/ghq/github.com/scan-io-git/scanio-test
+scanio analyse --scanner semgrep apps/demo --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-rel.sarif
+scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-root-rel.sarif --source-folder apps/demo
+# validate here: 2 issues with correct permalinks
+# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py
+# correct even when .git folder is not there
+
+# 5. 
From subfolder absolute paths +cd /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo +scanio analyse --scanner semgrep /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-subfolder-abs.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-subfolder-abs.sarif --source-folder /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo +# validate here: 2 issues with correct permalinks +# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py + +# 6. From subfolder relative paths +cd /home/jekos/ghq/github.com/scan-io-git/scanio-test/apps/demo +scanio analyse --scanner semgrep . --format sarif --output /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-subfolder-rel.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/from-subfolder-rel.sarif --source-folder . +# validate here: 2 issues with correct permalinks +# correct: https://github.com/scan-io-git/scanio-test/blob/aec0b795c350ff53fe9ab01adf862408aa34c3fd/apps/demo/main.py +``` +### snyk +```sh +cd /home/jekos/ghq/github.com/scan-io-git/scanio-test + +# 1. scan root +snyk code test --sarif-file-output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-root.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-root.sarif --source-folder . + +# 2. 
scan subfolder from root
+snyk code test --sarif-file-output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-subfolder-from-root.sarif apps/demo
+scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/snyk-subfolder-from-root.sarif --source-folder apps/demo
+```
+### codeql
+```sh
+cd /home/jekos/ghq/github.com/scan-io-git/scanio-test
+
+# 1. scan root
+/tmp/codeql/codeql database create /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-root --language=python --source-root=.
+/tmp/codeql/codeql database analyze /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-root --format=sarif-latest --output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-root.sarif
+scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-root.sarif --source-folder .
+
+# 2. 
scan subfolder +/tmp/codeql/codeql database create /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-subfolder --language=python --source-root=apps/demo +/tmp/codeql/codeql database analyze /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-database-subfolder --format=sarif-latest --output=/home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-subfolder.sarif +scanio sarif-issues --namespace scan-io-git --repository scanio-test --ref aec0b795c350ff53fe9ab01adf862408aa34c3fd --sarif /home/jekos/ghq/github.com/scan-io-git/scan-io/data/codeql-subfolder.sarif --source-folder apps/demo +``` +## How to handle 2 subfolders with 2 separate scans +```sh +cd /home/jekos/ghq/github.com/scan-io-git/scanio-test + +# scan projects +scanio analyse --scanner semgrep apps/demo --format sarif --output semgrep-demo.sarif +snyk code test --sarif-file-output=snyk-another.sarif apps/another + +# create issues +scanio sarif-issues --sarif semgrep-demo.sarif --source-folder apps/demo +scanio sarif-issues --sarif snyk-another.sarif --source-folder apps/another +``` + +**Solution Implemented**: The `sarif-issues` command now filters open issues by source folder scope before correlation. Issues are scoped based on their file path metadata matching the normalized subfolder path. This enables independent issue management for different subfolders in monorepo CI workflows. + +**Key Changes**: +- Added `filterIssuesBySourceFolder()` function that filters open issues to only those within the current `--source-folder` scope +- Issues are filtered before correlation, ensuring each subfolder's issues are managed independently +- When `--source-folder` points to a subfolder, only issues whose file paths start with that subfolder are considered +- When scanning from root (no subfolder), all issues are included as before + +**Expected Behavior**: Both sets of issues remain open and are managed independently. 
Issues from `apps/demo` won't be closed when running the second command for `apps/another`. + +## empty source-folder test +```sh +cd /home/jekos/ghq/github.com/scan-io-git/scanio-test + +snyk code test --sarif-file-output=snyk.sarif + +# create issues +scanio sarif-issues --sarif snyk.sarif --source-folder . +scanio sarif-issues --sarif snyk.sarif +``` \ No newline at end of file diff --git a/pkg/shared/ivcs.go b/pkg/shared/ivcs.go index c3ddbcba..2f299bfd 100644 --- a/pkg/shared/ivcs.go +++ b/pkg/shared/ivcs.go @@ -148,7 +148,8 @@ type VCSAddCommentToPRRequest struct { // VCSListIssuesRequest represents a request to list issues in a repository. type VCSListIssuesRequest struct { VCSRequestBase - State string `json:"state"` // open, closed, all; default open + State string `json:"state"` // open, closed, all; default open + BodyFilter string `json:"body_filter"` // optional: filter issues by body content (substring match) } // ListFuncResult holds the result of a list function. diff --git a/plugins/github/github.go b/plugins/github/github.go index bac6c8f4..3a2fea6a 100644 --- a/plugins/github/github.go +++ b/plugins/github/github.go @@ -41,29 +41,29 @@ type VCSGithub struct { // CreateIssueComment creates a new comment on an existing GitHub issue. 
func (g *VCSGithub) CreateIssueComment(args shared.VCSCreateIssueCommentRequest) (bool, error) { - // Basic validation - if strings.TrimSpace(args.RepoParam.Namespace) == "" || strings.TrimSpace(args.RepoParam.Repository) == "" { - return false, fmt.Errorf("namespace and repository are required") - } - if args.Number <= 0 { - return false, fmt.Errorf("valid issue number is required") - } - if strings.TrimSpace(args.Body) == "" { - return false, fmt.Errorf("comment body is required") - } - - client, err := g.initializeGithubClient() - if err != nil { - return false, fmt.Errorf("failed to initialize GitHub client: %w", err) - } - - comment := &github.IssueComment{Body: github.String(args.Body)} - _, _, err = client.Issues.CreateComment(context.Background(), args.RepoParam.Namespace, args.RepoParam.Repository, args.Number, comment) - if err != nil { - return false, fmt.Errorf("failed to create issue comment: %w", err) - } - - return true, nil + // Basic validation + if strings.TrimSpace(args.RepoParam.Namespace) == "" || strings.TrimSpace(args.RepoParam.Repository) == "" { + return false, fmt.Errorf("namespace and repository are required") + } + if args.Number <= 0 { + return false, fmt.Errorf("valid issue number is required") + } + if strings.TrimSpace(args.Body) == "" { + return false, fmt.Errorf("comment body is required") + } + + client, err := g.initializeGithubClient() + if err != nil { + return false, fmt.Errorf("failed to initialize GitHub client: %w", err) + } + + comment := &github.IssueComment{Body: github.String(args.Body)} + _, _, err = client.Issues.CreateComment(context.Background(), args.RepoParam.Namespace, args.RepoParam.Repository, args.Number, comment) + if err != nil { + return false, fmt.Errorf("failed to create issue comment: %w", err) + } + + return true, nil } // UpdateIssue updates an existing GitHub issue's title and/or body. 
@@ -594,9 +594,30 @@ func (g *VCSGithub) ListIssues(args shared.VCSListIssuesRequest) ([]shared.Issue result = append(result, convertToIssueParams(it)) } + // Apply body filter if provided + if args.BodyFilter != "" { + result = filterIssuesByBody(result, args.BodyFilter) + } + return result, nil } +// filterIssuesByBody filters a slice of issues by body content using substring matching. +// Returns only issues whose body contains the specified filter text. +func filterIssuesByBody(issues []shared.IssueParams, bodyFilter string) []shared.IssueParams { + if bodyFilter == "" { + return issues + } + + var filtered []shared.IssueParams + for _, issue := range issues { + if strings.Contains(issue.Body, bodyFilter) { + filtered = append(filtered, issue) + } + } + return filtered +} + // Setup initializes the global configuration for the VCSGithub instance. func (g *VCSGithub) Setup(configData config.Config) (bool, error) { g.setGlobalConfig(&configData) diff --git a/plugins/github/github_test.go b/plugins/github/github_test.go new file mode 100644 index 00000000..842ac2da --- /dev/null +++ b/plugins/github/github_test.go @@ -0,0 +1,95 @@ +package main + +import ( + "strings" + "testing" + + "github.com/scan-io-git/scan-io/pkg/shared" +) + +func TestFilterIssuesByBody(t *testing.T) { + tests := []struct { + name string + bodyFilter string + issues []shared.IssueParams + expected int + }{ + { + name: "no filter - return all issues", + bodyFilter: "", + issues: []shared.IssueParams{ + {Number: 1, Body: "Regular issue"}, + {Number: 2, Body: "Scanio managed issue\n> [!NOTE]\n> This issue was created and will be managed by scanio automation"}, + {Number: 3, Body: "Another regular issue"}, + }, + expected: 3, + }, + { + name: "filter for scanio managed issues", + bodyFilter: "> [!NOTE]\n> This issue was created and will be managed by scanio automation", + issues: []shared.IssueParams{ + {Number: 1, Body: "Regular issue"}, + {Number: 2, Body: "Scanio managed issue\n> [!NOTE]\n> 
This issue was created and will be managed by scanio automation"}, + {Number: 3, Body: "Another regular issue"}, + }, + expected: 1, + }, + { + name: "filter with partial match", + bodyFilter: "scanio automation", + issues: []shared.IssueParams{ + {Number: 1, Body: "Regular issue"}, + {Number: 2, Body: "Scanio managed issue\n> [!NOTE]\n> This issue was created and will be managed by scanio automation"}, + {Number: 3, Body: "Another regular issue"}, + }, + expected: 1, + }, + { + name: "filter with no matches", + bodyFilter: "nonexistent text", + issues: []shared.IssueParams{ + {Number: 1, Body: "Regular issue"}, + {Number: 2, Body: "Another regular issue"}, + }, + expected: 0, + }, + { + name: "filter with multiple matches", + bodyFilter: "issue", + issues: []shared.IssueParams{ + {Number: 1, Body: "Regular issue"}, + {Number: 2, Body: "Another issue"}, + {Number: 3, Body: "Not a problem"}, + }, + expected: 2, + }, + { + name: "case sensitive filtering", + bodyFilter: "Issue", + issues: []shared.IssueParams{ + {Number: 1, Body: "Regular issue"}, + {Number: 2, Body: "Another Issue"}, + }, + expected: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := filterIssuesByBody(tt.issues, tt.bodyFilter) + + if len(result) != tt.expected { + t.Errorf("expected %d issues, got %d", tt.expected, len(result)) + } + + // Verify that the correct issues are returned + if tt.bodyFilter != "" { + for _, issue := range result { + if !strings.Contains(issue.Body, tt.bodyFilter) { + t.Errorf("issue %d does not contain filter text: %s", issue.Number, tt.bodyFilter) + } + } + } + }) + } +} From 453c488dca631f3d506902b0013ce668f53b850d Mon Sep 17 00:00:00 2001 From: Evgenii Protsenko Date: Sat, 15 Nov 2025 18:23:16 +0100 Subject: [PATCH 52/52] docs: simplify CreateIssue function documentation --- plugins/github/github.go | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/plugins/github/github.go b/plugins/github/github.go index 
3a2fea6a..1d575d81 100644 --- a/plugins/github/github.go +++ b/plugins/github/github.go @@ -500,22 +500,6 @@ func (g *VCSGithub) Fetch(args shared.VCSFetchRequest) (shared.VCSFetchResponse, // CreateIssue creates a new GitHub issue using the provided request. // -// Parameters: -// -// args - VCSIssueCreationRequest containing repository details and issue content -// -// Examples: -// - Create an issue: -// req := shared.VCSIssueCreationRequest{ -// RepoParam: shared.RepositoryParams{ -// Namespace: "octocat", -// Repository: "hello-world", -// }, -// Title: "New Feature Request", -// Body: "Please add support for...", -// } -// issueNumber, err := githubClient.CreateIssue(req) -// // Returns: // - The number of the created issue // - An error if the issue creation fails