diff --git a/cmd/nightshift/commands/semanticdiff.go b/cmd/nightshift/commands/semanticdiff.go new file mode 100644 index 0000000..1f3f259 --- /dev/null +++ b/cmd/nightshift/commands/semanticdiff.go @@ -0,0 +1,144 @@ +package commands + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "time" + + "github.com/spf13/cobra" + + "github.com/marcus/nightshift/internal/analysis/semanticdiff" + "github.com/marcus/nightshift/internal/logging" +) + +var semanticDiffCmd = &cobra.Command{ + Use: "semantic-diff [path]", + Short: "Explain the semantic meaning of code changes", + Long: `Analyze git changes between two refs and classify each change by semantic +category (feature, bugfix, refactor, dependency update, config, test, docs, cleanup). + +Produces a structured report grouping changes by category with impact highlights +for API surface, schema, and security-sensitive files. + +Examples: + nightshift semantic-diff # last commit in current repo + nightshift semantic-diff --since 7d # changes in the last 7 days + nightshift semantic-diff --base main --head dev # compare two branches + nightshift semantic-diff --json # output as JSON`, + RunE: func(cmd *cobra.Command, args []string) error { + path, _ := cmd.Flags().GetString("path") + if path == "" && len(args) > 0 { + path = args[0] + } + if path == "" { + var err error + path, err = os.Getwd() + if err != nil { + return fmt.Errorf("getting current directory: %w", err) + } + } + + baseRef, _ := cmd.Flags().GetString("base") + headRef, _ := cmd.Flags().GetString("head") + since, _ := cmd.Flags().GetString("since") + jsonOutput, _ := cmd.Flags().GetBool("json") + save, _ := cmd.Flags().GetString("save") + + return runSemanticDiff(path, baseRef, headRef, since, jsonOutput, save) + }, +} + +func init() { + semanticDiffCmd.Flags().StringP("path", "p", "", "Repository path") + semanticDiffCmd.Flags().String("base", "", "Base ref (commit, branch, or tag)") + semanticDiffCmd.Flags().String("head", "", "Head ref 
(default: HEAD)") + semanticDiffCmd.Flags().String("since", "", "Analyze changes since duration (e.g. 7d, 24h, 30d)") + semanticDiffCmd.Flags().Bool("json", false, "Output as JSON") + semanticDiffCmd.Flags().String("save", "", "Save report to file path") + rootCmd.AddCommand(semanticDiffCmd) +} + +func runSemanticDiff(path, baseRef, headRef, since string, jsonOutput bool, savePath string) error { + logger := logging.Component("semantic-diff") + + absPath, err := filepath.Abs(path) + if err != nil { + return fmt.Errorf("resolving path: %w", err) + } + + // Verify git repo + if _, err := os.Stat(filepath.Join(absPath, ".git")); err != nil { + return fmt.Errorf("not a git repository: %s", absPath) + } + + var sinceDur time.Duration + if since != "" { + sinceDur, err = parseDuration(since) + if err != nil { + return fmt.Errorf("parsing --since: %w", err) + } + } + + analyzer := semanticdiff.NewAnalyzer(semanticdiff.Options{ + RepoPath: absPath, + BaseRef: baseRef, + HeadRef: headRef, + Since: sinceDur, + }) + + report, err := analyzer.Run() + if err != nil { + return fmt.Errorf("analysis failed: %w", err) + } + + if jsonOutput { + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + return enc.Encode(report) + } + + md := semanticdiff.RenderMarkdown(report) + fmt.Print(md) + + if savePath != "" { + if err := os.WriteFile(savePath, []byte(md), 0o644); err != nil { + logger.Errorf("saving report: %v", err) + } else { + logger.Infof("report saved to %s", savePath) + } + } + + return nil +} + +// parseDuration parses human-friendly durations like "7d", "24h", "30d". +func parseDuration(s string) (time.Duration, error) { + if len(s) == 0 { + return 0, fmt.Errorf("empty duration") + } + + // Handle day suffix which Go's time.ParseDuration doesn't support. 
+ last := s[len(s)-1] + if last == 'd' || last == 'D' { + s = s[:len(s)-1] + "h" + d, err := time.ParseDuration(s) + if err != nil { + return 0, fmt.Errorf("invalid duration: %s", s) + } + return d * 24, nil + } + + // Handle week suffix. + if last == 'w' || last == 'W' { + s = s[:len(s)-1] + "h" + d, err := time.ParseDuration(s) + if err != nil { + return 0, fmt.Errorf("invalid duration: %s", s) + } + return d * 24 * 7, nil + } + + return time.ParseDuration(s) +} diff --git a/internal/analysis/semanticdiff/classifier.go b/internal/analysis/semanticdiff/classifier.go new file mode 100644 index 0000000..03500ef --- /dev/null +++ b/internal/analysis/semanticdiff/classifier.go @@ -0,0 +1,266 @@ +package semanticdiff + +import ( + "path/filepath" + "strings" +) + +// Category represents a semantic category for a change. +type Category string + +const ( + CategoryFeature Category = "feature" + CategoryBugfix Category = "bugfix" + CategoryRefactor Category = "refactor" + CategoryDeps Category = "dependency-update" + CategoryConfig Category = "config-change" + CategoryTest Category = "test-change" + CategoryDocs Category = "docs-change" + CategoryCleanup Category = "cleanup" + CategoryUnknown Category = "uncategorized" +) + +// ClassifiedChange pairs a file change with its semantic category. +type ClassifiedChange struct { + File FileChange + Category Category + Reason string // short explanation of why this category was chosen +} + +// ClassifiedCommit pairs a commit with its primary category. +type ClassifiedCommit struct { + Commit Commit + Category Category + Reason string + Files []ClassifiedChange +} + +// ClassifyChangeSet classifies every commit and file in the change set. +func ClassifyChangeSet(cs *ChangeSet) []ClassifiedCommit { + var result []ClassifiedCommit + for _, c := range cs.Commits { + cc := classifyCommit(c) + result = append(result, cc) + } + return result +} + +// classifyCommit determines the primary category of a commit. 
+func classifyCommit(c Commit) ClassifiedCommit { + var classified []ClassifiedChange + for _, f := range c.Files { + cat, reason := classifyFile(f, c.Subject, c.Body) + classified = append(classified, ClassifiedChange{ + File: f, + Category: cat, + Reason: reason, + }) + } + + primary, reason := classifyFromMessage(c.Subject, c.Body) + if primary == CategoryUnknown && len(classified) > 0 { + primary, reason = majorityCategory(classified) + } + + return ClassifiedCommit{ + Commit: c, + Category: primary, + Reason: reason, + Files: classified, + } +} + +// classifyFile determines the category of a single file change. +func classifyFile(f FileChange, subject, body string) (Category, string) { + path := strings.ToLower(f.Path) + base := strings.ToLower(filepath.Base(f.Path)) + ext := strings.ToLower(filepath.Ext(f.Path)) + + // Test files + if isTestFile(path, base) { + return CategoryTest, "test file path" + } + + // Documentation + if isDocsFile(path, base, ext) { + return CategoryDocs, "documentation file" + } + + // Dependencies + if isDepsFile(base) { + return CategoryDeps, "dependency manifest" + } + + // Configuration + if isConfigFile(path, base, ext) { + return CategoryConfig, "configuration file" + } + + // Fall back to commit message analysis. + cat, reason := classifyFromMessage(subject, body) + if cat != CategoryUnknown { + return cat, reason + } + + // Heuristic: deletion-heavy changes are often cleanup. + if f.Deletions > 0 && f.Additions == 0 { + return CategoryCleanup, "pure deletion" + } + + return CategoryUnknown, "" +} + +// classifyFromMessage determines category from commit message patterns. 
+func classifyFromMessage(subject, body string) (Category, string) { + msg := strings.ToLower(subject + " " + body) + + // Conventional commit prefixes + subjectLower := strings.ToLower(subject) + if strings.HasPrefix(subjectLower, "fix:") || strings.HasPrefix(subjectLower, "fix(") { + return CategoryBugfix, "conventional commit: fix" + } + if strings.HasPrefix(subjectLower, "feat:") || strings.HasPrefix(subjectLower, "feat(") { + return CategoryFeature, "conventional commit: feat" + } + if strings.HasPrefix(subjectLower, "docs:") || strings.HasPrefix(subjectLower, "docs(") { + return CategoryDocs, "conventional commit: docs" + } + if strings.HasPrefix(subjectLower, "test:") || strings.HasPrefix(subjectLower, "test(") { + return CategoryTest, "conventional commit: test" + } + if strings.HasPrefix(subjectLower, "refactor:") || strings.HasPrefix(subjectLower, "refactor(") { + return CategoryRefactor, "conventional commit: refactor" + } + if strings.HasPrefix(subjectLower, "chore:") || strings.HasPrefix(subjectLower, "chore(") { + return CategoryCleanup, "conventional commit: chore" + } + if strings.HasPrefix(subjectLower, "build:") || strings.HasPrefix(subjectLower, "build(") || strings.HasPrefix(subjectLower, "ci:") || strings.HasPrefix(subjectLower, "ci(") { + return CategoryConfig, "conventional commit: build/ci" + } + if strings.HasPrefix(subjectLower, "deps:") || strings.HasPrefix(subjectLower, "deps(") { + return CategoryDeps, "conventional commit: deps" + } + + // Keyword patterns in message + bugKeywords := []string{"fix", "bug", "patch", "hotfix", "resolve", "issue"} + for _, kw := range bugKeywords { + if strings.Contains(msg, kw) { + return CategoryBugfix, "keyword: " + kw + } + } + + featureKeywords := []string{"add ", "new ", "feature", "implement", "introduce"} + for _, kw := range featureKeywords { + if strings.Contains(msg, kw) { + return CategoryFeature, "keyword: " + strings.TrimSpace(kw) + } + } + + refactorKeywords := []string{"refactor", 
"restructure", "reorganize", "simplify", "extract", "rename"} + for _, kw := range refactorKeywords { + if strings.Contains(msg, kw) { + return CategoryRefactor, "keyword: " + kw + } + } + + depKeywords := []string{"bump", "upgrade", "dependency", "dependabot", "go.sum", "go.mod"} + for _, kw := range depKeywords { + if strings.Contains(msg, kw) { + return CategoryDeps, "keyword: " + kw + } + } + + cleanupKeywords := []string{"cleanup", "clean up", "remove unused", "delete", "deprecate", "drop"} + for _, kw := range cleanupKeywords { + if strings.Contains(msg, kw) { + return CategoryCleanup, "keyword: " + kw + } + } + + return CategoryUnknown, "" +} + +func isTestFile(path, base string) bool { + if strings.Contains(path, "_test.go") || strings.Contains(path, "_test.") { + return true + } + if strings.Contains(path, "/test/") || strings.Contains(path, "/tests/") || strings.Contains(path, "/testdata/") { + return true + } + if strings.HasPrefix(path, "test/") || strings.HasPrefix(path, "tests/") || strings.HasPrefix(path, "testdata/") { + return true + } + if strings.HasSuffix(base, ".test.js") || strings.HasSuffix(base, ".test.ts") || strings.HasSuffix(base, ".spec.js") || strings.HasSuffix(base, ".spec.ts") { + return true + } + return false +} + +func isDocsFile(path, base, ext string) bool { + if ext == ".md" || ext == ".rst" || ext == ".txt" || ext == ".adoc" { + return true + } + if strings.Contains(path, "/docs/") || strings.Contains(path, "/doc/") { + return true + } + if base == "readme" || base == "readme.md" || base == "changelog" || base == "changelog.md" || base == "license" || base == "license.md" { + return true + } + return false +} + +func isDepsFile(base string) bool { + depFiles := []string{ + "go.mod", "go.sum", + "package.json", "package-lock.json", "yarn.lock", "pnpm-lock.yaml", + "requirements.txt", "poetry.lock", "pipfile.lock", + "gemfile.lock", "cargo.lock", "cargo.toml", + "composer.lock", + } + for _, df := range depFiles { + if base == df 
{ + return true + } + } + return false +} + +func isConfigFile(path, base, ext string) bool { + if ext == ".yaml" || ext == ".yml" || ext == ".toml" || ext == ".ini" { + if !strings.Contains(path, "/test") && !strings.Contains(path, "/doc") { + return true + } + } + configFiles := []string{ + "dockerfile", ".dockerignore", "docker-compose.yml", "docker-compose.yaml", + "makefile", ".goreleaser.yml", ".goreleaser.yaml", ".golangci.yml", + ".gitignore", ".editorconfig", ".eslintrc", ".prettierrc", + "tsconfig.json", "webpack.config.js", + } + for _, cf := range configFiles { + if base == cf { + return true + } + } + if strings.Contains(path, ".github/") || strings.Contains(path, ".circleci/") { + return true + } + return false +} + +// majorityCategory returns the most common category among classified changes. +func majorityCategory(changes []ClassifiedChange) (Category, string) { + counts := make(map[Category]int) + for _, c := range changes { + counts[c.Category]++ + } + best := CategoryUnknown + bestCount := 0 + for cat, count := range counts { + if count > bestCount { + best = cat + bestCount = count + } + } + return best, "majority of files" +} diff --git a/internal/analysis/semanticdiff/diff.go b/internal/analysis/semanticdiff/diff.go new file mode 100644 index 0000000..377f78b --- /dev/null +++ b/internal/analysis/semanticdiff/diff.go @@ -0,0 +1,205 @@ +// Package semanticdiff extracts git diffs and classifies changes by semantic category. +package semanticdiff + +import ( + "fmt" + "os/exec" + "strconv" + "strings" +) + +// Commit represents a single git commit with metadata. +type Commit struct { + Hash string + Subject string + Body string + Author string + Files []FileChange +} + +// FileChange represents a single file's changes within a commit. 
+type FileChange struct { + Path string + OldPath string // non-empty if renamed + Additions int + Deletions int + IsBinary bool + IsRename bool + IsNew bool + IsDeleted bool + DiffOutput string // raw diff hunk text for this file +} + +// ChangeSet is the full diff extraction result between two refs. +type ChangeSet struct { + BaseRef string + HeadRef string + Commits []Commit + AllFiles []FileChange // aggregated across all commits +} + +// TotalAdditions returns the sum of additions across all files. +func (cs *ChangeSet) TotalAdditions() int { + total := 0 + for _, f := range cs.AllFiles { + total += f.Additions + } + return total +} + +// TotalDeletions returns the sum of deletions across all files. +func (cs *ChangeSet) TotalDeletions() int { + total := 0 + for _, f := range cs.AllFiles { + total += f.Deletions + } + return total +} + +// ExtractDiff extracts a structured ChangeSet between two git refs. +func ExtractDiff(repoPath, baseRef, headRef string) (*ChangeSet, error) { + commits, err := parseCommits(repoPath, baseRef, headRef) + if err != nil { + return nil, fmt.Errorf("parsing commits: %w", err) + } + + allFiles, err := parseNumstat(repoPath, baseRef, headRef) + if err != nil { + return nil, fmt.Errorf("parsing numstat: %w", err) + } + + return &ChangeSet{ + BaseRef: baseRef, + HeadRef: headRef, + Commits: commits, + AllFiles: allFiles, + }, nil +} + +// parseCommits extracts commit metadata between two refs. +func parseCommits(repoPath, baseRef, headRef string) ([]Commit, error) { + // Use a delimiter unlikely to appear in commit messages. 
+ const sep = "---NIGHTSHIFT-SEP---" + format := fmt.Sprintf("%%H%s%%s%s%%b%s%%an%s", sep, sep, sep, sep) + + cmd := exec.Command("git", "log", "--format="+format, baseRef+".."+headRef) + cmd.Dir = repoPath + out, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("git log: %w", err) + } + + raw := strings.TrimSpace(string(out)) + if raw == "" { + return nil, nil + } + + // Each commit ends with the trailing separator. + entries := strings.Split(raw, sep+"\n") + var commits []Commit + for _, entry := range entries { + entry = strings.TrimSpace(entry) + if entry == "" { + continue + } + parts := strings.SplitN(entry, sep, 4) + if len(parts) < 4 { + continue + } + c := Commit{ + Hash: strings.TrimSpace(parts[0]), + Subject: strings.TrimSpace(parts[1]), + Body: strings.TrimSpace(parts[2]), + Author: strings.TrimSpace(parts[3]), + } + commits = append(commits, c) + } + + // Attach file lists to each commit. + for i := range commits { + files, err := parseCommitFiles(repoPath, commits[i].Hash) + if err != nil { + continue // non-fatal + } + commits[i].Files = files + } + + return commits, nil +} + +// parseCommitFiles returns the file changes for a single commit. +func parseCommitFiles(repoPath, hash string) ([]FileChange, error) { + cmd := exec.Command("git", "diff-tree", "--no-commit-id", "-r", "--numstat", "-M", hash) + cmd.Dir = repoPath + out, err := cmd.Output() + if err != nil { + return nil, err + } + return parseNumstatLines(string(out)) +} + +// parseNumstat runs git diff --numstat between two refs and returns aggregated file changes. +func parseNumstat(repoPath, baseRef, headRef string) ([]FileChange, error) { + cmd := exec.Command("git", "diff", "--numstat", "-M", baseRef+".."+headRef) + cmd.Dir = repoPath + out, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("git diff --numstat: %w", err) + } + return parseNumstatLines(string(out)) +} + +// parseNumstatLines parses lines of git numstat output into FileChange structs. 
+func parseNumstatLines(output string) ([]FileChange, error) { + var files []FileChange + for _, line := range strings.Split(output, "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + fields := strings.Fields(line) + if len(fields) < 3 { + continue + } + + fc := FileChange{} + + // Binary files show "-" for additions/deletions. + if fields[0] == "-" && fields[1] == "-" { + fc.IsBinary = true + } else { + fc.Additions, _ = strconv.Atoi(fields[0]) + fc.Deletions, _ = strconv.Atoi(fields[1]) + } + + path := fields[2] + // Detect renames: "old => new" or "{prefix/old => prefix/new}" + if len(fields) >= 4 && strings.Contains(line, "=>") { + fc.IsRename = true + fc.OldPath = extractRenamePart(fields[2:], true) + fc.Path = extractRenamePart(fields[2:], false) + } else { + fc.Path = path + } + + files = append(files, fc) + } + return files, nil +} + +// extractRenamePart extracts old or new path from rename notation. +func extractRenamePart(fields []string, wantOld bool) string { + joined := strings.Join(fields, " ") + // Handle {a => b} style + if idx := strings.Index(joined, " => "); idx >= 0 { + if wantOld { + old := joined[:idx] + old = strings.TrimPrefix(old, "{") + return strings.TrimSpace(old) + } + newPart := joined[idx+4:] + newPart = strings.TrimSuffix(newPart, "}") + return strings.TrimSpace(newPart) + } + return joined +} diff --git a/internal/analysis/semanticdiff/report.go b/internal/analysis/semanticdiff/report.go new file mode 100644 index 0000000..ac5125a --- /dev/null +++ b/internal/analysis/semanticdiff/report.go @@ -0,0 +1,245 @@ +package semanticdiff + +import ( + "bytes" + "fmt" + "sort" + "strings" +) + +// Report is the structured result of a semantic diff analysis. 
+type Report struct { + BaseRef string `json:"base_ref"` + HeadRef string `json:"head_ref"` + Summary string `json:"summary"` + Categories []CategoryGroup `json:"categories"` + Highlights []string `json:"highlights"` + Stats DiffStats `json:"stats"` +} + +// CategoryGroup groups changes under one semantic category. +type CategoryGroup struct { + Category Category `json:"category"` + Label string `json:"label"` + Commits []ClassifiedCommit `json:"commits"` + FilesCount int `json:"files_count"` + Additions int `json:"additions"` + Deletions int `json:"deletions"` +} + +// DiffStats contains aggregate statistics. +type DiffStats struct { + TotalCommits int `json:"total_commits"` + TotalFiles int `json:"total_files"` + TotalAdded int `json:"total_added"` + TotalDeleted int `json:"total_deleted"` +} + +// GenerateReport builds a Report from a classified change set. +func GenerateReport(cs *ChangeSet, classified []ClassifiedCommit) *Report { + groups := groupByCategory(classified) + stats := DiffStats{ + TotalCommits: len(cs.Commits), + TotalFiles: len(cs.AllFiles), + TotalAdded: cs.TotalAdditions(), + TotalDeleted: cs.TotalDeletions(), + } + highlights := detectHighlights(cs, classified) + summary := buildSummary(groups, stats) + + return &Report{ + BaseRef: cs.BaseRef, + HeadRef: cs.HeadRef, + Summary: summary, + Categories: groups, + Highlights: highlights, + Stats: stats, + } +} + +// groupByCategory groups classified commits by their primary category. 
+func groupByCategory(classified []ClassifiedCommit) []CategoryGroup { + catMap := make(map[Category]*CategoryGroup) + order := []Category{} + + for _, cc := range classified { + g, ok := catMap[cc.Category] + if !ok { + g = &CategoryGroup{ + Category: cc.Category, + Label: categoryLabel(cc.Category), + } + catMap[cc.Category] = g + order = append(order, cc.Category) + } + g.Commits = append(g.Commits, cc) + for _, f := range cc.Files { + g.FilesCount++ + g.Additions += f.File.Additions + g.Deletions += f.File.Deletions + } + } + + // Sort by priority. + sort.Slice(order, func(i, j int) bool { + return categoryPriority(order[i]) < categoryPriority(order[j]) + }) + + groups := make([]CategoryGroup, 0, len(order)) + for _, cat := range order { + groups = append(groups, *catMap[cat]) + } + return groups +} + +func categoryLabel(c Category) string { + switch c { + case CategoryFeature: + return "New Features" + case CategoryBugfix: + return "Bug Fixes" + case CategoryRefactor: + return "Refactoring" + case CategoryDeps: + return "Dependency Updates" + case CategoryConfig: + return "Configuration Changes" + case CategoryTest: + return "Test Changes" + case CategoryDocs: + return "Documentation" + case CategoryCleanup: + return "Cleanup" + case CategoryUnknown: + return "Other Changes" + default: + return string(c) + } +} + +func categoryPriority(c Category) int { + switch c { + case CategoryFeature: + return 0 + case CategoryBugfix: + return 1 + case CategoryRefactor: + return 2 + case CategoryDeps: + return 3 + case CategoryConfig: + return 4 + case CategoryTest: + return 5 + case CategoryDocs: + return 6 + case CategoryCleanup: + return 7 + default: + return 8 + } +} + +// detectHighlights identifies high-impact changes. 
+func detectHighlights(cs *ChangeSet, classified []ClassifiedCommit) []string { + var highlights []string + + for _, f := range cs.AllFiles { + path := strings.ToLower(f.Path) + + // API surface changes + if strings.Contains(path, "api") || strings.Contains(path, "handler") || strings.Contains(path, "route") { + highlights = append(highlights, fmt.Sprintf("API surface changed: %s (+%d/-%d)", f.Path, f.Additions, f.Deletions)) + } + + // Schema/migration changes + if strings.Contains(path, "migration") || strings.Contains(path, "schema") { + highlights = append(highlights, fmt.Sprintf("Schema/migration changed: %s", f.Path)) + } + + // Security-sensitive files + if strings.Contains(path, "auth") || strings.Contains(path, "security") || strings.Contains(path, "crypto") || strings.Contains(path, "permission") { + highlights = append(highlights, fmt.Sprintf("Security-sensitive file changed: %s", f.Path)) + } + + // Large changes + if f.Additions+f.Deletions > 200 { + highlights = append(highlights, fmt.Sprintf("Large change: %s (+%d/-%d lines)", f.Path, f.Additions, f.Deletions)) + } + } + + return dedup(highlights) +} + +func dedup(items []string) []string { + seen := make(map[string]bool) + var result []string + for _, item := range items { + if !seen[item] { + seen[item] = true + result = append(result, item) + } + } + return result +} + +func buildSummary(groups []CategoryGroup, stats DiffStats) string { + parts := make([]string, 0, len(groups)) + for _, g := range groups { + parts = append(parts, fmt.Sprintf("%d %s", len(g.Commits), strings.ToLower(g.Label))) + } + return fmt.Sprintf("%d commits across %d files: %s. Net change: +%d/-%d lines.", + stats.TotalCommits, stats.TotalFiles, strings.Join(parts, ", "), + stats.TotalAdded, stats.TotalDeleted) +} + +// RenderMarkdown renders the report as a markdown string. 
+func RenderMarkdown(r *Report) string { + var buf bytes.Buffer + + fmt.Fprintf(&buf, "# Semantic Diff: %s..%s\n\n", r.BaseRef, r.HeadRef) + + // TL;DR + buf.WriteString("## TL;DR\n\n") + fmt.Fprintf(&buf, "%s\n\n", r.Summary) + + // Stats + buf.WriteString("## Stats\n\n") + buf.WriteString("| Metric | Value |\n") + buf.WriteString("|--------|-------|\n") + fmt.Fprintf(&buf, "| Commits | %d |\n", r.Stats.TotalCommits) + fmt.Fprintf(&buf, "| Files Changed | %d |\n", r.Stats.TotalFiles) + fmt.Fprintf(&buf, "| Lines Added | +%d |\n", r.Stats.TotalAdded) + fmt.Fprintf(&buf, "| Lines Deleted | -%d |\n\n", r.Stats.TotalDeleted) + + // Highlights + if len(r.Highlights) > 0 { + buf.WriteString("## Impact Highlights\n\n") + for _, h := range r.Highlights { + fmt.Fprintf(&buf, "- %s\n", h) + } + buf.WriteString("\n") + } + + // Categories + for _, g := range r.Categories { + fmt.Fprintf(&buf, "## %s\n\n", g.Label) + fmt.Fprintf(&buf, "*%d commits, %d files, +%d/-%d lines*\n\n", len(g.Commits), g.FilesCount, g.Additions, g.Deletions) + for _, cc := range g.Commits { + fmt.Fprintf(&buf, "- **%s** `%s`\n", cc.Commit.Subject, shortHash(cc.Commit.Hash)) + for _, f := range cc.Files { + fmt.Fprintf(&buf, " - `%s` (+%d/-%d)\n", f.File.Path, f.File.Additions, f.File.Deletions) + } + } + buf.WriteString("\n") + } + + return buf.String() +} + +func shortHash(hash string) string { + if len(hash) > 7 { + return hash[:7] + } + return hash +} diff --git a/internal/analysis/semanticdiff/semanticdiff.go b/internal/analysis/semanticdiff/semanticdiff.go new file mode 100644 index 0000000..7e08fc1 --- /dev/null +++ b/internal/analysis/semanticdiff/semanticdiff.go @@ -0,0 +1,85 @@ +package semanticdiff + +import ( + "fmt" + "os/exec" + "strings" + "time" +) + +// Options configures the semantic diff analysis. 
+type Options struct { + RepoPath string + BaseRef string + HeadRef string + Since time.Duration // alternative to BaseRef: go back N duration from HEAD +} + +// Analyzer performs semantic diff analysis on a git repository. +type Analyzer struct { + opts Options +} + +// NewAnalyzer creates a new semantic diff analyzer. +func NewAnalyzer(opts Options) *Analyzer { + return &Analyzer{opts: opts} +} + +// Run executes the analysis and returns a report. +func (a *Analyzer) Run() (*Report, error) { + baseRef, headRef, err := a.resolveRefs() + if err != nil { + return nil, fmt.Errorf("resolving refs: %w", err) + } + + cs, err := ExtractDiff(a.opts.RepoPath, baseRef, headRef) + if err != nil { + return nil, fmt.Errorf("extracting diff: %w", err) + } + + if len(cs.Commits) == 0 { + return &Report{ + BaseRef: baseRef, + HeadRef: headRef, + Summary: "No changes found between the specified refs.", + }, nil + } + + classified := ClassifyChangeSet(cs) + report := GenerateReport(cs, classified) + return report, nil +} + +// resolveRefs resolves base and head refs, applying defaults. +func (a *Analyzer) resolveRefs() (string, string, error) { + headRef := a.opts.HeadRef + if headRef == "" { + headRef = "HEAD" + } + + baseRef := a.opts.BaseRef + if baseRef == "" && a.opts.Since > 0 { + // Use git rev-list to find the commit closest to the since duration. + sinceTime := time.Now().Add(-a.opts.Since) + cmd := exec.Command("git", "log", "--format=%H", "--after="+sinceTime.Format(time.RFC3339), "--reverse") + cmd.Dir = a.opts.RepoPath + out, err := cmd.Output() + if err != nil { + return "", "", fmt.Errorf("finding base commit from --since: %w", err) + } + lines := strings.Split(strings.TrimSpace(string(out)), "\n") + if len(lines) > 0 && lines[0] != "" { + // Use the parent of the first commit in range, or the commit itself. 
+ baseRef = lines[0] + "~1" + } else { + return "", "", fmt.Errorf("no commits found in the last %s", a.opts.Since) + } + } + + if baseRef == "" { + // Default: compare HEAD against its first parent (last commit). + baseRef = headRef + "~1" + } + + return baseRef, headRef, nil +} diff --git a/internal/analysis/semanticdiff/semanticdiff_test.go b/internal/analysis/semanticdiff/semanticdiff_test.go new file mode 100644 index 0000000..ee9e6c4 --- /dev/null +++ b/internal/analysis/semanticdiff/semanticdiff_test.go @@ -0,0 +1,325 @@ +package semanticdiff + +import ( + "strings" + "testing" +) + +func TestParseNumstatLines(t *testing.T) { + tests := []struct { + name string + input string + want int + wantAdd int + wantDel int + }{ + { + name: "normal file", + input: "10\t5\tsrc/main.go\n", + want: 1, + wantAdd: 10, + wantDel: 5, + }, + { + name: "binary file", + input: "-\t-\tassets/logo.png\n", + want: 1, + wantAdd: 0, + wantDel: 0, + }, + { + name: "multiple files", + input: "10\t5\ta.go\n3\t1\tb.go\n", + want: 2, + wantAdd: 13, + wantDel: 6, + }, + { + name: "empty input", + input: "", + want: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + files, err := parseNumstatLines(tt.input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(files) != tt.want { + t.Fatalf("got %d files, want %d", len(files), tt.want) + } + totalAdd, totalDel := 0, 0 + for _, f := range files { + totalAdd += f.Additions + totalDel += f.Deletions + } + if totalAdd != tt.wantAdd { + t.Errorf("total additions = %d, want %d", totalAdd, tt.wantAdd) + } + if totalDel != tt.wantDel { + t.Errorf("total deletions = %d, want %d", totalDel, tt.wantDel) + } + }) + } +} + +func TestParseNumstatBinaryFlag(t *testing.T) { + files, err := parseNumstatLines("-\t-\timage.png\n") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(files) != 1 { + t.Fatalf("expected 1 file, got %d", len(files)) + } + if !files[0].IsBinary { + 
t.Error("expected IsBinary=true") + } +} + /* TestClassifyFromMessage is a table-driven check that classifyFromMessage maps a commit subject to the expected Category. The body field is empty in every case, and only the first return value is compared; the second is discarded. */ +func TestClassifyFromMessage(t *testing.T) { + tests := []struct { + subject string + body string + want Category + }{ + {"fix: null pointer in handler", "", CategoryBugfix}, + {"feat: add user profile page", "", CategoryFeature}, + {"docs: update README", "", CategoryDocs}, + {"test: add integration tests", "", CategoryTest}, + {"refactor: extract helper", "", CategoryRefactor}, + {"chore: clean up old scripts", "", CategoryCleanup}, + {"ci: update workflow", "", CategoryConfig}, + {"deps: bump cobra to v1.8", "", CategoryDeps}, + /* The next two subjects have no type-colon prefix and are still expected to classify. */ {"Fix a bug in login", "", CategoryBugfix}, + {"Add new endpoint", "", CategoryFeature}, + /* A subject that matches nothing is expected to yield CategoryUnknown. */ {"something unrelated", "", CategoryUnknown}, + } + + for _, tt := range tests { + t.Run(tt.subject, func(t *testing.T) { + got, _ := classifyFromMessage(tt.subject, tt.body) + if got != tt.want { + t.Errorf("classifyFromMessage(%q) = %s, want %s", tt.subject, got, tt.want) + } + }) + } +} + /* TestClassifyFileByPath is a table-driven check that classifyFile returns the expected Category for test, docs, dependency-manifest and config paths. Each case passes a FileChange with only Path set plus two empty strings, so the path is the only populated input. NOTE(review): the two empty strings are presumably commit message fields; confirm against classifyFile. */ +func TestClassifyFileByPath(t *testing.T) { + tests := []struct { + path string + want Category + }{ + {"internal/foo/foo_test.go", CategoryTest}, + {"tests/integration.py", CategoryTest}, + {"README.md", CategoryDocs}, + {"docs/guide.rst", CategoryDocs}, + {"go.mod", CategoryDeps}, + {"package.json", CategoryDeps}, + {".github/workflows/ci.yml", CategoryConfig}, + {"Dockerfile", CategoryConfig}, + {"Makefile", CategoryConfig}, + } + + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + f := FileChange{Path: tt.path} + got, _ := classifyFile(f, "", "") + if got != tt.want { + t.Errorf("classifyFile(%q) = %s, want %s", tt.path, got, tt.want) + } + }) + } +} + /* TestIsTestFile calls isTestFile with a path and that path's final element. It expects a _test.go file, a file under tests/ and a .test.js file to be detected, and a plain source file to be rejected. */ +func TestIsTestFile(t *testing.T) { + if !isTestFile("cmd/foo_test.go", "foo_test.go") { + t.Error("expected _test.go to be detected") + } + if !isTestFile("tests/main.py", "main.py") { + t.Error("expected tests/ directory to be detected") + } + if !isTestFile("src/app.test.js", "app.test.js") { + t.Error("expected .test.js to be detected") + } + 
if isTestFile("src/main.go", "main.go") { + t.Error("expected non-test file to not be detected") + } +} + /* TestIsDocsFile calls isDocsFile with a path, that path's final element and its extension. It expects .md and .rst files to be detected and a .go file to be rejected. */ +func TestIsDocsFile(t *testing.T) { + if !isDocsFile("readme.md", "readme.md", ".md") { + t.Error("expected .md to be detected") + } + if !isDocsFile("docs/guide.rst", "guide.rst", ".rst") { + t.Error("expected .rst to be detected") + } + if isDocsFile("src/main.go", "main.go", ".go") { + t.Error("expected .go to not be docs") + } +} + /* TestIsDepsFile calls isDepsFile with a bare file name. It expects go.mod and package-lock.json to be reported as dependency files and main.go not to be. */ +func TestIsDepsFile(t *testing.T) { + if !isDepsFile("go.mod") { + t.Error("expected go.mod to be deps") + } + if !isDepsFile("package-lock.json") { + t.Error("expected package-lock.json to be deps") + } + if isDepsFile("main.go") { + t.Error("expected main.go to not be deps") + } +} + /* TestMajorityCategory feeds majorityCategory two bugfix changes and one feature change and expects CategoryBugfix. The second return value is discarded. */ +func TestMajorityCategory(t *testing.T) { + changes := []ClassifiedChange{ + {Category: CategoryBugfix}, + {Category: CategoryBugfix}, + {Category: CategoryFeature}, + } + got, _ := majorityCategory(changes) + if got != CategoryBugfix { + t.Errorf("got %s, want bugfix", got) + } +} + /* TestGenerateReport runs the pipeline end to end on a two-commit fixture: ClassifyChangeSet, then GenerateReport, then RenderMarkdown. It asserts the aggregate stats, that at least one category group exists, and that the markdown contains the header text, the TL;DR section and a changed file name. */ +func TestGenerateReport(t *testing.T) { + cs := &ChangeSet{ + BaseRef: "abc1234", + HeadRef: "def5678", + Commits: []Commit{ + {Hash: "aaa1111", Subject: "feat: add widget", Author: "alice", Files: []FileChange{ + {Path: "widget.go", Additions: 50, Deletions: 0}, + }}, + {Hash: "bbb2222", Subject: "fix: null check", Author: "bob", Files: []FileChange{ + {Path: "handler.go", Additions: 2, Deletions: 1}, + }}, + }, + AllFiles: []FileChange{ + {Path: "widget.go", Additions: 50, Deletions: 0}, + {Path: "handler.go", Additions: 2, Deletions: 1}, + }, + } + + classified := ClassifyChangeSet(cs) + report := GenerateReport(cs, classified) + + if report.Stats.TotalCommits != 2 { + t.Errorf("expected 2 commits, got %d", report.Stats.TotalCommits) + } + if report.Stats.TotalFiles != 2 { + t.Errorf("expected 2 files, got %d", report.Stats.TotalFiles) + } + /* 52 is the fixture's 50 additions plus 2 additions. */ if report.Stats.TotalAdded != 52 { + t.Errorf("expected 52 additions, got %d", report.Stats.TotalAdded) + } + if 
report.Stats.TotalDeleted != 1 { + t.Errorf("expected 1 deletion, got %d", report.Stats.TotalDeleted) + } + if len(report.Categories) == 0 { + t.Fatal("expected at least one category group") + } + + // Verify markdown rendering. + md := RenderMarkdown(report) + if !strings.Contains(md, "Semantic Diff") { + t.Error("markdown should contain header") + } + if !strings.Contains(md, "TL;DR") { + t.Error("markdown should contain TL;DR section") + } + if !strings.Contains(md, "widget.go") { + t.Error("markdown should reference changed files") + } +} + /* TestRenderMarkdownEmpty renders a Report that has only BaseRef, HeadRef and Summary set, and expects the markdown to contain the no-changes text from Summary. */ +func TestRenderMarkdownEmpty(t *testing.T) { + r := &Report{ + BaseRef: "aaa", + HeadRef: "bbb", + Summary: "No changes found between the specified refs.", + } + md := RenderMarkdown(r) + if !strings.Contains(md, "No changes") { + t.Error("empty report should show no-changes message") + } +} + /* TestDetectHighlights passes detectHighlights a ChangeSet with files under api, auth and migrations paths, and expects one highlight string for each of: API surface, Security-sensitive, Schema/migration. */ +func TestDetectHighlights(t *testing.T) { + cs := &ChangeSet{ + AllFiles: []FileChange{ + {Path: "internal/api/handler.go", Additions: 10, Deletions: 5}, + {Path: "internal/auth/middleware.go", Additions: 3, Deletions: 2}, + {Path: "db/migrations/001_init.sql", Additions: 50, Deletions: 0}, + /* No highlight is asserted for this entry. */ {Path: "README.md", Additions: 5, Deletions: 2}, + }, + } + /* The second argument is nil here; its role is not visible in this test. */ highlights := detectHighlights(cs, nil) + found := map[string]bool{"api": false, "auth": false, "migration": false} + for _, h := range highlights { + if strings.Contains(h, "API surface") { + found["api"] = true + } + if strings.Contains(h, "Security-sensitive") { + found["auth"] = true + } + if strings.Contains(h, "Schema/migration") { + found["migration"] = true + } + } + /* Map iteration order is unspecified, so failures may be reported in any order. */ for k, v := range found { + if !v { + t.Errorf("expected %s highlight to be detected", k) + } + } +} + /* TestChangeSetTotals checks that TotalAdditions and TotalDeletions sum the Additions and Deletions of the two AllFiles entries, giving 15 and 10. */ +func TestChangeSetTotals(t *testing.T) { + cs := &ChangeSet{ + AllFiles: []FileChange{ + {Additions: 10, Deletions: 3}, + {Additions: 5, Deletions: 7}, + }, + } + if cs.TotalAdditions() != 15 { + t.Errorf("TotalAdditions = %d, want 15", cs.TotalAdditions()) + } + if cs.TotalDeletions() != 10 { + 
t.Errorf("TotalDeletions = %d, want 10", cs.TotalDeletions()) + } +} + /* TestShortHash checks that shortHash truncates a long hash to its first 7 characters and returns an input shorter than that unchanged. */ +func TestShortHash(t *testing.T) { + if shortHash("abc1234567890") != "abc1234" { + t.Error("expected 7-char short hash") + } + if shortHash("abc") != "abc" { + t.Error("expected short input returned as-is") + } +} + /* TestCategoryLabel pins two categoryLabel outputs: CategoryFeature maps to New Features and CategoryUnknown maps to Other Changes. */ +func TestCategoryLabel(t *testing.T) { + if categoryLabel(CategoryFeature) != "New Features" { + t.Error("unexpected label for feature") + } + if categoryLabel(CategoryUnknown) != "Other Changes" { + t.Error("unexpected label for unknown") + } +} + /* TestBuildSummary calls buildSummary with two category groups holding placeholder commits and a DiffStats of 3 commits, 5 files, 100 added and 20 deleted lines. It expects the summary text to mention the commit count and the added/deleted line counts. */ +func TestBuildSummary(t *testing.T) { + groups := []CategoryGroup{ + {Category: CategoryFeature, Label: "New Features", Commits: []ClassifiedCommit{{}}}, + {Category: CategoryBugfix, Label: "Bug Fixes", Commits: []ClassifiedCommit{{}, {}}}, + } + stats := DiffStats{TotalCommits: 3, TotalFiles: 5, TotalAdded: 100, TotalDeleted: 20} + summary := buildSummary(groups, stats) + if !strings.Contains(summary, "3 commits") { + t.Error("summary should mention commit count") + } + if !strings.Contains(summary, "+100/-20") { + t.Error("summary should mention line changes") + } +} diff --git a/internal/tasks/tasks.go b/internal/tasks/tasks.go index 2c7dabb..5bcace6 100644 --- a/internal/tasks/tasks.go +++ b/internal/tasks/tasks.go @@ -392,10 +392,18 @@ Apply safe updates directly, and leave concise follow-ups for anything uncertain DefaultInterval: 72 * time.Hour, }, TaskSemanticDiff: { - Type: TaskSemanticDiff, - Category: CategoryAnalysis, - Name: "Semantic Diff Explainer", - Description: "Explain the semantic meaning of code changes", + Type: TaskSemanticDiff, + Category: CategoryAnalysis, + Name: "Semantic Diff Explainer", + Description: `Analyze recent git changes and produce a semantic diff report. +Run 'nightshift semantic-diff --since 72h' (or the configured interval) on the target repository. 
+Classify every commit and changed file into one of: feature, bugfix, refactor, dependency-update, +config-change, test-change, docs-change, or cleanup. Group the results by category and highlight +high-impact changes (API surface modifications, schema/migration changes, security-sensitive files, +and large diffs exceeding 200 lines). Output a structured markdown report with a TL;DR summary, +aggregate stats (commits, files, lines added/deleted), an impact-highlights section, and per-category +breakdowns listing each commit with its affected files. When --json is used, return the report as +structured JSON for downstream tooling.`, CostTier: CostMedium, RiskLevel: RiskLow, DefaultInterval: 72 * time.Hour,