diff --git a/README.md b/README.md index f288531..9b8c94f 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,11 @@ This repo provides utilities for managing copyright headers and license files across many repos at scale. -You can use it to add or validate copyright headers on source code files, add a -LICENSE file to a repo, report on what licenses repos are using, and more. +Features: +- Add or validate copyright headers on source code files +- Add and/or manage LICENSE files with git-aware copyright year detection +- Report on licenses used across multiple repositories +- Automate compliance checks in CI/CD pipelines ## Getting Started @@ -33,7 +36,7 @@ Usage: copywrite [command] Common Commands: - headers Adds missing copyright headers to all source code files + headers Adds missing copyright headers and updates existing headers' year information. init Generates a .copywrite.hcl config for a new project license Validates that a LICENSE file is present and remediates any issues if found @@ -62,8 +65,18 @@ scan all files in your repo and copyright headers to any that are missing: copywrite headers --spdx "MPL-2.0" ``` -You may omit the `--spdx` flag if you add a `.copywrite.hcl` config, as outlined -[here](#config-structure). +The `copywrite license` command validates and manages LICENSE files with git-aware copyright years: + +```sh +copywrite license --spdx "MPL-2.0" +``` + +**Copyright Year Behavior:** +- **Start Year**: Read from the config file; if not set there, defaults to the year of the repository's first commit +- **End Year**: Set to current year when an update is triggered (git history only determines if update is needed) +- **Update Trigger**: Git history is used to detect whether a source code file was modified after the copyright end year + +You may omit the `--spdx` flag if you add a `.copywrite.hcl` config, as outlined [here](#config-structure). ### `--plan` Flag @@ -72,6 +85,24 @@ performs a dry-run and will outline what changes would be made. 
This flag also returns a non-zero exit code if any changes are needed. As such, it can be used to validate if a repo is in compliance or not. +## Technical Details + +### Copyright Year Logic + +**Source File Headers:** +- End year: Set to current year when file's source code is modified +- Git history determines if update is needed (compares file's last commit year to copyright end year) +- When triggered, end year updates to current year +- Ignores copyright header updates made to a file, as they are not source code changes. + +**LICENSE Files:** +- End year: Set to current year when any project file is modified +- Git history determines if update is needed (compares repo's last commit year to copyright end year) +- When triggered, end year updates to current year +- Preserves historical accuracy for archived projects (no forced updates) + +**Key Distinction:** Git history is used as a trigger to determine *whether* an update is needed, but the actual end year value is always set to the current year when an update occurs. 
+ ## Config Structure > :bulb: You can automatically generate a new `.copywrite.hcl` config with the @@ -99,8 +130,8 @@ project { # (OPTIONAL) Represents the year that the project initially began # This is used as the starting year in copyright statements - # If set and different from current year, headers will show: "copyright_year, current_year" - # If set and same as current year, headers will show: "current_year" + # If set and different from current year, headers will show: "copyright_year, year-2" + # If set and same as year-2, headers will show: "copyright_year" # If not set (0), the tool will auto-detect from git history (first commit year) # If auto-detection fails, it will fallback to current year only # Default: 0 (auto-detect) diff --git a/addlicense/main.go b/addlicense/main.go index 6f311db..8c6f603 100644 --- a/addlicense/main.go +++ b/addlicense/main.go @@ -280,7 +280,7 @@ func walk(ch chan<- *file, start string, logger *log.Logger) error { if fi.IsDir() { return nil } - if fileMatches(path, ignorePatterns) { + if FileMatches(path, ignorePatterns) { // The [DEBUG] level is inferred by go-hclog as a debug statement logger.Printf("[DEBUG] skipping: %s", path) return nil @@ -290,9 +290,9 @@ func walk(ch chan<- *file, start string, logger *log.Logger) error { }) } -// fileMatches determines if path matches one of the provided file patterns. +// FileMatches determines if path matches one of the provided file patterns. // Patterns are assumed to be valid. 
-func fileMatches(path string, patterns []string) bool { +func FileMatches(path string, patterns []string) bool { for _, p := range patterns { if runtime.GOOS == "windows" { diff --git a/addlicense/main_test.go b/addlicense/main_test.go index 9c00803..ecd6560 100644 --- a/addlicense/main_test.go +++ b/addlicense/main_test.go @@ -471,7 +471,7 @@ func TestFileMatches(t *testing.T) { for _, tt := range tests { patterns := []string{tt.pattern} - if got := fileMatches(tt.path, patterns); got != tt.wantMatch { + if got := FileMatches(tt.path, patterns); got != tt.wantMatch { t.Errorf("fileMatches(%q, %q) returned %v, want %v", tt.path, patterns, got, tt.wantMatch) } } diff --git a/cmd/headers.go b/cmd/headers.go index 6902699..b7c63da 100644 --- a/cmd/headers.go +++ b/cmd/headers.go @@ -6,8 +6,14 @@ package cmd import ( "fmt" "os" + "path/filepath" + "runtime" + "strings" + "sync" + "sync/atomic" "github.com/hashicorp/copywrite/addlicense" + "github.com/hashicorp/copywrite/licensecheck" "github.com/hashicorp/go-hclog" "github.com/jedib0t/go-pretty/v6/text" "github.com/samber/lo" @@ -21,9 +27,13 @@ var ( var headersCmd = &cobra.Command{ Use: "headers", - Short: "Adds missing copyright headers to all source code files", + Short: "Adds missing copyright headers and updates existing headers' year information in all source code files", Long: `Recursively checks for all files in the given directory and subdirectories, -adding copyright statements and license headers to any that are missing them. +adding copyright statements and license headers to any that are missing them and +updating the year information in existing headers based on git history. + +By default, the command will modify files in place. To perform a dry-run without +modifying any files, use the --plan flag. Autogenerated files and common file types that don't support headers (e.g., prose) will automatically be exempted. 
Any other files or folders should be added to the @@ -87,10 +97,23 @@ config, see the "copywrite init" command.`, ".github/workflows/**", ".github/dependabot.yml", "**/node_modules/**", + ".copywrite.hcl", } ignoredPatterns := lo.Union(conf.Project.HeaderIgnore, autoSkippedPatterns) - // Construct the configuration addLicense needs to properly format headers + // STEP 1: Update existing copyright headers + gha.StartGroup("Updating existing copyright headers:") + updatedCount, anyFileUpdated, licensePath := updateExistingHeaders(cmd, ignoredPatterns, plan) + gha.EndGroup() + if updatedCount > 0 { + if plan { + cmd.Printf("\n%s\n\n", text.FgYellow.Sprintf("[DRY RUN] Would update %d file(s) with new copyright years", updatedCount)) + } else { + cmd.Printf("\n%s\n\n", text.FgGreen.Sprintf("Successfully updated %d file(s) with new copyright years", updatedCount)) + } + } + + // STEP 2: Construct the configuration addLicense needs to properly format headers licenseData := addlicense.LicenseData{ Year: conf.FormatCopyrightYears(), // Format year(s) for copyright statements Holder: conf.Project.CopyrightHolder, @@ -112,10 +135,33 @@ config, see the "copywrite init" command.`, // cobra.CheckErr on the return, which will indeed output to stderr and // return a non-zero error code. - gha.StartGroup("The following files are missing headers:") - err := addlicense.Run(ignoredPatterns, "only", licenseData, "", verbose, plan, []string{"."}, stdcliLogger) + // STEP 3: Add missing headers + gha.StartGroup("Adding missing copyright headers:") + var err error + // In dry-run mode, if updateExistingHeaders found files that would be + // updated (year bumps), treat that as an error so the command exits + // non-zero to indicate work would be performed. 
+ if plan && updatedCount > 0 { + err = fmt.Errorf("[DRY RUN] %d file(s) would be updated with new copyright years", updatedCount) + } + runErr := addlicense.Run(ignoredPatterns, "only", licenseData, "", verbose, plan, []string{"."}, stdcliLogger) + if err != nil && runErr != nil { + err = fmt.Errorf("%v; %v", err, runErr) + } else if err == nil { + err = runErr + } gha.EndGroup() + // STEP 4: Update LICENSE file if any files were modified (either updated or added headers) + // In plan mode: if addlicense found missing headers (returns error), assume files would be modified + // In normal mode: if addlicense succeeded, assume files were modified + if runErr != nil || (!plan && runErr == nil) { + anyFileUpdated = true + } + + updateLicenseFile(cmd, licensePath, anyFileUpdated, plan) + + // Check for errors after LICENSE file update so we still show what would happen cobra.CheckErr(err) }, } @@ -131,3 +177,125 @@ func init() { headersCmd.Flags().StringP("spdx", "s", "", "SPDX-compliant license identifier (e.g., 'MPL-2.0')") headersCmd.Flags().StringP("copyright-holder", "c", "", "Copyright holder (default \"IBM Corp.\")") } + +// updateExistingHeaders walks through files and updates copyright headers based on config and git history +// Returns the count of updated files, a boolean indicating if any file was updated, and the LICENSE file path (if found) +func updateExistingHeaders(cmd *cobra.Command, ignoredPatterns []string, dryRun bool) (int, bool, string) { + targetHolder := conf.Project.CopyrightHolder + if targetHolder == "" { + targetHolder = "IBM Corp." + } + + configYear := conf.Project.CopyrightYear + updatedCount := 0 + anyFileUpdated := false + var licensePath string + + // Producer/consumer: walk files (producer) and process them with a bounded + // worker pool (consumers). This preserves existing semantics while + // bounding concurrency and allowing the walk to run ahead of processors. 
+ ch := make(chan string, 1000) + + var wg sync.WaitGroup + var updatedCount64 int64 + var anyFileUpdatedFlag int32 + var mu sync.Mutex + + workers := runtime.NumCPU() * 4 + if workers < 2 { + workers = 2 + } + + // Start worker pool + wg.Add(workers) + for i := 0; i < workers; i++ { + go func() { + defer wg.Done() + for path := range ch { + // capture base and skip LICENSE files here as well + base := filepath.Base(path) + if strings.EqualFold(base, "LICENSE") || strings.EqualFold(base, "LICENSE.TXT") || strings.EqualFold(base, "LICENSE.MD") { + mu.Lock() + if licensePath == "" { + licensePath = path + } + mu.Unlock() + continue + } + + if !dryRun { + updated, err := licensecheck.UpdateCopyrightHeader(path, targetHolder, configYear, false) + if err == nil && updated { + cmd.Printf(" %s\n", path) + atomic.AddInt64(&updatedCount64, 1) + atomic.StoreInt32(&anyFileUpdatedFlag, 1) + } + } else { + needsUpdate, err := licensecheck.NeedsUpdate(path, targetHolder, configYear, false) + if err == nil && needsUpdate { + cmd.Printf(" %s\n", path) + atomic.AddInt64(&updatedCount64, 1) + atomic.StoreInt32(&anyFileUpdatedFlag, 1) + } + } + } + }() + } + + // Producer: walk the tree and push files onto the channel + go func() { + _ = filepath.Walk(".", func(path string, info os.FileInfo, err error) error { + if err != nil || info.IsDir() { + return nil + } + + // Check if file should be ignored + if addlicense.FileMatches(path, ignoredPatterns) { + return nil + } + + // Non-ignored file -> enqueue for processing. If channel is full, + // this will block until a worker consumes entries, which is fine. 
+ ch <- path + return nil + }) + close(ch) + }() + + // wait for workers to finish + wg.Wait() + + // finalize counts + updatedCount = int(atomic.LoadInt64(&updatedCount64)) + anyFileUpdated = atomic.LoadInt32(&anyFileUpdatedFlag) != 0 + + return updatedCount, anyFileUpdated, licensePath +} + +// updateLicenseFile updates the LICENSE file with current year if any files were modified +func updateLicenseFile(cmd *cobra.Command, licensePath string, anyFileUpdated bool, dryRun bool) { + // If no LICENSE file was found during the walk, nothing to do + if licensePath == "" { + return + } + + targetHolder := conf.Project.CopyrightHolder + if targetHolder == "" { + targetHolder = "IBM Corp." + } + + configYear := conf.Project.CopyrightYear + + // Update LICENSE file, forcing current year if any file was updated + if !dryRun { + updated, err := licensecheck.UpdateCopyrightHeader(licensePath, targetHolder, configYear, anyFileUpdated) + if err == nil && updated { + cmd.Printf("\nUpdated LICENSE file: %s\n", licensePath) + } + } else { + needsUpdate, err := licensecheck.NeedsUpdate(licensePath, targetHolder, configYear, anyFileUpdated) + if err == nil && needsUpdate { + cmd.Printf("\n[DRY RUN] Would update LICENSE file: %s\n", licensePath) + } + } +} diff --git a/cmd/license.go b/cmd/license.go index ebaad57..5e3f529 100644 --- a/cmd/license.go +++ b/cmd/license.go @@ -7,6 +7,8 @@ import ( "errors" "fmt" "path/filepath" + "strconv" + "time" "github.com/hashicorp/copywrite/github" "github.com/hashicorp/copywrite/licensecheck" @@ -63,10 +65,14 @@ var licenseCmd = &cobra.Command{ Run: func(cmd *cobra.Command, args []string) { cmd.Printf("Licensing under the following terms: %s\n", conf.Project.License) - cmd.Printf("Using copyright years: %v\n", conf.FormatCopyrightYears()) + + // Determine appropriate copyright years for LICENSE file + licenseYears := determineLicenseCopyrightYears(dirPath) + + cmd.Printf("Using copyright years: %v\n", licenseYears) cmd.Printf("Using copyright 
holder: %v\n\n", conf.Project.CopyrightHolder) - copyright := "Copyright " + conf.FormatCopyrightYears() + " " + conf.Project.CopyrightHolder + copyright := "Copyright " + conf.Project.CopyrightHolder + " " + licenseYears licenseFiles, err := licensecheck.FindLicenseFiles(dirPath) if err != nil { @@ -175,3 +181,34 @@ func init() { licenseCmd.Flags().StringP("spdx", "s", "", "SPDX License Identifier indicating what the LICENSE file should represent") licenseCmd.Flags().StringP("copyright-holder", "c", "", "Copyright holder (default \"IBM Corp.\")") } + +// determineLicenseCopyrightYears determines the appropriate copyright year range for LICENSE file +// Uses git history to get the start year (first commit) and end year (last commit) +func determineLicenseCopyrightYears(dirPath string) string { + currentYear := time.Now().Year() + startYear := conf.Project.CopyrightYear + + // If no start year configured, try to auto-detect from git + if startYear == 0 { + if detectedYear, err := licensecheck.GetRepoFirstCommitYear(dirPath); err == nil && detectedYear > 0 { + startYear = detectedYear + } else { + // Fallback to current year + return strconv.Itoa(currentYear) + } + } + + // Determine end year from repository's last commit year + endYear := currentYear // Default fallback + if lastRepoCommitYear, err := licensecheck.GetRepoLastCommitYear(dirPath); err == nil && lastRepoCommitYear > 0 && lastRepoCommitYear <= currentYear { + endYear = lastRepoCommitYear + } + + // If start year equals end year, return single year + if startYear == endYear { + return strconv.Itoa(endYear) + } + + // Return year range: "startYear, endYear" + return fmt.Sprintf("%d, %d", startYear, endYear) +} diff --git a/licensecheck/update.go b/licensecheck/update.go new file mode 100644 index 0000000..591cbb4 --- /dev/null +++ b/licensecheck/update.go @@ -0,0 +1,680 @@ +// Copyright IBM Corp. 
2023, 2026 +// SPDX-License-Identifier: MPL-2.0 + +package licensecheck + +import ( + "bufio" + "fmt" + "os" + "os/exec" + "path/filepath" + "regexp" + "strconv" + "strings" + "time" +) + +// CopyrightInfo holds parsed copyright information from a file +type CopyrightInfo struct { + LineNumber int + OriginalLine string + Holder string + StartYear int + EndYear int + Prefix string // Comment prefix (e.g., "// ", "# ") + TrailingText string // Any text after the years + PrefixIndex int // Byte index in the line where the comment prefix starts (for inline comments) +} + +// extractAllCopyrightInfo extracts all copyright information from a file +func extractAllCopyrightInfo(filePath string) ([]*CopyrightInfo, error) { + file, err := os.Open(filePath) + if err != nil { + return nil, err + } + defer func() { _ = file.Close() }() + + scanner := bufio.NewScanner(file) + lineNum := 0 + var copyrights []*CopyrightInfo + + // Scan entire file for all copyright statements + for scanner.Scan() { + lineNum++ + line := scanner.Text() + + // Check if line contains "copyright" + if strings.Contains(strings.ToLower(line), "copyright") { + info := parseCopyrightLine(line, lineNum, filePath) + if info != nil { + copyrights = append(copyrights, info) + } + } + } + + return copyrights, scanner.Err() +} + +// parseCopyrightLine extracts copyright details from a line +func parseCopyrightLine(line string, lineNum int, filePath string) *CopyrightInfo { + // 1. 
Determine the prefix and content source + bestIdx := -1 + bestPrefix := "" + for _, p := range commentPrefixes { + if idx := strings.Index(line, p); idx >= 0 { + if bestIdx == -1 || idx < bestIdx { + bestIdx = idx + bestPrefix = p + } + } + } + + var prefix string + var prefixStart int + var content string + + if bestIdx != -1 { + // Case A: Comment prefix found + // Include contiguous spaces/tabs immediately before the prefix as part of the prefix index + prefixStart = bestIdx + for i := bestIdx - 1; i >= 0; i-- { + if line[i] == ' ' || line[i] == '\t' { + prefixStart = i + } else { + break + } + } + + // Calculate bounds + prefixEnd := bestIdx + len(bestPrefix) + prefix = line[prefixStart:prefixEnd] + + if prefixEnd < len(line) { + content = line[prefixEnd:] + } + } else { + // Case B: No prefix found (check if it is a LICENSE file) + base := strings.ToLower(filepath.Base(filePath)) + if strings.HasPrefix(base, "license") { + // Treat entire line as content with empty prefix + prefix = "" + prefixStart = 0 + content = line + } else { + // No prefix and not a license file -> Ignore + return nil + } + } + + // Validate content starts with "Copyright" + // Normalize content for the check + content = strings.TrimSpace(content) + if !regexp.MustCompile(`(?i)^copyright\b`).MatchString(content) { + return nil + } + + // Initialize the Info struct + info := &CopyrightInfo{ + LineNumber: lineNum, + OriginalLine: line, + Prefix: prefix, + PrefixIndex: prefixStart, + } + + // Parse the Copyright String (Unified Logic) + // Remove "Copyright" and optional (c) from the beginning + re := regexp.MustCompile(`(?i)^copyright\s*(?:\(c\))?\s*`) + afterCopyright := re.ReplaceAllString(content, "") + afterCopyright = strings.TrimSpace(afterCopyright) + + // Strategy: Find all 4-digit years in the line + yearPattern := regexp.MustCompile(`\b(\d{4})\b`) + yearMatches := yearPattern.FindAllStringIndex(afterCopyright, -1) + + // If no years found, the whole string is the holder + if 
len(yearMatches) == 0 { + info.Holder = strings.TrimSpace(afterCopyright) + return info + } + + // Find the last occurrence of years (which should be the copyright years) + lastYearIdx := yearMatches[len(yearMatches)-1] + + // Extract years - check if there's a year before the last one (start year) + if len(yearMatches) >= 2 { + prevYearIdx := yearMatches[len(yearMatches)-2] + between := afterCopyright[prevYearIdx[1]:lastYearIdx[0]] + + // If only separators (-, ) between them, treat as start and end year + if strings.TrimSpace(strings.Trim(between, "-, ")) == "" { + startYearStr := afterCopyright[prevYearIdx[0]:prevYearIdx[1]] + if year, err := strconv.Atoi(startYearStr); err == nil { + info.StartYear = year + } + } + } + + endYearStr := afterCopyright[lastYearIdx[0]:lastYearIdx[1]] + if year, err := strconv.Atoi(endYearStr); err == nil { + info.EndYear = year + if info.StartYear == 0 { + info.StartYear = year + } + } + + // Determine where the Holder name ends + // Usually, everything before the first recognized year (or year pair) is the holder + holderEndIdx := yearMatches[0][0] + if len(yearMatches) >= 2 && info.StartYear != 0 { + holderEndIdx = yearMatches[len(yearMatches)-2][0] + } + + holder := strings.TrimSpace(afterCopyright[:holderEndIdx]) + info.Holder = holder + + // Everything after the last year is trailing text + if lastYearIdx[1] < len(afterCopyright) { + trailing := afterCopyright[lastYearIdx[1]:] + if trailing != "" { + info.TrailingText = trailing + } + } + + return info +} + +// extractCommentPrefix extracts comment markers from the beginning of a line +func extractCommentPrefix(line string) string { + trimmed := strings.TrimLeft(line, " \t") + leadingSpace := line[:len(line)-len(trimmed)] + for _, prefix := range commentPrefixes { + if strings.HasPrefix(trimmed, prefix) { + return leadingSpace + prefix + } + } + + return leadingSpace +} + +// Generated file detection patterns (from addlicense) +var ( + // go generate: ^// Code generated .* DO 
NOT EDIT\.$ + goGenerated = regexp.MustCompile(`(?m)^.{1,2} Code generated .* DO NOT EDIT\.$`) + // cargo raze: ^DO NOT EDIT! Replaced on runs of cargo-raze$ + cargoRazeGenerated = regexp.MustCompile(`(?m)^DO NOT EDIT! Replaced on runs of cargo-raze$`) + // terraform init: ^# This file is maintained automatically by "terraform init"\.$ + terraformGenerated = regexp.MustCompile(`(?m)^# This file is maintained automatically by "terraform init"\.$`) +) + +// isGenerated returns true if the file content contains a string that implies +// the file was auto-generated and should not be modified. +// This prevents updating copyright headers in generated files. +func isGenerated(content []byte) bool { + // Scan entire file for generated markers + return goGenerated.Match(content) || + cargoRazeGenerated.Match(content) || + terraformGenerated.Match(content) +} + +// Special line prefixes that should be preserved at the start of files (from addlicense) +var specialLineHeads = []string{ + "#!", // shell script shebang + " 0 { + yearStr = strings.TrimSpace(lines[0]) + } + } + + year, err := strconv.Atoi(yearStr) + if err != nil { + return 0, err + } + + return year, nil +} + +// calculateYearUpdates determines if a copyright needs updating and calculates new years +// Returns: (shouldUpdate bool, newStartYear int, newEndYear int) +func calculateYearUpdates( + filePath string, + info *CopyrightInfo, + canonicalStartYear int, + lastCommitYear int, + currentYear int, + forceCurrentYear bool, +) (bool, int, int) { + shouldUpdate := false + newStartYear := info.StartYear + newEndYear := info.EndYear + + // Condition 1: Update start year if canonical year differs from file's start year + if canonicalStartYear > 0 && info.StartYear != canonicalStartYear { + newStartYear = canonicalStartYear + shouldUpdate = true + } + + // Condition 2: Only update end year if file was modified after the copyright end year, or forceCurrentYear is true + // But only if there are non-copyright changes in 
the file + if lastCommitYear > info.EndYear { + // Check for non-copyright changes between HEAD and HEAD~1 + currentContent, err1 := getFileContentExcludingCopyright(filePath) + prevCommittedContent, err2 := getPreviousCommittedFileContent(filePath) + if err1 == nil && err2 == nil && currentContent != prevCommittedContent { + if info.EndYear < currentYear { + newEndYear = currentYear + shouldUpdate = true + } + } + } + + // Condition 3: Force current year if requested (e.g., for LICENSE when other files updated) + if forceCurrentYear && info.EndYear < currentYear { + newEndYear = currentYear + shouldUpdate = true + } + + return shouldUpdate, newStartYear, newEndYear +} + +// getRepoRoot finds the git repository root from a given directory +func getRepoRoot(workingDir string) (string, error) { + repoRootOutput, err := executeGitCommand( + workingDir, + "rev-parse", "--show-toplevel", + ) + if err != nil { + return "", fmt.Errorf("failed to find git repo root: %w", err) + } + return strings.TrimSpace(string(repoRootOutput)), nil +} + +// getFileLastCommitYear returns the year of the last commit that modified a file +func getFileLastCommitYear(filePath string) (int, error) { + absPath, err := filepath.Abs(filePath) + if err != nil { + return 0, err + } + + // Find repository root + repoRoot, err := getRepoRoot(filepath.Dir(absPath)) + if err != nil { + return 0, err + } + + // Calculate relative path from repo root to file + relPath, err := filepath.Rel(repoRoot, absPath) + if err != nil { + return 0, fmt.Errorf("failed to calculate relative path: %w", err) + } + + // Run git log from repo root with relative path + output, err := executeGitCommand( + repoRoot, + "log", "-1", "--format=%ad", "--date=format:%Y", "--", relPath, + ) + if err != nil { + return 0, err + } + + return parseYearFromGitOutput(output, false) +} + +// GetRepoFirstCommitYear returns the year of the first commit in the repository +func GetRepoFirstCommitYear(workingDir string) (int, error) { + // 
Find repository root for consistency + repoRoot, err := getRepoRoot(workingDir) + if err != nil { + return 0, err + } + + output, err := executeGitCommand(repoRoot, "log", "--reverse", "--format=%ad", "--date=format:%Y") + if err != nil { + return 0, err + } + + return parseYearFromGitOutput(output, true) +} + +// GetRepoLastCommitYear returns the year of the last commit in the repository +func GetRepoLastCommitYear(workingDir string) (int, error) { + // Find repository root for consistency + repoRoot, err := getRepoRoot(workingDir) + if err != nil { + return 0, err + } + + output, err := executeGitCommand(repoRoot, "log", "-1", "--format=%ad", "--date=format:%Y") + if err != nil { + return 0, err + } + + return parseYearFromGitOutput(output, false) +} + +// evaluateCopyrightUpdates evaluates all copyrights in a file and returns which ones need updating +// This is shared logic between UpdateCopyrightHeader and NeedsUpdate +func evaluateCopyrightUpdates( + filePath string, + copyrights []*CopyrightInfo, + targetHolder string, + configYear int, + lastCommitYear int, + currentYear int, + forceCurrentYear bool, + repoFirstYear int, +) []*struct { + info *CopyrightInfo + newStartYear int + newEndYear int +} { + // If configYear is 0, use repo's first commit year + canonicalStartYear := configYear + if canonicalStartYear == 0 && repoFirstYear > 0 { + canonicalStartYear = repoFirstYear + } + + var updates []*struct { + info *CopyrightInfo + newStartYear int + newEndYear int + } + + // Process each copyright statement + for _, info := range copyrights { + // Check if holder matches target (case-insensitive partial match) + if !strings.Contains(strings.ToLower(info.Holder), strings.ToLower(targetHolder)) { + continue + } + + shouldUpdate, newStartYear, newEndYear := calculateYearUpdates( + filePath, info, canonicalStartYear, lastCommitYear, currentYear, forceCurrentYear, + ) + + if shouldUpdate { + updates = append(updates, &struct { + info *CopyrightInfo + newStartYear 
int + newEndYear int + }{ + info: info, + newStartYear: newStartYear, + newEndYear: newEndYear, + }) + } + } + + return updates +} + +// UpdateCopyrightHeader updates all copyright headers in a file if needed +// If forceCurrentYear is true, forces end year to current year regardless of git history +// Returns true if the file was modified +func UpdateCopyrightHeader(filePath string, targetHolder string, configYear int, forceCurrentYear bool) (bool, error) { + // Skip .copywrite.hcl config file + if filepath.Base(filePath) == ".copywrite.hcl" { + return false, nil + } + + // Read file content once for all checks + content, err := os.ReadFile(filePath) + if err != nil { + return false, err + } + + // Skip generated files (DO NOT EDIT markers, etc.) + if isGenerated(content) { + return false, nil + } + + // Extract all copyright statements in the file + copyrights, err := extractAllCopyrightInfo(filePath) + if err != nil { + return false, err + } + + if len(copyrights) == 0 { + // No copyright headers found + return false, nil + } + + currentYear := time.Now().Year() + lastCommitYear, _ := getFileLastCommitYear(filePath) + repoFirstYear, _ := GetRepoFirstCommitYear(filepath.Dir(filePath)) + + // Evaluate which copyrights need updating + updates := evaluateCopyrightUpdates( + filePath, copyrights, targetHolder, configYear, lastCommitYear, currentYear, forceCurrentYear, repoFirstYear, + ) + + if len(updates) == 0 { + return false, nil + } + + // Apply updates + lines := strings.Split(string(content), "\n") + for _, update := range updates { + info := update.info + if info.LineNumber < 1 || info.LineNumber > len(lines) { + continue + } + + // Reconstruct the copyright fragment preserving format and trailing text. 
+ var yearStr string + if update.newStartYear == update.newEndYear { + yearStr = fmt.Sprintf("%d", update.newEndYear) + } else { + yearStr = fmt.Sprintf("%d, %d", update.newStartYear, update.newEndYear) + } + + // Build the new copyright text (prefix included) + newCopyright := fmt.Sprintf("%sCopyright %s %s", info.Prefix, info.Holder, yearStr) + if info.TrailingText != "" { + newCopyright += info.TrailingText + } + + // If PrefixIndex is set, replace only the comment suffix starting at PrefixIndex, + // preserving any code before the comment (inline comment case). + idx := info.LineNumber - 1 + origLine := lines[idx] + if info.PrefixIndex > 0 && info.PrefixIndex < len(origLine) { + lines[idx] = origLine[:info.PrefixIndex] + newCopyright + } else { + // PrefixIndex == 0 or out-of-range: replace the whole line + lines[idx] = newCopyright + } + } + + // Write back + newContent := strings.Join(lines, "\n") + err = os.WriteFile(filePath, []byte(newContent), 0644) + if err != nil { + return false, err + } + + return true, nil +} + +// NeedsUpdate checks if a file would be updated without actually modifying it +// If forceCurrentYear is true, forces end year to current year regardless of git history +// Returns true if the file has copyrights matching targetHolder that need year updates +func NeedsUpdate(filePath string, targetHolder string, configYear int, forceCurrentYear bool) (bool, error) { + // Skip .copywrite.hcl config file + if filepath.Base(filePath) == ".copywrite.hcl" { + return false, nil + } + + // Read file content for generated file check + content, err := os.ReadFile(filePath) + if err != nil { + return false, err + } + + // Skip generated files (DO NOT EDIT markers, etc.) 
+ if isGenerated(content) { + return false, nil + } + + // Extract all copyright statements in the file + copyrights, err := extractAllCopyrightInfo(filePath) + if err != nil { + return false, err + } + + if len(copyrights) == 0 { + return false, nil + } + + currentYear := time.Now().Year() + lastCommitYear, _ := getFileLastCommitYear(filePath) + repoFirstYear, _ := GetRepoFirstCommitYear(filepath.Dir(filePath)) + + // Evaluate which copyrights need updating + updates := evaluateCopyrightUpdates( + filePath, copyrights, targetHolder, configYear, lastCommitYear, currentYear, forceCurrentYear, repoFirstYear, + ) + + return len(updates) > 0, nil +} + +// getFileContentExcludingCopyright returns the file content with copyright lines removed +func getFileContentExcludingCopyright(filePath string) (string, error) { + content, err := os.ReadFile(filePath) + if err != nil { + return "", err + } + lines := strings.Split(string(content), "\n") + // Get all copyright info (line numbers) + copyrights, err := extractAllCopyrightInfo(filePath) + if err != nil { + return "", err + } + copyrightLineNums := make(map[int]struct{}) + for _, info := range copyrights { + copyrightLineNums[info.LineNumber] = struct{}{} + } + var filtered []string + for i, line := range lines { + // Line numbers are 1-based in CopyrightInfo + if _, isCopyright := copyrightLineNums[i+1]; !isCopyright { + filtered = append(filtered, line) + } + } + return strings.Join(filtered, "\n"), nil +} + +// getPreviousCommittedFileContent returns the previous committed version (HEAD~1) of the file (excluding copyright lines) +func getPreviousCommittedFileContent(filePath string) (string, error) { + absPath, err := filepath.Abs(filePath) + if err != nil { + return "", err + } + repoRoot, err := getRepoRoot(filepath.Dir(absPath)) + if err != nil { + return "", err + } + relPath, err := filepath.Rel(repoRoot, absPath) + if err != nil { + return "", err + } + output, err := executeGitCommand(repoRoot, "show", 
"HEAD~1:"+relPath) + if err != nil { + return "", err + } + lines := strings.Split(string(output), "\n") + // Use parseCopyrightLine to check each line + var filtered []string + for i, line := range lines { + // parseCopyrightLine returns non-nil if line is a valid copyright + if parseCopyrightLine(line, i+1, filePath) == nil { + filtered = append(filtered, line) + } + } + return strings.Join(filtered, "\n"), nil +} diff --git a/licensecheck/update_test.go b/licensecheck/update_test.go new file mode 100644 index 0000000..fa6dc83 --- /dev/null +++ b/licensecheck/update_test.go @@ -0,0 +1,844 @@ +// Copyright IBM Corp. 2023, 2026 +// SPDX-License-Identifier: MPL-2.0 + +package licensecheck + +import ( + "os" + "path/filepath" + "strconv" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParseCopyrightLine(t *testing.T) { + tests := []struct { + name string + line string + lineNum int + expectedInfo *CopyrightInfo + expectNil bool + }{ + { + name: "Simple copyright with single year", + line: "// Copyright IBM Corp. 2023", + lineNum: 1, + expectedInfo: &CopyrightInfo{ + LineNumber: 1, + OriginalLine: "// Copyright IBM Corp. 2023", + Holder: "IBM Corp.", + StartYear: 2023, + EndYear: 2023, + Prefix: "// ", + TrailingText: "", + }, + }, + { + name: "Copyright with year range", + line: "// Copyright IBM Corp. 2022, 2025", + lineNum: 1, + expectedInfo: &CopyrightInfo{ + LineNumber: 1, + OriginalLine: "// Copyright IBM Corp. 2022, 2025", + Holder: "IBM Corp.", + StartYear: 2022, + EndYear: 2025, + Prefix: "// ", + TrailingText: "", + }, + }, + { + name: "Copyright with (c) symbol", + line: "# Copyright (c) HashiCorp, Inc. 2020", + lineNum: 2, + expectedInfo: &CopyrightInfo{ + LineNumber: 2, + OriginalLine: "# Copyright (c) HashiCorp, Inc. 
2020", + Holder: "HashiCorp, Inc.", + StartYear: 2020, + EndYear: 2020, + Prefix: "# ", + TrailingText: "", + }, + }, + { + name: "Copyright with trailing text", + line: "/* Copyright IBM Corp. 2023 - All rights reserved */", + lineNum: 1, + expectedInfo: &CopyrightInfo{ + LineNumber: 1, + OriginalLine: "/* Copyright IBM Corp. 2023 - All rights reserved */", + Holder: "IBM Corp.", + StartYear: 2023, + EndYear: 2023, + Prefix: "/* ", + TrailingText: " - All rights reserved */", + }, + }, + { + name: "Line without copyright", + line: "// This is just a comment", + lineNum: 1, + expectNil: true, + }, + { + name: "Copyright without year (holder only)", + line: "// Copyright IBM Corp.", + lineNum: 1, + expectedInfo: &CopyrightInfo{ + LineNumber: 1, + OriginalLine: "// Copyright IBM Corp.", + Holder: "IBM Corp.", + StartYear: 0, + EndYear: 0, + Prefix: "// ", + TrailingText: "", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := parseCopyrightLine(tt.line, tt.lineNum, "file.go") + + if tt.expectNil { + assert.Nil(t, result) + return + } + + require.NotNil(t, result) + assert.Equal(t, tt.expectedInfo.LineNumber, result.LineNumber) + assert.Equal(t, tt.expectedInfo.OriginalLine, result.OriginalLine) + assert.Equal(t, tt.expectedInfo.Holder, result.Holder) + assert.Equal(t, tt.expectedInfo.StartYear, result.StartYear) + assert.Equal(t, tt.expectedInfo.EndYear, result.EndYear) + assert.Equal(t, tt.expectedInfo.Prefix, result.Prefix) + assert.Equal(t, tt.expectedInfo.TrailingText, result.TrailingText) + }) + } +} + +func TestExtractCommentPrefix(t *testing.T) { + tests := []struct { + name string + line string + expectedPrefix string + }{ + { + name: "Double slash comment", + line: "// Copyright IBM Corp.", + expectedPrefix: "// ", + }, + { + name: "Double slash without space", + line: "//Copyright IBM Corp.", + expectedPrefix: "//", + }, + { + name: "Hash comment", + line: "# Copyright IBM Corp.", + expectedPrefix: "# ", + }, + { + 
name: "Star comment", + line: "* Copyright IBM Corp.", + expectedPrefix: "* ", + }, + { + name: "Block comment start", + line: "/* Copyright IBM Corp.", + expectedPrefix: "/* ", + }, + { + name: "Indented comment", + line: " // Copyright IBM Corp.", + expectedPrefix: " // ", + }, + { + name: "Tab indented comment", + line: "\t# Copyright IBM Corp.", + expectedPrefix: "\t# ", + }, + { + name: "No comment prefix", + line: "Copyright IBM Corp.", + expectedPrefix: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := extractCommentPrefix(tt.line) + assert.Equal(t, tt.expectedPrefix, result) + }) + } +} + +func TestParseYearFromGitOutput(t *testing.T) { + tests := []struct { + name string + output []byte + useFirstLine bool + expectedYear int + expectError bool + }{ + { + name: "Single year - first line", + output: []byte("2023\n"), + useFirstLine: true, + expectedYear: 2023, + }, + { + name: "Multiple years - first line", + output: []byte("2020\n2021\n2022\n2023\n"), + useFirstLine: true, + expectedYear: 2020, + }, + { + name: "Empty output", + output: []byte(""), + useFirstLine: true, + expectError: true, + }, + { + name: "Invalid year", + output: []byte("invalid\n"), + useFirstLine: true, + expectError: true, + }, + { + name: "Whitespace handling", + output: []byte(" 2022 \n"), + useFirstLine: true, + expectedYear: 2022, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + year, err := parseYearFromGitOutput(tt.output, tt.useFirstLine) + + if tt.expectError { + assert.Error(t, err) + return + } + + require.NoError(t, err) + assert.Equal(t, tt.expectedYear, year) + }) + } +} + +func TestExtractAllCopyrightInfo(t *testing.T) { + tempDir := t.TempDir() + testFile := filepath.Join(tempDir, "test.go") + + fileContent := `// Copyright IBM Corp. 2020, 2023 + +package main + +// Some other comment +// Copyright HashiCorp, Inc. 
2019 + +func main() { + // Not a copyright +} +` + + err := os.WriteFile(testFile, []byte(fileContent), 0644) + require.NoError(t, err) + + copyrights, err := extractAllCopyrightInfo(testFile) + require.NoError(t, err) + require.GreaterOrEqual(t, len(copyrights), 2, "Should find at least 2 copyright statements") + + // First copyright + assert.Equal(t, 1, copyrights[0].LineNumber) + assert.Equal(t, "IBM Corp.", copyrights[0].Holder) + assert.Equal(t, 2020, copyrights[0].StartYear) + assert.Equal(t, 2023, copyrights[0].EndYear) + + // Find the HashiCorp copyright (should be second or later) + var hashicorpFound bool + for _, c := range copyrights { + if c.Holder == "HashiCorp, Inc." { + assert.Equal(t, 2019, c.StartYear) + assert.Equal(t, 2019, c.EndYear) + hashicorpFound = true + break + } + } + assert.True(t, hashicorpFound, "Should find HashiCorp copyright") +} + +func TestExtractCopyrightInfo(t *testing.T) { + tempDir := t.TempDir() + testFile := filepath.Join(tempDir, "test.go") + + fileContent := `// Copyright IBM Corp. 
2020, 2023 +// SPDX-License-Identifier: MPL-2.0 + +package main +` + + err := os.WriteFile(testFile, []byte(fileContent), 0644) + require.NoError(t, err) + + copyrights, err := extractAllCopyrightInfo(testFile) + require.NoError(t, err) + require.NotEmpty(t, copyrights) + + info := copyrights[0] + assert.Equal(t, 1, info.LineNumber) + assert.Equal(t, "IBM Corp.", info.Holder) + assert.Equal(t, 2020, info.StartYear) + assert.Equal(t, 2023, info.EndYear) +} + +func TestExtractCopyrightInfo_NoCopyright(t *testing.T) { + tempDir := t.TempDir() + testFile := filepath.Join(tempDir, "test.go") + + fileContent := `// Just a regular comment +package main +` + + err := os.WriteFile(testFile, []byte(fileContent), 0644) + require.NoError(t, err) + + copyrights, err := extractAllCopyrightInfo(testFile) + require.NoError(t, err) + assert.Empty(t, copyrights) +} + +func TestUpdateCopyrightHeader(t *testing.T) { + currentYear := time.Now().Year() + + tests := []struct { + name string + initialContent string + targetHolder string + configYear int + forceCurrentYear bool + expectModified bool + expectedContent string + }{ + { + name: "Update end year when outdated", + initialContent: `// Copyright IBM Corp. 2022, 2023 +package main +`, + targetHolder: "IBM Corp.", + configYear: 2022, + forceCurrentYear: true, + expectModified: true, + expectedContent: `// Copyright IBM Corp. 2022, ` + string(rune(currentYear/1000+48)) + string(rune((currentYear/100)%10+48)) + string(rune((currentYear/10)%10+48)) + string(rune(currentYear%10+48)) + ` +package main +`, + }, + { + name: "Update start year when different from config", + initialContent: `// Copyright IBM Corp. 2023 +package main +`, + targetHolder: "IBM Corp.", + configYear: 2020, + expectModified: true, + // Since we don't have git history in this test and forceCurrentYear is false, + // the end year should NOT update, only the start year. + expectedContent: `// Copyright IBM Corp. 
2020, 2023 +package main +`, + }, + { + name: "No update needed", + initialContent: `// Copyright IBM Corp. ` + string(rune(currentYear/1000+48)) + string(rune((currentYear/100)%10+48)) + string(rune((currentYear/10)%10+48)) + string(rune(currentYear%10+48)) + ` +package main +`, + targetHolder: "IBM Corp.", + configYear: currentYear, + expectModified: false, + }, + { + name: "Wrong holder - no update", + initialContent: `// Copyright HashiCorp, Inc. 2020 +package main +`, + targetHolder: "IBM Corp.", + configYear: 2022, + expectModified: false, + }, + { + name: "No copyright - no update", + initialContent: `package main +`, + targetHolder: "IBM Corp.", + configYear: 2022, + expectModified: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tempDir := t.TempDir() + testFile := filepath.Join(tempDir, "test.go") + + err := os.WriteFile(testFile, []byte(tt.initialContent), 0644) + require.NoError(t, err) + + modified, err := UpdateCopyrightHeader(testFile, tt.targetHolder, tt.configYear, tt.forceCurrentYear) + require.NoError(t, err) + assert.Equal(t, tt.expectModified, modified) + + if tt.expectModified && tt.expectedContent != "" { + content, err := os.ReadFile(testFile) + require.NoError(t, err) + assert.Equal(t, tt.expectedContent, string(content)) + } + }) + } +} + +func TestParseCopyrightLine_UnprefixedLicense(t *testing.T) { + line := "Copyright IBM Corp. 2018, 2025" + info := parseCopyrightLine(line, 1, "LICENSE") + require.NotNil(t, info) + assert.Equal(t, "IBM Corp.", info.Holder) + assert.Equal(t, 2018, info.StartYear) + assert.Equal(t, 2025, info.EndYear) + assert.Equal(t, "", info.Prefix) +} + +func TestNeedsUpdate(t *testing.T) { + currentYear := time.Now().Year() + + tests := []struct { + name string + fileContent string + targetHolder string + configYear int + forceCurrentYear bool + expectNeedsUpdate bool + }{ + { + name: "Needs update - outdated end year", + fileContent: `// Copyright IBM Corp. 
2022, 2023 +package main +`, + targetHolder: "IBM Corp.", + configYear: 2022, + forceCurrentYear: true, + expectNeedsUpdate: true, + }, + { + name: "Needs update - different start year", + fileContent: `// Copyright IBM Corp. 2023 +package main +`, + targetHolder: "IBM Corp.", + configYear: 2020, + expectNeedsUpdate: true, + }, + { + name: "No update needed - current", + fileContent: `// Copyright IBM Corp. ` + string(rune(currentYear/1000+48)) + string(rune((currentYear/100)%10+48)) + string(rune((currentYear/10)%10+48)) + string(rune(currentYear%10+48)) + ` +package main +`, + targetHolder: "IBM Corp.", + configYear: currentYear, + expectNeedsUpdate: false, + }, + { + name: "Wrong holder - no update", + fileContent: `// Copyright HashiCorp, Inc. 2020 +package main +`, + targetHolder: "IBM Corp.", + configYear: 2022, + expectNeedsUpdate: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tempDir := t.TempDir() + testFile := filepath.Join(tempDir, "test.go") + + err := os.WriteFile(testFile, []byte(tt.fileContent), 0644) + require.NoError(t, err) + + needsUpdate, err := NeedsUpdate(testFile, tt.targetHolder, tt.configYear, tt.forceCurrentYear) + require.NoError(t, err) + assert.Equal(t, tt.expectNeedsUpdate, needsUpdate) + }) + } +} + +func TestUpdateCopyrightHeader_SkipCopywriteConfig(t *testing.T) { + tempDir := t.TempDir() + testFile := filepath.Join(tempDir, ".copywrite.hcl") + + fileContent := `// Copyright IBM Corp. 2020 +schema_version = 1 +` + + err := os.WriteFile(testFile, []byte(fileContent), 0644) + require.NoError(t, err) + + modified, err := UpdateCopyrightHeader(testFile, "IBM Corp.", 2022, false) + require.NoError(t, err) + assert.False(t, modified, "Should skip .copywrite.hcl file") +} + +func TestNeedsUpdate_SkipCopywriteConfig(t *testing.T) { + tempDir := t.TempDir() + testFile := filepath.Join(tempDir, ".copywrite.hcl") + + fileContent := `// Copyright IBM Corp. 
2020 +schema_version = 1 +` + + err := os.WriteFile(testFile, []byte(fileContent), 0644) + require.NoError(t, err) + + needsUpdate, err := NeedsUpdate(testFile, "IBM Corp.", 2022, false) + require.NoError(t, err) + assert.False(t, needsUpdate, "Should skip .copywrite.hcl file") +} + +func TestIsGenerated(t *testing.T) { + tests := []struct { + name string + content string + expectSkip bool + }{ + { + name: "Go generated file", + content: `// Code generated by protoc-gen-go. DO NOT EDIT. +package main + +func main() {} +`, + expectSkip: true, + }, + { + name: "Cargo raze generated file", + content: `DO NOT EDIT! Replaced on runs of cargo-raze + +[package] +name = "test" +`, + expectSkip: true, + }, + { + name: "Terraform init generated file", + content: `# This file is maintained automatically by "terraform init". + +provider "aws" {} +`, + expectSkip: true, + }, + { + name: "Regular file", + content: `// Copyright IBM Corp. 2023 +package main + +func main() {} +`, + expectSkip: false, + }, + { + name: "File with 'generated' in comment but not a marker", + content: `// This file was generated by hand +package main +`, + expectSkip: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := isGenerated([]byte(tt.content)) + assert.Equal(t, tt.expectSkip, result) + }) + } +} + +func TestHasSpecialFirstLine(t *testing.T) { + tests := []struct { + name string + content string + filePath string + expected bool + }{ + { + name: "Shell script with shebang", + content: "#!/bin/bash\necho 'hello'\n", + filePath: "test.sh", + expected: true, + }, + { + name: "XML with declaration", + content: "\n\n", + filePath: "test.xml", + expected: true, + }, + { + name: "PHP file", + content: "