From a9cf72415b05c5f659e9aacfa10c5b27de5ba648 Mon Sep 17 00:00:00 2001 From: Mohan Manikanta Date: Sat, 10 Jan 2026 01:24:53 +0530 Subject: [PATCH 01/15] Extend header functionality to update years. --- addlicense/tmpl.go | 4 +- cmd/headers.go | 167 ++++++++++++++++- cmd/license.go | 2 +- licensecheck/update.go | 407 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 575 insertions(+), 5 deletions(-) create mode 100644 licensecheck/update.go diff --git a/addlicense/tmpl.go b/addlicense/tmpl.go index d9e9693..bf60bb8 100644 --- a/addlicense/tmpl.go +++ b/addlicense/tmpl.go @@ -144,9 +144,9 @@ const tmplMPL = `This Source Code Form is subject to the terms of the Mozilla Pu License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.` -const tmplSPDX = `Copyright (c){{ if .Holder }} {{.Holder}}{{ end }}{{ if .Year }} {{.Year}}{{ end }} +const tmplSPDX = `Copyright{{ if .Holder }} {{.Holder}}{{ end }}{{ if .Year }} {{.Year}}{{ end }} {{ if .SPDXID }}SPDX-License-Identifier: {{.SPDXID}}{{ end }}` -const tmplCopyrightOnly = `Copyright (c){{ if .Holder }} {{.Holder}}{{ end }}{{ if .Year }} {{.Year}}{{ end }}` +const tmplCopyrightOnly = `Copyright{{ if .Holder }} {{.Holder}}{{ end }}{{ if .Year }} {{.Year}}{{ end }}` const spdxSuffix = "\n\nSPDX-License-Identifier: {{.SPDXID}}" diff --git a/cmd/headers.go b/cmd/headers.go index 6902699..252795a 100644 --- a/cmd/headers.go +++ b/cmd/headers.go @@ -6,8 +6,11 @@ package cmd import ( "fmt" "os" + "path/filepath" + "strings" "github.com/hashicorp/copywrite/addlicense" + "github.com/hashicorp/copywrite/licensecheck" "github.com/hashicorp/go-hclog" "github.com/jedib0t/go-pretty/v6/text" "github.com/samber/lo" @@ -87,10 +90,23 @@ config, see the "copywrite init" command.`, ".github/workflows/**", ".github/dependabot.yml", "**/node_modules/**", + ".copywrite.hcl", } ignoredPatterns := lo.Union(conf.Project.HeaderIgnore, autoSkippedPatterns) - // Construct the 
configuration addLicense needs to properly format headers + // STEP 1: Update existing copyright headers + gha.StartGroup("Updating existing copyright headers:") + updatedCount := updateExistingHeaders(cmd, ignoredPatterns, plan) + gha.EndGroup() + if updatedCount > 0 { + if plan { + cmd.Printf("\n%s\n\n", text.FgYellow.Sprintf("[DRY RUN] Would update %d file(s) with new copyright years", updatedCount)) + } else { + cmd.Printf("\n%s\n\n", text.FgGreen.Sprintf("Successfully updated %d file(s) with new copyright years", updatedCount)) + } + } + + // STEP 2: Construct the configuration addLicense needs to properly format headers licenseData := addlicense.LicenseData{ Year: conf.FormatCopyrightYears(), // Format year(s) for copyright statements Holder: conf.Project.CopyrightHolder, @@ -112,7 +128,8 @@ config, see the "copywrite init" command.`, // cobra.CheckErr on the return, which will indeed output to stderr and // return a non-zero error code. - gha.StartGroup("The following files are missing headers:") + // STEP 3: Add missing headers + gha.StartGroup("Adding missing copyright headers:") err := addlicense.Run(ignoredPatterns, "only", licenseData, "", verbose, plan, []string{"."}, stdcliLogger) gha.EndGroup() @@ -131,3 +148,149 @@ func init() { headersCmd.Flags().StringP("spdx", "s", "", "SPDX-compliant license identifier (e.g., 'MPL-2.0')") headersCmd.Flags().StringP("copyright-holder", "c", "", "Copyright holder (default \"IBM Corp.\")") } + +// updateExistingHeaders walks through files and updates copyright headers based on config and git history +func updateExistingHeaders(cmd *cobra.Command, ignoredPatterns []string, dryRun bool) int { + targetHolder := conf.Project.CopyrightHolder + if targetHolder == "" { + targetHolder = "IBM Corp." 
// shouldIgnoreFile reports whether path matches any of the ignore patterns.
//
// Both the path and each pattern are normalised to forward slashes. Three
// pattern forms are recognised:
//
//   - Patterns containing "**" are matched segment by segment. A "**"
//     segment matches zero or more whole path segments, except as the final
//     segment, where it must match at least one (so "dir/**" matches
//     everything below dir but not dir itself). Any number of "**" segments
//     may appear, e.g. "**/node_modules/**".
//   - Patterns containing only "*" are matched against the whole path by
//     matchWildcard, where "*" may span "/".
//   - Any other pattern must equal the path exactly.
func shouldIgnoreFile(path string, patterns []string) bool {
	path = filepath.ToSlash(path)
	pathSegments := strings.Split(path, "/")

	for _, pattern := range patterns {
		pattern = filepath.ToSlash(pattern)

		switch {
		case strings.Contains(pattern, "**"):
			// Leading and trailing slashes carry no meaning for this matcher;
			// dropping them keeps "/dir/**" and "dir/**/" usable.
			trimmed := strings.TrimSuffix(strings.TrimPrefix(pattern, "/"), "/")
			if matchPathSegments(strings.Split(trimmed, "/"), pathSegments) {
				return true
			}
		case strings.Contains(pattern, "*"):
			if matchWildcard(path, pattern) {
				return true
			}
		case path == pattern:
			return true
		}
	}

	return false
}

// matchPathSegments matches path segments against pattern segments, treating
// a "**" pattern segment as "any run of segments" and matching every other
// segment with matchWildcard.
func matchPathSegments(pattern, segments []string) bool {
	for len(pattern) > 0 {
		if pattern[0] != "**" {
			if len(segments) == 0 || !matchWildcard(segments[0], pattern[0]) {
				return false
			}
			pattern, segments = pattern[1:], segments[1:]
			continue
		}

		rest := pattern[1:]
		if len(rest) == 0 {
			// Trailing "**": something must remain beneath the prefix.
			return len(segments) > 0
		}
		// Try letting "**" swallow 0, 1, 2, ... segments.
		for skip := 0; skip <= len(segments); skip++ {
			if matchPathSegments(rest, segments[skip:]) {
				return true
			}
		}
		return false
	}

	return len(segments) == 0
}

// matchWildcard reports whether path matches pattern, where each "*" in the
// pattern stands for any (possibly empty) run of characters. A pattern
// without "*" must equal path exactly.
func matchWildcard(path, pattern string) bool {
	parts := strings.Split(pattern, "*")
	if len(parts) == 1 {
		return path == pattern
	}

	head, tail := parts[0], parts[len(parts)-1]

	// The literal head and tail must both be present without overlapping;
	// otherwise "a*a" would wrongly match "a".
	if len(path) < len(head)+len(tail) ||
		!strings.HasPrefix(path, head) ||
		!strings.HasSuffix(path, tail) {
		return false
	}

	// The remaining literals must occur, in order, strictly between them.
	middle := path[len(head) : len(path)-len(tail)]
	for _, literal := range parts[1 : len(parts)-1] {
		idx := strings.Index(middle, literal)
		if idx < 0 {
			return false
		}
		middle = middle[idx+len(literal):]
	}

	return true
}
// Regular expressions used by parseCopyrightLine, compiled once.
var (
	// copyrightLeadRe matches the word "copyright" (any case) at the very
	// start of the content, plus an optional "(c)" marker and whitespace.
	copyrightLeadRe = regexp.MustCompile(`(?i)^copyright\b\s*(?:\(c\))?\s*`)
	// copyrightYearRe matches a standalone four-digit number.
	copyrightYearRe = regexp.MustCompile(`\b(\d{4})\b`)
)

// CopyrightInfo holds parsed copyright information from a file.
type CopyrightInfo struct {
	LineNumber   int    // 1-based line number of the statement
	OriginalLine string // The line as read, without its line terminator
	Holder       string // Text between "Copyright [(c)]" and the year expression
	StartYear    int    // First year; equals EndYear for a single year, 0 if none
	EndYear      int    // Last year on the line, 0 if none
	Prefix       string // Leading whitespace plus comment marker (e.g., "// ", "# ")
	TrailingText string // Any text after the years, preserved verbatim
}

// ExtractAllCopyrightInfo scans filePath line by line and returns one
// CopyrightInfo for every line that parseCopyrightLine recognises as a
// copyright statement, in file order.
//
// It returns the error from opening the file or from the scanner; note that
// bufio.Scanner reports bufio.ErrTooLong for lines longer than its default
// token limit.
func ExtractAllCopyrightInfo(filePath string) ([]*CopyrightInfo, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	var copyrights []*CopyrightInfo

	scanner := bufio.NewScanner(file)
	for lineNum := 1; scanner.Scan(); lineNum++ {
		line := scanner.Text()

		// Cheap pre-filter before the regexp-based parse.
		if !strings.Contains(strings.ToLower(line), "copyright") {
			continue
		}
		if info := parseCopyrightLine(line, lineNum); info != nil {
			copyrights = append(copyrights, info)
		}
	}

	return copyrights, scanner.Err()
}

// ExtractCopyrightInfo returns the first copyright statement found in
// filePath, or (nil, nil) when the file contains none.
func ExtractCopyrightInfo(filePath string) (*CopyrightInfo, error) {
	copyrights, err := ExtractAllCopyrightInfo(filePath)
	if err != nil {
		return nil, err
	}
	if len(copyrights) == 0 {
		return nil, nil
	}
	return copyrights[0], nil
}

// parseCopyrightLine parses a single line as a copyright statement.
//
// The line is recognised only when, after leading whitespace and an optional
// comment marker, it *starts* with the word "copyright" (case-insensitive).
// Lines that merely mention the word somewhere else, such as code or string
// literals, return nil, so callers that rewrite the line cannot corrupt them.
//
// The last four-digit number on the line is the end year. If the number just
// before it is separated only by spaces, commas or hyphens, the two form a
// start/end pair. Everything before the year expression is the holder and
// everything after it is kept verbatim as TrailingText. A statement without
// any year yields a CopyrightInfo with zero years and the remaining text as
// the holder.
func parseCopyrightLine(line string, lineNum int) *CopyrightInfo {
	prefix := extractCommentPrefix(line)
	if len(prefix) >= len(line) {
		return nil
	}
	content := line[len(prefix):]

	lead := copyrightLeadRe.FindString(content)
	if lead == "" {
		return nil
	}
	statement := strings.TrimSpace(content[len(lead):])

	info := &CopyrightInfo{
		LineNumber:   lineNum,
		OriginalLine: line,
		Prefix:       prefix,
	}

	years := copyrightYearRe.FindAllStringIndex(statement, -1)
	if len(years) == 0 {
		// No year found, everything is the holder.
		info.Holder = statement
		return info
	}

	last := years[len(years)-1]
	yearsStart := last[0] // where the year expression begins

	// A four-digit match always parses, so the Atoi errors are ignored.
	info.EndYear, _ = strconv.Atoi(statement[last[0]:last[1]])
	info.StartYear = info.EndYear

	if len(years) >= 2 {
		prev := years[len(years)-2]
		between := statement[prev[1]:last[0]]
		// Only separators between the two numbers: treat them as a range.
		if strings.TrimSpace(strings.Trim(between, "-, ")) == "" {
			if start, err := strconv.Atoi(statement[prev[0]:prev[1]]); err == nil && start != 0 {
				info.StartYear = start
			}
			yearsStart = prev[0]
		}
	}

	// Keep everything ahead of the year expression, including any earlier
	// unrelated number, so rebuilding the line loses no text.
	info.Holder = strings.TrimSpace(statement[:yearsStart])
	info.TrailingText = statement[last[1]:]

	return info
}

// extractCommentPrefix returns the leading whitespace of line followed by the
// comment marker that opens it, if any. When no known marker is present only
// the leading whitespace is returned.
func extractCommentPrefix(line string) string {
	trimmed := strings.TrimLeft(line, " \t")
	leadingSpace := line[:len(line)-len(trimmed)]

	// For each marker the variant with a trailing space is listed first so
	// that the space becomes part of the prefix.
	commentPrefixes := []string{
		"// ", "//",
		"# ", "#",
		"/* ", "/*",
		"* ", "*",
		"<!-- ", "<!--",
		"-- ", "--",
	}

	for _, prefix := range commentPrefixes {
		if strings.HasPrefix(trimmed, prefix) {
			return leadingSpace + prefix
		}
	}

	return leadingSpace
}

// GetFileLastCommitYear returns the year of the last commit that modified a
// file, as reported by "git log -1" run in the file's directory. It returns
// an error when git fails or when the file has no commits.
func GetFileLastCommitYear(filePath string) (int, error) {
	absPath, err := filepath.Abs(filePath)
	if err != nil {
		return 0, err
	}

	cmd := exec.Command("git", "log", "-1", "--format=%ad", "--date=format:%Y", "--", filepath.Base(absPath))
	cmd.Dir = filepath.Dir(absPath)

	output, err := cmd.Output()
	if err != nil {
		return 0, fmt.Errorf("git log for %q: %w", filePath, err)
	}

	yearStr := strings.TrimSpace(string(output))
	if yearStr == "" {
		return 0, fmt.Errorf("no commits found")
	}

	year, err := strconv.Atoi(yearStr)
	if err != nil {
		return 0, fmt.Errorf("parsing commit year %q: %w", yearStr, err)
	}

	return year, nil
}
len(lines) == 0 || lines[0] == "" { + return 0, fmt.Errorf("no commits found") + } + + year, err := strconv.Atoi(strings.TrimSpace(lines[0])) + if err != nil { + return 0, err + } + + return year, nil +} + +// UpdateCopyrightHeader updates all copyright headers in a file if needed +// If forceCurrentYear is true, forces end year to current year regardless of git history +// Returns true if the file was modified +func UpdateCopyrightHeader(filePath string, targetHolder string, configYear int, forceCurrentYear bool) (bool, error) { + // Skip .copywrite.hcl config file + if filepath.Base(filePath) == ".copywrite.hcl" { + return false, nil + } + + // Extract all copyright statements in the file + copyrights, err := ExtractAllCopyrightInfo(filePath) + if err != nil { + return false, err + } + + if len(copyrights) == 0 { + // No copyright headers found + return false, nil + } + + currentYear := time.Now().Year() + + // Get last commit year once for the file + lastCommitYear, _ := GetFileLastCommitYear(filePath) + + // If configYear is 0, try to auto-detect from repo's first commit + canonicalStartYear := configYear + if canonicalStartYear == 0 { + if repoFirstYear, err := GetRepoFirstCommitYear(filepath.Dir(filePath)); err == nil && repoFirstYear > 0 { + canonicalStartYear = repoFirstYear + } + } + + // Read file content + content, err := os.ReadFile(filePath) + if err != nil { + return false, err + } + + lines := strings.Split(string(content), "\n") + modified := false + + // Process each copyright statement + for _, info := range copyrights { + // Check if holder matches target (case-insensitive partial match) + if !strings.Contains(strings.ToLower(info.Holder), strings.ToLower(targetHolder)) { + continue + } + + shouldUpdate := false + newStartYear := info.StartYear + newEndYear := info.EndYear + + // Condition 1: Update start year if canonical year differs from file's start year + if canonicalStartYear > 0 && info.StartYear != canonicalStartYear { + newStartYear = 
canonicalStartYear + shouldUpdate = true + } + + // Condition 2: Check if file was modified after the copyright end year OR we're making any update + if lastCommitYear > info.EndYear || shouldUpdate { + // File was modified after copyright end year (or will be modified by us), update end year + if info.EndYear < currentYear { + newEndYear = currentYear + shouldUpdate = true + } + } + + // Condition 3: Force current year if requested (e.g., for LICENSE when other files updated) + if forceCurrentYear && info.EndYear < currentYear { + newEndYear = currentYear + shouldUpdate = true + } + + if !shouldUpdate { + continue + } + + if info.LineNumber < 1 || info.LineNumber > len(lines) { + continue + } + + // Reconstruct the copyright line preserving format and trailing text + var yearStr string + if newStartYear == newEndYear { + yearStr = fmt.Sprintf("%d", newEndYear) + } else { + yearStr = fmt.Sprintf("%d, %d", newStartYear, newEndYear) + } + + // Build new line: prefix + "Copyright " + holder + " " + years + trailing + newLine := fmt.Sprintf("%sCopyright %s %s", info.Prefix, info.Holder, yearStr) + if info.TrailingText != "" { + newLine += info.TrailingText + } + + lines[info.LineNumber-1] = newLine + modified = true + } + + if !modified { + return false, nil + } + + // Write back + newContent := strings.Join(lines, "\n") + err = os.WriteFile(filePath, []byte(newContent), 0644) + if err != nil { + return false, err + } + + return true, nil +} + +// NeedsUpdate checks if a file would be updated without actually modifying it +// If forceCurrentYear is true, forces end year to current year regardless of git history +// Returns true if the file has copyrights matching targetHolder that need year updates +func NeedsUpdate(filePath string, targetHolder string, configYear int, forceCurrentYear bool) (bool, error) { + // Skip .copywrite.hcl config file + if filepath.Base(filePath) == ".copywrite.hcl" { + return false, nil + } + + // Extract all copyright statements in the file 
+ copyrights, err := ExtractAllCopyrightInfo(filePath) + if err != nil { + return false, err + } + + if len(copyrights) == 0 { + return false, nil + } + + currentYear := time.Now().Year() + + // Get last commit year once for the file + lastCommitYear, _ := GetFileLastCommitYear(filePath) + + // If configYear is 0, try to auto-detect from repo's first commit + canonicalStartYear := configYear + if canonicalStartYear == 0 { + if repoFirstYear, err := GetRepoFirstCommitYear(filepath.Dir(filePath)); err == nil && repoFirstYear > 0 { + canonicalStartYear = repoFirstYear + } + } + + // Process each copyright statement + for _, info := range copyrights { + // Check if holder matches target (case-insensitive partial match) + if !strings.Contains(strings.ToLower(info.Holder), strings.ToLower(targetHolder)) { + continue + } + + needsUpdate := false + + // Condition 1: Update start year if canonical year differs from file's start year + if canonicalStartYear > 0 && info.StartYear != canonicalStartYear { + needsUpdate = true + } + + // Condition 2: Check if file was modified after the copyright end year OR we're making any update + if lastCommitYear > info.EndYear || needsUpdate { + // File was modified after copyright end year (or will be modified by us), update end year + if info.EndYear < currentYear { + needsUpdate = true + } + } + + // Condition 3: Force current year if requested + if forceCurrentYear && info.EndYear < currentYear { + needsUpdate = true + } + + if needsUpdate { + return true, nil + } + } + + return false, nil +} From 85701d10394adbc6efc161cd48fda3d3b34a7b2c Mon Sep 17 00:00:00 2001 From: Mohan Manikanta Date: Tue, 13 Jan 2026 02:21:08 +0530 Subject: [PATCH 02/15] Updated readme, year logic in license command and added tests for headers update --- README.md | 45 +++- cmd/license.go | 41 ++- licensecheck/update.go | 56 ++-- licensecheck/update_test.go | 496 ++++++++++++++++++++++++++++++++++++ 4 files changed, 607 insertions(+), 31 deletions(-) create mode 
100644 licensecheck/update_test.go diff --git a/README.md b/README.md index f288531..3dd5098 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,11 @@ This repo provides utilities for managing copyright headers and license files across many repos at scale. -You can use it to add or validate copyright headers on source code files, add a -LICENSE file to a repo, report on what licenses repos are using, and more. +Features: +- Add or validate copyright headers on source code files +- Add and/or manage LICENSE files with git-aware copyright year detection +- Report on licenses used across multiple repositories +- Automate compliance checks in CI/CD pipelines ## Getting Started @@ -33,7 +36,7 @@ Usage: copywrite [command] Common Commands: - headers Adds missing copyright headers to all source code files + headers Adds missing copyright headers to all source code files and updates existing headers based on each file's last updated year. init Generates a .copywrite.hcl config for a new project license Validates that a LICENSE file is present and remediates any issues if found @@ -62,8 +65,18 @@ scan all files in your repo and copyright headers to any that are missing: copywrite headers --spdx "MPL-2.0" ``` -You may omit the `--spdx` flag if you add a `.copywrite.hcl` config, as outlined -[here](#config-structure). +The `copywrite license` command validates and manages LICENSE files with git-aware copyright years: + +```sh +copywrite license --spdx "MPL-2.0" +``` + +**Copyright Year Behavior:** +- **Start Year**: Taken from the config file; if not set there, defaults to the year of the repository's first commit +- **End Year**: Based on the repository's last commit (not forced to the current year) +- **Example**: First commit 2020, last commit 2025 → `Copyright 2020, 2025` + +You may omit the `--spdx` flag if you add a `.copywrite.hcl` config, as outlined [here](#config-structure). 
### `--plan` Flag @@ -99,8 +112,8 @@ project { # (OPTIONAL) Represents the year that the project initially began # This is used as the starting year in copyright statements - # If set and different from current year, headers will show: "copyright_year, current_year" - # If set and same as current year, headers will show: "current_year" + # If set and different from current year, headers will show: "copyright_year, year-2" + # If set and same as year-2, headers will show: "copyright_year" # If not set (0), the tool will auto-detect from git history (first commit year) # If auto-detection fails, it will fallback to current year only # Default: 0 (auto-detect) @@ -171,6 +184,24 @@ snippet to your repo's `.pre-commit-config.yaml`: - id: copywrite-headers ``` +## Technical Details + +### Copyright Year Logic + +**Source File Headers:** +- End year: Individual file's last commit year +- Updates when file is modified + +**LICENSE Files:** +- End year: Repository's last commit year (git-aware) +- Preserves historical accuracy for archived projects + +### Key Functions + +- `GetRepoLastCommitYear()` - Repository's last commit year +- `UpdateCopyrightHeader()` - Git-aware header updates +- `determineLicenseCopyrightYears()` - Smart LICENSE year calculation + ## Debugging Copywrite supports several built-in features to aid with debugging. 
The first diff --git a/cmd/license.go b/cmd/license.go index 095e896..5e3f529 100644 --- a/cmd/license.go +++ b/cmd/license.go @@ -7,6 +7,8 @@ import ( "errors" "fmt" "path/filepath" + "strconv" + "time" "github.com/hashicorp/copywrite/github" "github.com/hashicorp/copywrite/licensecheck" @@ -63,10 +65,14 @@ var licenseCmd = &cobra.Command{ Run: func(cmd *cobra.Command, args []string) { cmd.Printf("Licensing under the following terms: %s\n", conf.Project.License) - cmd.Printf("Using copyright years: %v\n", conf.FormatCopyrightYears()) + + // Determine appropriate copyright years for LICENSE file + licenseYears := determineLicenseCopyrightYears(dirPath) + + cmd.Printf("Using copyright years: %v\n", licenseYears) cmd.Printf("Using copyright holder: %v\n\n", conf.Project.CopyrightHolder) - copyright := "Copyright " + conf.Project.CopyrightHolder + " " + conf.FormatCopyrightYears() + copyright := "Copyright " + conf.Project.CopyrightHolder + " " + licenseYears licenseFiles, err := licensecheck.FindLicenseFiles(dirPath) if err != nil { @@ -175,3 +181,34 @@ func init() { licenseCmd.Flags().StringP("spdx", "s", "", "SPDX License Identifier indicating what the LICENSE file should represent") licenseCmd.Flags().StringP("copyright-holder", "c", "", "Copyright holder (default \"IBM Corp.\")") } + +// determineLicenseCopyrightYears determines the appropriate copyright year range for LICENSE file +// Uses git history to get the start year (first commit) and end year (last commit) +func determineLicenseCopyrightYears(dirPath string) string { + currentYear := time.Now().Year() + startYear := conf.Project.CopyrightYear + + // If no start year configured, try to auto-detect from git + if startYear == 0 { + if detectedYear, err := licensecheck.GetRepoFirstCommitYear(dirPath); err == nil && detectedYear > 0 { + startYear = detectedYear + } else { + // Fallback to current year + return strconv.Itoa(currentYear) + } + } + + // Determine end year from repository's last commit year + 
// parseYearFromGitOutput parses a year from the output of a git command that
// prints one year per line.
//
// Surrounding whitespace is ignored. When useFirstLine is true only the first
// line is parsed, so multi-line output is accepted; otherwise the whole
// output must be a single number. Empty output yields a "no commits found"
// error.
func parseYearFromGitOutput(output []byte, useFirstLine bool) (int, error) {
	yearStr := strings.TrimSpace(string(output))
	if yearStr == "" {
		return 0, fmt.Errorf("no commits found")
	}

	if useFirstLine {
		firstLine, _, _ := strings.Cut(yearStr, "\n")
		yearStr = strings.TrimSpace(firstLine)
	}

	year, err := strconv.Atoi(yearStr)
	if err != nil {
		return 0, fmt.Errorf("parsing year from git output %q: %w", yearStr, err)
	}

	return year, nil
}

// gitLogYear runs "git log <args...>" in dir and parses a year from its
// output; see parseYearFromGitOutput for the meaning of useFirstLine.
func gitLogYear(dir string, useFirstLine bool, args ...string) (int, error) {
	cmd := exec.Command("git", append([]string{"log"}, args...)...)
	cmd.Dir = dir

	output, err := cmd.Output()
	if err != nil {
		return 0, fmt.Errorf("git log in %q: %w", dir, err)
	}

	return parseYearFromGitOutput(output, useFirstLine)
}

// GetFileLastCommitYear returns the year of the last commit that modified a
// file. git is run in the file's own directory with the file's base name as
// the path argument.
func GetFileLastCommitYear(filePath string) (int, error) {
	absPath, err := filepath.Abs(filePath)
	if err != nil {
		return 0, err
	}

	return gitLogYear(filepath.Dir(absPath), false,
		"-1", "--format=%ad", "--date=format:%Y", "--", filepath.Base(absPath))
}

// GetRepoFirstCommitYear returns the year of the first commit in the
// repository containing workingDir. The log is printed oldest-first and only
// its first line is used.
func GetRepoFirstCommitYear(workingDir string) (int, error) {
	return gitLogYear(workingDir, true,
		"--reverse", "--format=%ad", "--date=format:%Y")
}

// GetRepoLastCommitYear returns the year of the last commit in the
// repository containing workingDir.
func GetRepoLastCommitYear(workingDir string) (int, error) {
	return gitLogYear(workingDir, false,
		"-1", "--format=%ad", "--date=format:%Y")
}
2023", + lineNum: 1, + expectedInfo: &CopyrightInfo{ + LineNumber: 1, + OriginalLine: "// Copyright IBM Corp. 2023", + Holder: "IBM Corp.", + StartYear: 2023, + EndYear: 2023, + Prefix: "// ", + TrailingText: "", + }, + }, + { + name: "Copyright with year range", + line: "// Copyright IBM Corp. 2022, 2025", + lineNum: 1, + expectedInfo: &CopyrightInfo{ + LineNumber: 1, + OriginalLine: "// Copyright IBM Corp. 2022, 2025", + Holder: "IBM Corp.", + StartYear: 2022, + EndYear: 2025, + Prefix: "// ", + TrailingText: "", + }, + }, + { + name: "Copyright with (c) symbol", + line: "# Copyright (c) HashiCorp, Inc. 2020", + lineNum: 2, + expectedInfo: &CopyrightInfo{ + LineNumber: 2, + OriginalLine: "# Copyright (c) HashiCorp, Inc. 2020", + Holder: "HashiCorp, Inc.", + StartYear: 2020, + EndYear: 2020, + Prefix: "# ", + TrailingText: "", + }, + }, + { + name: "Copyright with trailing text", + line: "/* Copyright IBM Corp. 2023 - All rights reserved */", + lineNum: 1, + expectedInfo: &CopyrightInfo{ + LineNumber: 1, + OriginalLine: "/* Copyright IBM Corp. 
2023 - All rights reserved */", + Holder: "IBM Corp.", + StartYear: 2023, + EndYear: 2023, + Prefix: "/* ", + TrailingText: " - All rights reserved */", + }, + }, + { + name: "Line without copyright", + line: "// This is just a comment", + lineNum: 1, + expectNil: true, + }, + { + name: "Copyright without year (holder only)", + line: "// Copyright IBM Corp.", + lineNum: 1, + expectedInfo: &CopyrightInfo{ + LineNumber: 1, + OriginalLine: "// Copyright IBM Corp.", + Holder: "IBM Corp.", + StartYear: 0, + EndYear: 0, + Prefix: "// ", + TrailingText: "", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := parseCopyrightLine(tt.line, tt.lineNum) + + if tt.expectNil { + assert.Nil(t, result) + return + } + + require.NotNil(t, result) + assert.Equal(t, tt.expectedInfo.LineNumber, result.LineNumber) + assert.Equal(t, tt.expectedInfo.OriginalLine, result.OriginalLine) + assert.Equal(t, tt.expectedInfo.Holder, result.Holder) + assert.Equal(t, tt.expectedInfo.StartYear, result.StartYear) + assert.Equal(t, tt.expectedInfo.EndYear, result.EndYear) + assert.Equal(t, tt.expectedInfo.Prefix, result.Prefix) + assert.Equal(t, tt.expectedInfo.TrailingText, result.TrailingText) + }) + } +} + +func TestExtractCommentPrefix(t *testing.T) { + tests := []struct { + name string + line string + expectedPrefix string + }{ + { + name: "Double slash comment", + line: "// Copyright IBM Corp.", + expectedPrefix: "// ", + }, + { + name: "Double slash without space", + line: "//Copyright IBM Corp.", + expectedPrefix: "//", + }, + { + name: "Hash comment", + line: "# Copyright IBM Corp.", + expectedPrefix: "# ", + }, + { + name: "Star comment", + line: "* Copyright IBM Corp.", + expectedPrefix: "* ", + }, + { + name: "Block comment start", + line: "/* Copyright IBM Corp.", + expectedPrefix: "/* ", + }, + { + name: "Indented comment", + line: " // Copyright IBM Corp.", + expectedPrefix: " // ", + }, + { + name: "Tab indented comment", + line: "\t# Copyright 
IBM Corp.", + expectedPrefix: "\t# ", + }, + { + name: "No comment prefix", + line: "Copyright IBM Corp.", + expectedPrefix: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := extractCommentPrefix(tt.line) + assert.Equal(t, tt.expectedPrefix, result) + }) + } +} + +func TestParseYearFromGitOutput(t *testing.T) { + tests := []struct { + name string + output []byte + useFirstLine bool + expectedYear int + expectError bool + }{ + { + name: "Single year - first line", + output: []byte("2023\n"), + useFirstLine: true, + expectedYear: 2023, + }, + { + name: "Multiple years - first line", + output: []byte("2020\n2021\n2022\n2023\n"), + useFirstLine: true, + expectedYear: 2020, + }, + { + name: "Empty output", + output: []byte(""), + useFirstLine: true, + expectError: true, + }, + { + name: "Invalid year", + output: []byte("invalid\n"), + useFirstLine: true, + expectError: true, + }, + { + name: "Whitespace handling", + output: []byte(" 2022 \n"), + useFirstLine: true, + expectedYear: 2022, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + year, err := parseYearFromGitOutput(tt.output, tt.useFirstLine) + + if tt.expectError { + assert.Error(t, err) + return + } + + require.NoError(t, err) + assert.Equal(t, tt.expectedYear, year) + }) + } +} + +func TestExtractAllCopyrightInfo(t *testing.T) { + tempDir := t.TempDir() + testFile := filepath.Join(tempDir, "test.go") + + fileContent := `// Copyright IBM Corp. 2020, 2023 + +package main + +// Some other comment +// Copyright HashiCorp, Inc. 
2019 + +func main() { + // Not a copyright +} +` + + err := os.WriteFile(testFile, []byte(fileContent), 0644) + require.NoError(t, err) + + copyrights, err := ExtractAllCopyrightInfo(testFile) + require.NoError(t, err) + require.GreaterOrEqual(t, len(copyrights), 2, "Should find at least 2 copyright statements") + + // First copyright + assert.Equal(t, 1, copyrights[0].LineNumber) + assert.Equal(t, "IBM Corp.", copyrights[0].Holder) + assert.Equal(t, 2020, copyrights[0].StartYear) + assert.Equal(t, 2023, copyrights[0].EndYear) + + // Find the HashiCorp copyright (should be second or later) + var hashicorpFound bool + for _, c := range copyrights { + if c.Holder == "HashiCorp, Inc." { + assert.Equal(t, 2019, c.StartYear) + assert.Equal(t, 2019, c.EndYear) + hashicorpFound = true + break + } + } + assert.True(t, hashicorpFound, "Should find HashiCorp copyright") +} + +func TestExtractCopyrightInfo(t *testing.T) { + tempDir := t.TempDir() + testFile := filepath.Join(tempDir, "test.go") + + fileContent := `// Copyright IBM Corp. 
2020, 2023 +// SPDX-License-Identifier: MPL-2.0 + +package main +` + + err := os.WriteFile(testFile, []byte(fileContent), 0644) + require.NoError(t, err) + + info, err := ExtractCopyrightInfo(testFile) + require.NoError(t, err) + require.NotNil(t, info) + + assert.Equal(t, 1, info.LineNumber) + assert.Equal(t, "IBM Corp.", info.Holder) + assert.Equal(t, 2020, info.StartYear) + assert.Equal(t, 2023, info.EndYear) +} + +func TestExtractCopyrightInfo_NoCopyright(t *testing.T) { + tempDir := t.TempDir() + testFile := filepath.Join(tempDir, "test.go") + + fileContent := `// Just a regular comment +package main +` + + err := os.WriteFile(testFile, []byte(fileContent), 0644) + require.NoError(t, err) + + info, err := ExtractCopyrightInfo(testFile) + require.NoError(t, err) + assert.Nil(t, info) +} + +func TestUpdateCopyrightHeader(t *testing.T) { + currentYear := time.Now().Year() + + tests := []struct { + name string + initialContent string + targetHolder string + configYear int + forceCurrentYear bool + expectModified bool + expectedContent string + }{ + { + name: "Update end year when outdated", + initialContent: `// Copyright IBM Corp. 2022, 2023 +package main +`, + targetHolder: "IBM Corp.", + configYear: 2022, + forceCurrentYear: true, + expectModified: true, + expectedContent: `// Copyright IBM Corp. 2022, ` + string(rune(currentYear/1000+48)) + string(rune((currentYear/100)%10+48)) + string(rune((currentYear/10)%10+48)) + string(rune(currentYear%10+48)) + ` +package main +`, + }, + { + name: "Update start year when different from config", + initialContent: `// Copyright IBM Corp. 2023 +package main +`, + targetHolder: "IBM Corp.", + configYear: 2020, + expectModified: true, + expectedContent: `// Copyright IBM Corp. 
2020, ` + string(rune(currentYear/1000+48)) + string(rune((currentYear/100)%10+48)) + string(rune((currentYear/10)%10+48)) + string(rune(currentYear%10+48)) + ` +package main +`, + }, + { + name: "No update needed", + initialContent: `// Copyright IBM Corp. ` + string(rune(currentYear/1000+48)) + string(rune((currentYear/100)%10+48)) + string(rune((currentYear/10)%10+48)) + string(rune(currentYear%10+48)) + ` +package main +`, + targetHolder: "IBM Corp.", + configYear: currentYear, + expectModified: false, + }, + { + name: "Wrong holder - no update", + initialContent: `// Copyright HashiCorp, Inc. 2020 +package main +`, + targetHolder: "IBM Corp.", + configYear: 2022, + expectModified: false, + }, + { + name: "No copyright - no update", + initialContent: `package main +`, + targetHolder: "IBM Corp.", + configYear: 2022, + expectModified: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tempDir := t.TempDir() + testFile := filepath.Join(tempDir, "test.go") + + err := os.WriteFile(testFile, []byte(tt.initialContent), 0644) + require.NoError(t, err) + + modified, err := UpdateCopyrightHeader(testFile, tt.targetHolder, tt.configYear, tt.forceCurrentYear) + require.NoError(t, err) + assert.Equal(t, tt.expectModified, modified) + + if tt.expectModified && tt.expectedContent != "" { + content, err := os.ReadFile(testFile) + require.NoError(t, err) + assert.Equal(t, tt.expectedContent, string(content)) + } + }) + } +} + +func TestNeedsUpdate(t *testing.T) { + currentYear := time.Now().Year() + + tests := []struct { + name string + fileContent string + targetHolder string + configYear int + forceCurrentYear bool + expectNeedsUpdate bool + }{ + { + name: "Needs update - outdated end year", + fileContent: `// Copyright IBM Corp. 
2022, 2023 +package main +`, + targetHolder: "IBM Corp.", + configYear: 2022, + forceCurrentYear: true, + expectNeedsUpdate: true, + }, + { + name: "Needs update - different start year", + fileContent: `// Copyright IBM Corp. 2023 +package main +`, + targetHolder: "IBM Corp.", + configYear: 2020, + expectNeedsUpdate: true, + }, + { + name: "No update needed - current", + fileContent: `// Copyright IBM Corp. ` + string(rune(currentYear/1000+48)) + string(rune((currentYear/100)%10+48)) + string(rune((currentYear/10)%10+48)) + string(rune(currentYear%10+48)) + ` +package main +`, + targetHolder: "IBM Corp.", + configYear: currentYear, + expectNeedsUpdate: false, + }, + { + name: "Wrong holder - no update", + fileContent: `// Copyright HashiCorp, Inc. 2020 +package main +`, + targetHolder: "IBM Corp.", + configYear: 2022, + expectNeedsUpdate: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tempDir := t.TempDir() + testFile := filepath.Join(tempDir, "test.go") + + err := os.WriteFile(testFile, []byte(tt.fileContent), 0644) + require.NoError(t, err) + + needsUpdate, err := NeedsUpdate(testFile, tt.targetHolder, tt.configYear, tt.forceCurrentYear) + require.NoError(t, err) + assert.Equal(t, tt.expectNeedsUpdate, needsUpdate) + }) + } +} + +func TestUpdateCopyrightHeader_SkipCopywriteConfig(t *testing.T) { + tempDir := t.TempDir() + testFile := filepath.Join(tempDir, ".copywrite.hcl") + + fileContent := `// Copyright IBM Corp. 2020 +schema_version = 1 +` + + err := os.WriteFile(testFile, []byte(fileContent), 0644) + require.NoError(t, err) + + modified, err := UpdateCopyrightHeader(testFile, "IBM Corp.", 2022, false) + require.NoError(t, err) + assert.False(t, modified, "Should skip .copywrite.hcl file") +} + +func TestNeedsUpdate_SkipCopywriteConfig(t *testing.T) { + tempDir := t.TempDir() + testFile := filepath.Join(tempDir, ".copywrite.hcl") + + fileContent := `// Copyright IBM Corp. 
2020 +schema_version = 1 +` + + err := os.WriteFile(testFile, []byte(fileContent), 0644) + require.NoError(t, err) + + needsUpdate, err := NeedsUpdate(testFile, "IBM Corp.", 2022, false) + require.NoError(t, err) + assert.False(t, needsUpdate, "Should skip .copywrite.hcl file") +} From 23c1a5ba8221cf6aa7f129aebb31bea37f0bffb1 Mon Sep 17 00:00:00 2001 From: Mohan Manikanta Date: Tue, 13 Jan 2026 04:45:54 +0530 Subject: [PATCH 03/15] Preserve add Headers behaviour for header updates --- licensecheck/update.go | 360 ++++++++++++++++++++++++------------ licensecheck/update_test.go | 337 +++++++++++++++++++++++++++++++++ 2 files changed, 580 insertions(+), 117 deletions(-) diff --git a/licensecheck/update.go b/licensecheck/update.go index c51d07d..53a90a6 100644 --- a/licensecheck/update.go +++ b/licensecheck/update.go @@ -79,8 +79,10 @@ func parseCopyrightLine(line string, lineNum int) *CopyrightInfo { } content := line[contentStart:] - // Must contain "copyright" - if !strings.Contains(strings.ToLower(content), "copyright") { + // Must start with "copyright" (case-insensitive) - not just contain it anywhere + // This ensures we only match actual copyright statements, not comments that mention copyright + content = strings.TrimSpace(content) + if !regexp.MustCompile(`(?i)^copyright\b`).MatchString(content) { return nil } @@ -158,8 +160,21 @@ func extractCommentPrefix(line string) string { trimmed := strings.TrimLeft(line, " \t") leadingSpace := line[:len(line)-len(trimmed)] - // Check for common comment prefixes - commentPrefixes := []string{"// ", "//", "# ", "#", "* ", "*", "/* "} + // Check for common comment prefixes (ordered by specificity - longer prefixes first) + commentPrefixes := []string{ + "<%/* ", "<%/*", // EJS templates + "(** ", "(**", // OCaml + "/** ", "/**", // JSDoc-style comments + "