diff --git a/clicommand/bootstrap.go b/clicommand/bootstrap.go index 40c6efbda5..63dbc18d3b 100644 --- a/clicommand/bootstrap.go +++ b/clicommand/bootstrap.go @@ -87,6 +87,8 @@ type BootstrapConfig struct { PluginsEnabled bool `cli:"plugins-enabled"` PluginValidation bool `cli:"plugin-validation"` PluginsAlwaysCloneFresh bool `cli:"plugins-always-clone-fresh"` + PluginsPathIncludesAgentName bool `cli:"plugins-path-includes-agent-name"` + PluginsLockTimeout int `cli:"plugins-lock-timeout"` LocalHooksEnabled bool `cli:"local-hooks-enabled"` StrictSingleHooks bool `cli:"strict-single-hooks"` PTY bool `cli:"pty"` @@ -307,7 +309,7 @@ var BootstrapCommand = cli.Command{ SocketsPathFlag, cli.StringFlag{ Name: "plugins-path", - Value: "", + Value: "/workspace/plugins", Usage: "Directory where the plugins are saved to", EnvVar: "BUILDKITE_PLUGINS_PATH", }, @@ -331,6 +333,17 @@ var BootstrapCommand = cli.Command{ Usage: "Always make a new clone of plugin source, even if already present", EnvVar: "BUILDKITE_PLUGINS_ALWAYS_CLONE_FRESH", }, + cli.BoolFlag{ + Name: "plugins-path-includes-agent-name", + Usage: "Include the agent name in the plugins path (when false, plugins are shared and file locking is used)", + EnvVar: "BUILDKITE_PLUGINS_PATH_INCLUDES_AGENT_NAME", + }, + cli.IntFlag{ + Name: "plugins-lock-timeout", + Value: 300, + Usage: "Seconds to wait before timing out when acquiring plugin clone lock", + EnvVar: "BUILDKITE_PLUGINS_LOCK_TIMEOUT", + }, cli.BoolTFlag{ Name: "local-hooks-enabled", Usage: "Allow local hooks to be run", @@ -495,6 +508,8 @@ var BootstrapCommand = cli.Command{ PluginsEnabled: cfg.PluginsEnabled, PluginsAlwaysCloneFresh: cfg.PluginsAlwaysCloneFresh, PluginsPath: cfg.PluginsPath, + PluginsPathIncludesAgentName: cfg.PluginsPathIncludesAgentName, + PluginsLockTimeout: cfg.PluginsLockTimeout, PullRequest: cfg.PullRequest, PullRequestUsingMergeRefspec: cfg.PullRequestUsingMergeRefspec, Queue: cfg.Queue, diff --git a/clicommand/kubernetes_bootstrap.go b/clicommand/kubernetes_bootstrap.go index ebe42ae60b..fbd1cfe980 100644 --- a/clicommand/kubernetes_bootstrap.go +++ b/clicommand/kubernetes_bootstrap.go @@ -136,6 +136,12 @@ var KubernetesBootstrapCommand = cli.Command{ environ.Set("BUILDKITE_BUILD_CHECKOUT_PATH", filepath.Join(buildPath, "buildkite")) } + // For k8s agents, use shared plugin paths by default since each pod runs ephemerally. + // This enables plugin caching across jobs when using persistent storage. + if _, exists := environ.Get("BUILDKITE_PLUGINS_PATH_INCLUDES_AGENT_NAME"); !exists { + environ.Set("BUILDKITE_PLUGINS_PATH_INCLUDES_AGENT_NAME", "false") + } + // BUILDKITE_BIN_PATH is a funny one. The bootstrap adds it to PATH, // and the agent deduces it from its own path (as we do below), but in // the k8s stack the agent could run from two different locations: diff --git a/internal/job/config.go b/internal/job/config.go index 8e5d374edd..bcd8ba6d8b 100644 --- a/internal/job/config.go +++ b/internal/job/config.go @@ -141,6 +141,13 @@ type ExecutorConfig struct { // Path to the plugins directory PluginsPath string + // Whether to include the agent name in plugin paths (defaults to true for backward compatibility) + // When false, plugins are stored in a shared location and require file locking + PluginsPathIncludesAgentName bool + + // Seconds to wait before allowing plugin clone lock to be acquired (only used when PluginsPathIncludesAgentName is false) + PluginsLockTimeout int + // Paths to automatically upload as artifacts when the build finishes AutomaticArtifactUploadPaths string `env:"BUILDKITE_ARTIFACT_PATHS"` diff --git a/internal/job/plugin.go b/internal/job/plugin.go index 91345b8f2d..3e3f461fd0 100644 --- a/internal/job/plugin.go +++ b/internal/job/plugin.go @@ -11,11 +11,13 @@ import ( "slices" "strconv" "strings" + "syscall" "time" "github.com/buildkite/agent/v3/agent/plugin" "github.com/buildkite/agent/v3/internal/job/hook" "github.com/buildkite/agent/v3/internal/osutil" + "github.com/buildkite/agent/v3/internal/shell" "github.com/buildkite/roko" "github.com/buildkite/shellwords" ) @@ -39,6 +41,123 @@ type pluginCheckout struct { HooksDir string } +// acquirePluginLock attempts to acquire a file lock for plugin cloning +// Returns the lock file handle and an error if the lock cannot be acquired +func acquirePluginLock(pluginPath string, timeout int, logger shell.Logger) (*os.File, error) { + lockPath := pluginPath + ".lock" + + // Check if a stale lock file exists and remove it + if fileInfo, err := os.Stat(lockPath); err == nil { + // Check if the file is older than 5 minutes (stale) + if time.Since(fileInfo.ModTime()) > 5*time.Minute { + os.Remove(lockPath) + } + } + + // Create the lock file + lockFile, err := os.OpenFile(lockPath, os.O_CREATE|os.O_WRONLY, 0o644) + if err != nil { + return nil, fmt.Errorf("failed to create lock file %s: %w", lockPath, err) + } + + // Try to acquire exclusive lock with timeout + start := time.Now() + timeoutDuration := time.Duration(timeout) * time.Second + waitingLogged := false + + for { + err := syscall.Flock(int(lockFile.Fd()), syscall.LOCK_EX|syscall.LOCK_NB) + if err == nil { + // Lock acquired successfully + if waitingLogged { + logger.Commentf("Plugin lock acquired, plugin was already downloaded by another job") + } + return lockFile, nil + } + + if errors.Is(err, syscall.EWOULDBLOCK) { + // Lock is held by another process + elapsed := time.Since(start) + + // Log waiting message only once + if !waitingLogged { + logger.Commentf("Waiting for plugin download to complete (another job is currently downloading the plugin)...") + waitingLogged = true + } + + if elapsed > timeoutDuration { + lockFile.Close() + return nil, fmt.Errorf("timeout waiting for plugin lock after %d seconds", timeout) + } + + // Wait a bit before retrying + time.Sleep(100 * time.Millisecond) + continue + } + + // Other error occurred - check if it's a stale file handle + if errors.Is(err, syscall.ESTALE) { + // Remove the stale lock file and retry + lockFile.Close() + os.Remove(lockPath) + lockFile, err = os.OpenFile(lockPath, os.O_CREATE|os.O_WRONLY, 0o644) + if err != nil { + return nil, fmt.Errorf("failed to recreate lock file %s: %w", lockPath, err) + } + continue + } + + // Other error occurred + lockFile.Close() + return nil, fmt.Errorf("failed to acquire plugin lock: %w", err) + } +} + +// releasePluginLock releases the file lock and closes the lock file +func releasePluginLock(lockFile *os.File) error { + if lockFile != nil { + // Release the lock + err := syscall.Flock(int(lockFile.Fd()), syscall.LOCK_UN) + if err != nil { + lockFile.Close() + return fmt.Errorf("failed to release plugin lock: %w", err) + } + + // Close the lock file + lockFile.Close() + + // Remove the lock file - ignore errors as the file might already be gone + // or there might be a stale file handle + os.Remove(lockFile.Name()) + } + return nil +} + +// openCachedPlugin opens an already checked out plugin and returns the checkout +func (e *Executor) openCachedPlugin(ctx context.Context, p *plugin.Plugin, pluginDirectory string, checkout *pluginCheckout) (*pluginCheckout, error) { + // It'd be nice to show the current commit of the plugin, so let's figure that out. + headCommit, err := gitRevParseInWorkingDirectory(ctx, e.shell, pluginDirectory, "--short=7", "HEAD") + if err != nil { + e.shell.Commentf("Plugin %q already checked out (can't `git rev-parse HEAD` plugin git directory)", p.Label()) + } else { + e.shell.Commentf("Plugin %q already checked out (%s)", p.Label(), strings.TrimSpace(headCommit)) + } + + // Open the plugin directory as the checkout root. + pluginRoot, err := os.OpenRoot(pluginDirectory) + if err != nil { + return nil, fmt.Errorf("opening plugin directory as a root: %w", err) + } + runtime.AddCleanup(checkout, func(r *os.Root) { r.Close() }, pluginRoot) + checkout.Root = pluginRoot + + // Ensure hooks is a directory that exists within the checkout. + if fi, err := pluginRoot.Stat(checkout.HooksDir); err != nil || !fi.IsDir() { + return nil, fmt.Errorf("%q was not a directory within the %q plugin: %w", checkout.HooksDir, checkout.Plugin.Name(), err) + } + return checkout, nil +} + func (e *Executor) hasPlugins() bool { return e.Plugins != "" } @@ -301,7 +420,13 @@ func (e *Executor) checkoutPlugin(ctx context.Context, p *plugin.Plugin) (*plugi return nil, err } - pluginParentDir := filepath.Join(e.PluginsPath, e.AgentName) + // Determine plugin parent directory based on whether we include agent name + var pluginParentDir string + if e.PluginsPathIncludesAgentName { + pluginParentDir = filepath.Join(e.PluginsPath, e.AgentName) + } else { + pluginParentDir = e.PluginsPath + } // Ensure the parent of the plugin directory exists, otherwise we can't move the temp git repo dir // into it. The actual file permissions will be reduced by umask, and won't be 0o777 unless the @@ -355,30 +480,29 @@ func (e *Executor) checkoutPlugin(ctx context.Context, p *plugin.Plugin) (*plugi } } - // Does the .git directory exist? (i.e. it's already checkout out?) - if osutil.FileExists(pluginGitDirectory) { - // It'd be nice to show the current commit of the plugin, so - // let's figure that out. - headCommit, err := gitRevParseInWorkingDirectory(ctx, e.shell, pluginDirectory, "--short=7", "HEAD") - if err != nil { - e.shell.Commentf("Plugin %q already checked out (can't `git rev-parse HEAD` plugin git directory)", p.Label()) - } else { - e.shell.Commentf("Plugin %q already checked out (%s)", p.Label(), strings.TrimSpace(headCommit)) - } + // Does the .git directory exist? (i.e. it's already checked out?) + // Skip this check when using shared plugin paths - we'll handle it with file locking + if e.PluginsPathIncludesAgentName && osutil.FileExists(pluginGitDirectory) { + return e.openCachedPlugin(ctx, p, pluginDirectory, checkout) + } + + // Plugin doesn't exist yet. If we're using shared plugin paths, we need to acquire a lock + // to prevent multiple processes from downloading the same plugin simultaneously. + var lockFile *os.File + if !e.PluginsPathIncludesAgentName { + e.shell.Commentf("Using shared plugin storage at %s (locking enabled)", pluginParentDir) - // Open the plugin directory as the checkout root. - pluginRoot, err := os.OpenRoot(pluginDirectory) + var err error + lockFile, err = acquirePluginLock(pluginDirectory, e.PluginsLockTimeout, e.shell) if err != nil { - return nil, fmt.Errorf("opening plugin directory as a root: %w", err) + return nil, fmt.Errorf("failed to acquire plugin lock for %s: %w", p.Label(), err) } - runtime.AddCleanup(checkout, func(r *os.Root) { r.Close() }, pluginRoot) - checkout.Root = pluginRoot + defer releasePluginLock(lockFile) - // Ensure hooks is a directory that exists within the checkout. - if fi, err := pluginRoot.Stat(checkout.HooksDir); err != nil || !fi.IsDir() { - return nil, fmt.Errorf("%q was not a directory within the %q plugin: %w", checkout.HooksDir, checkout.Plugin.Name(), err) + // Re-check if plugin exists now that we have the lock (another process might have downloaded it) + if osutil.FileExists(pluginGitDirectory) { + return e.openCachedPlugin(ctx, p, pluginDirectory, checkout) } - return checkout, nil } e.shell.Commentf("Plugin %q will be checked out to %q", p.DisplayName(), pluginDirectory)