Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion clicommand/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ type BootstrapConfig struct {
PluginsEnabled bool `cli:"plugins-enabled"`
PluginValidation bool `cli:"plugin-validation"`
PluginsAlwaysCloneFresh bool `cli:"plugins-always-clone-fresh"`
PluginsPathIncludesAgentName bool `cli:"plugins-path-includes-agent-name"`
PluginsLockTimeout int `cli:"plugins-lock-timeout"`
LocalHooksEnabled bool `cli:"local-hooks-enabled"`
StrictSingleHooks bool `cli:"strict-single-hooks"`
PTY bool `cli:"pty"`
Expand Down Expand Up @@ -307,7 +309,7 @@ var BootstrapCommand = cli.Command{
SocketsPathFlag,
cli.StringFlag{
Name: "plugins-path",
Value: "",
Value: "/workspace/plugins",
Usage: "Directory where the plugins are saved to",
EnvVar: "BUILDKITE_PLUGINS_PATH",
},
Expand All @@ -331,6 +333,17 @@ var BootstrapCommand = cli.Command{
Usage: "Always make a new clone of plugin source, even if already present",
EnvVar: "BUILDKITE_PLUGINS_ALWAYS_CLONE_FRESH",
},
cli.BoolFlag{
Name: "plugins-path-includes-agent-name",
Usage: "Include the agent name in the plugins path (when false, plugins are shared and file locking is used)",
EnvVar: "BUILDKITE_PLUGINS_PATH_INCLUDES_AGENT_NAME",
},
cli.IntFlag{
Name: "plugins-lock-timeout",
Value: 300,
Usage: "Seconds to wait before timing out when acquiring plugin clone lock",
EnvVar: "BUILDKITE_PLUGINS_LOCK_TIMEOUT",
},
cli.BoolTFlag{
Name: "local-hooks-enabled",
Usage: "Allow local hooks to be run",
Expand Down Expand Up @@ -495,6 +508,8 @@ var BootstrapCommand = cli.Command{
PluginsEnabled: cfg.PluginsEnabled,
PluginsAlwaysCloneFresh: cfg.PluginsAlwaysCloneFresh,
PluginsPath: cfg.PluginsPath,
PluginsPathIncludesAgentName: cfg.PluginsPathIncludesAgentName,
PluginsLockTimeout: cfg.PluginsLockTimeout,
PullRequest: cfg.PullRequest,
PullRequestUsingMergeRefspec: cfg.PullRequestUsingMergeRefspec,
Queue: cfg.Queue,
Expand Down
6 changes: 6 additions & 0 deletions clicommand/kubernetes_bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,12 @@ var KubernetesBootstrapCommand = cli.Command{
environ.Set("BUILDKITE_BUILD_CHECKOUT_PATH", filepath.Join(buildPath, "buildkite"))
}

// For k8s agents, use shared plugin paths by default since each pod runs ephemerally.
// This enables plugin caching across jobs when using persistent storage.
if _, exists := environ.Get("BUILDKITE_PLUGINS_PATH_INCLUDES_AGENT_NAME"); !exists {
environ.Set("BUILDKITE_PLUGINS_PATH_INCLUDES_AGENT_NAME", "false")
}

// BUILDKITE_BIN_PATH is a funny one. The bootstrap adds it to PATH,
// and the agent deduces it from its own path (as we do below), but in
// the k8s stack the agent could run from two different locations:
Expand Down
7 changes: 7 additions & 0 deletions internal/job/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,13 @@ type ExecutorConfig struct {
// Path to the plugins directory
PluginsPath string

// Whether to include the agent name in plugin paths (defaults to true for backward compatibility)
// When false, plugins are stored in a shared location and require file locking
PluginsPathIncludesAgentName bool

// Seconds to wait before allowing plugin clone lock to be acquired (only used when PluginsPathIncludesAgentName is false)
PluginsLockTimeout int

// Paths to automatically upload as artifacts when the build finishes
AutomaticArtifactUploadPaths string `env:"BUILDKITE_ARTIFACT_PATHS"`

Expand Down
164 changes: 144 additions & 20 deletions internal/job/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ import (
"slices"
"strconv"
"strings"
"syscall"
"time"

"github.com/buildkite/agent/v3/agent/plugin"
"github.com/buildkite/agent/v3/internal/job/hook"
"github.com/buildkite/agent/v3/internal/osutil"
"github.com/buildkite/agent/v3/internal/shell"
"github.com/buildkite/roko"
"github.com/buildkite/shellwords"
)
Expand All @@ -39,6 +41,123 @@ type pluginCheckout struct {
HooksDir string
}

// acquirePluginLock attempts to acquire a file lock for plugin cloning
// Returns the lock file handle and an error if the lock cannot be acquired
func acquirePluginLock(pluginPath string, timeout int, logger shell.Logger) (*os.File, error) {
lockPath := pluginPath + ".lock"

// Check if a stale lock file exists and remove it
if fileInfo, err := os.Stat(lockPath); err == nil {
// Check if the file is older than 5 minutes (stale)
if time.Since(fileInfo.ModTime()) > 5*time.Minute {
os.Remove(lockPath)
}
}

// Create the lock file
lockFile, err := os.OpenFile(lockPath, os.O_CREATE|os.O_WRONLY, 0o644)
if err != nil {
return nil, fmt.Errorf("failed to create lock file %s: %w", lockPath, err)
}

// Try to acquire exclusive lock with timeout
start := time.Now()
timeoutDuration := time.Duration(timeout) * time.Second
waitingLogged := false

for {
err := syscall.Flock(int(lockFile.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
if err == nil {
// Lock acquired successfully
if waitingLogged {
logger.Commentf("Plugin lock acquired, plugin was already downloaded by another job")
}
return lockFile, nil
}

if errors.Is(err, syscall.EWOULDBLOCK) {
// Lock is held by another process
elapsed := time.Since(start)

// Log waiting message only once
if !waitingLogged {
logger.Commentf("Waiting for plugin download to complete (another job is currently downloading the plugin)...")
waitingLogged = true
}

if elapsed > timeoutDuration {
lockFile.Close()
return nil, fmt.Errorf("timeout waiting for plugin lock after %d seconds", timeout)
}

// Wait a bit before retrying
time.Sleep(100 * time.Millisecond)
continue
}

// Other error occurred - check if it's a stale file handle
if errors.Is(err, syscall.ESTALE) {
// Remove the stale lock file and retry
lockFile.Close()
os.Remove(lockPath)
lockFile, err = os.OpenFile(lockPath, os.O_CREATE|os.O_WRONLY, 0o644)
if err != nil {
return nil, fmt.Errorf("failed to recreate lock file %s: %w", lockPath, err)
}
continue
}

// Other error occurred
lockFile.Close()
return nil, fmt.Errorf("failed to acquire plugin lock: %w", err)
}
}

// releasePluginLock releases the file lock and closes the lock file
func releasePluginLock(lockFile *os.File) error {
if lockFile != nil {
// Release the lock
err := syscall.Flock(int(lockFile.Fd()), syscall.LOCK_UN)
if err != nil {
lockFile.Close()
return fmt.Errorf("failed to release plugin lock: %w", err)
}

// Close the lock file
lockFile.Close()

// Remove the lock file - ignore errors as the file might already be gone
// or there might be a stale file handle
os.Remove(lockFile.Name())
}
return nil
}

// openCachedPlugin opens an already checked out plugin and returns the checkout
func (e *Executor) openCachedPlugin(ctx context.Context, p *plugin.Plugin, pluginDirectory string, checkout *pluginCheckout) (*pluginCheckout, error) {
// It'd be nice to show the current commit of the plugin, so let's figure that out.
headCommit, err := gitRevParseInWorkingDirectory(ctx, e.shell, pluginDirectory, "--short=7", "HEAD")
if err != nil {
e.shell.Commentf("Plugin %q already checked out (can't `git rev-parse HEAD` plugin git directory)", p.Label())
} else {
e.shell.Commentf("Plugin %q already checked out (%s)", p.Label(), strings.TrimSpace(headCommit))
}

// Open the plugin directory as the checkout root.
pluginRoot, err := os.OpenRoot(pluginDirectory)
if err != nil {
return nil, fmt.Errorf("opening plugin directory as a root: %w", err)
}
runtime.AddCleanup(checkout, func(r *os.Root) { r.Close() }, pluginRoot)
checkout.Root = pluginRoot

// Ensure hooks is a directory that exists within the checkout.
if fi, err := pluginRoot.Stat(checkout.HooksDir); err != nil || !fi.IsDir() {
return nil, fmt.Errorf("%q was not a directory within the %q plugin: %w", checkout.HooksDir, checkout.Plugin.Name(), err)
}
return checkout, nil
}

func (e *Executor) hasPlugins() bool {
return e.Plugins != ""
}
Expand Down Expand Up @@ -301,7 +420,13 @@ func (e *Executor) checkoutPlugin(ctx context.Context, p *plugin.Plugin) (*plugi
return nil, err
}

pluginParentDir := filepath.Join(e.PluginsPath, e.AgentName)
// Determine plugin parent directory based on whether we include agent name
var pluginParentDir string
if e.PluginsPathIncludesAgentName {
pluginParentDir = filepath.Join(e.PluginsPath, e.AgentName)
} else {
pluginParentDir = e.PluginsPath
}

// Ensure the parent of the plugin directory exists, otherwise we can't move the temp git repo dir
// into it. The actual file permissions will be reduced by umask, and won't be 0o777 unless the
Expand Down Expand Up @@ -355,30 +480,29 @@ func (e *Executor) checkoutPlugin(ctx context.Context, p *plugin.Plugin) (*plugi
}
}

// Does the .git directory exist? (i.e. it's already checkout out?)
if osutil.FileExists(pluginGitDirectory) {
// It'd be nice to show the current commit of the plugin, so
// let's figure that out.
headCommit, err := gitRevParseInWorkingDirectory(ctx, e.shell, pluginDirectory, "--short=7", "HEAD")
if err != nil {
e.shell.Commentf("Plugin %q already checked out (can't `git rev-parse HEAD` plugin git directory)", p.Label())
} else {
e.shell.Commentf("Plugin %q already checked out (%s)", p.Label(), strings.TrimSpace(headCommit))
}
// Does the .git directory exist? (i.e. it's already checked out?)
// Skip this check when using shared plugin paths - we'll handle it with file locking
if e.PluginsPathIncludesAgentName && osutil.FileExists(pluginGitDirectory) {
return e.openCachedPlugin(ctx, p, pluginDirectory, checkout)
}

// Plugin doesn't exist yet. If we're using shared plugin paths, we need to acquire a lock
// to prevent multiple processes from downloading the same plugin simultaneously.
var lockFile *os.File
if !e.PluginsPathIncludesAgentName {
e.shell.Commentf("Using shared plugin storage at %s (locking enabled)", pluginParentDir)

// Open the plugin directory as the checkout root.
pluginRoot, err := os.OpenRoot(pluginDirectory)
var err error
lockFile, err = acquirePluginLock(pluginDirectory, e.PluginsLockTimeout, e.shell)
if err != nil {
return nil, fmt.Errorf("opening plugin directory as a root: %w", err)
return nil, fmt.Errorf("failed to acquire plugin lock for %s: %w", p.Label(), err)
}
runtime.AddCleanup(checkout, func(r *os.Root) { r.Close() }, pluginRoot)
checkout.Root = pluginRoot
defer releasePluginLock(lockFile)

// Ensure hooks is a directory that exists within the checkout.
if fi, err := pluginRoot.Stat(checkout.HooksDir); err != nil || !fi.IsDir() {
return nil, fmt.Errorf("%q was not a directory within the %q plugin: %w", checkout.HooksDir, checkout.Plugin.Name(), err)
// Re-check if plugin exists now that we have the lock (another process might have downloaded it)
if osutil.FileExists(pluginGitDirectory) {
return e.openCachedPlugin(ctx, p, pluginDirectory, checkout)
}
return checkout, nil
}

e.shell.Commentf("Plugin %q will be checked out to %q", p.DisplayName(), pluginDirectory)
Expand Down