Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 23 additions & 24 deletions internal/job/checkout.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"slices"
Expand Down Expand Up @@ -50,47 +51,45 @@ func (e *Executor) configureHTTPSInsteadOfSSH(ctx context.Context) error {
).Run(ctx, shell.ShowPrompt(false))
}

func (e *Executor) removeCheckoutDir() error {
func (e *Executor) removeCheckoutDir(ctx context.Context) error {
checkoutPath, _ := e.shell.Env.Get("BUILDKITE_BUILD_CHECKOUT_PATH")

// on windows, sometimes removing large dirs can fail for various reasons
// for instance having files open
// see https://github.com/golang/go/issues/20841
for range 10 {
return roko.NewRetrier(
roko.WithMaxAttempts(10),
roko.WithStrategy(roko.Constant(10*time.Second)),
).DoWithContext(ctx, func(r *roko.Retrier) error {
e.shell.Commentf("Removing %s", checkoutPath)
if err := os.RemoveAll(checkoutPath); err != nil {
e.shell.Errorf("Failed to remove \"%s\" (%s)", checkoutPath, err)
} else {
if _, err := os.Stat(checkoutPath); os.IsNotExist(err) {
return nil
} else {
e.shell.Errorf("Failed to remove %s", checkoutPath)
}
}
e.shell.Commentf("Waiting 10 seconds")
<-time.After(time.Second * 10)
}

return fmt.Errorf("failed to remove %s", checkoutPath)
if err := hardRemoveAll(checkoutPath); err != nil {
e.shell.Errorf("Failed to remove %q (%s)", checkoutPath, err)
return err
}
if _, err := os.Stat(checkoutPath); err == nil || !errors.Is(err, fs.ErrNotExist) {
e.shell.Errorf("Failed to remove %q", checkoutPath)
return errors.New("checkout path still exists")
}
return nil
})
}

func (e *Executor) createCheckoutDir() error {
checkoutPath, _ := e.shell.Env.Get("BUILDKITE_BUILD_CHECKOUT_PATH")

if !osutil.FileExists(checkoutPath) {
e.shell.Commentf("Creating \"%s\"", checkoutPath)
// Actual file permissions will be reduced by umask, and won't be 0o777 unless the user has manually changed the umask to 000
if err := os.MkdirAll(checkoutPath, 0o777); err != nil {
// Actual file permissions will be reduced by umask, and won't be 0777
// unless the user has manually changed the umask to 000
if err := os.MkdirAll(checkoutPath, 0777); err != nil {
return err
}
}

if e.shell.Getwd() != checkoutPath {
if err := e.shell.Chdir(checkoutPath); err != nil {
return err
}
return e.shell.Chdir(checkoutPath)
}

return nil
}

Expand All @@ -112,7 +111,7 @@ func (e *Executor) CheckoutPhase(ctx context.Context) error {
// Remove the checkout directory if BUILDKITE_CLEAN_CHECKOUT is present
if e.CleanCheckout {
e.shell.Headerf("Cleaning pipeline checkout")
if err = e.removeCheckoutDir(); err != nil {
if err = e.removeCheckoutDir(ctx); err != nil {
return err
}
}
Expand Down Expand Up @@ -202,7 +201,7 @@ func (e *Executor) CheckoutPhase(ctx context.Context) error {
// Checkout can fail because of corrupted files in the checkout which can leave the agent in a state where it
// keeps failing. This removes the checkout dir, which means the next checkout will be a lot slower (clone vs
// fetch), but hopefully will allow the agent to self-heal
if err := e.removeCheckoutDir(); err != nil {
if err := e.removeCheckoutDir(ctx); err != nil {
e.shell.Warningf("Failed to remove checkout dir while cleaning up after a checkout error: %v", err)
}

Expand All @@ -221,7 +220,7 @@ func (e *Executor) CheckoutPhase(ctx context.Context) error {
e.shell.Warningf("Checkout failed! %s (%s)", err, r)

// If it's some kind of error that we don't know about, clean the checkout dir just to be safe
if err := e.removeCheckoutDir(); err != nil {
if err := e.removeCheckoutDir(ctx); err != nil {
e.shell.Warningf("Failed to remove checkout dir while cleaning up after a checkout error: %v", err)
}

Expand Down
10 changes: 10 additions & 0 deletions internal/job/remove_all_nonunix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
//go:build !unix

package job

import "os"

// hardRemoveAll only does more than os.RemoveAll on Unix-likes.
func hardRemoveAll(path string) error {
return os.RemoveAll(path)
}
44 changes: 44 additions & 0 deletions internal/job/remove_all_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//go:build unix

package job

import (
"errors"
"io/fs"
"os"
"path/filepath"
"testing"
)

func TestHardRemoveAll(t *testing.T) {
container, err := os.MkdirTemp("", "TestHardRemoveAll")
if err != nil {
t.Fatalf("os.MkdirTemp(TestHardRemoveAll) error = %v", err)
}
t.Cleanup(func() { os.RemoveAll(container) }) // lol but if hardRemoveAll doesn't work...

dirA := filepath.Join(container, "a")
dirB := filepath.Join(dirA, "b")
fileC := filepath.Join(dirB, "c")
if err := os.MkdirAll(dirB, 0o777); err != nil {
t.Fatalf("os.MkdirAll(c%q, 0o777) = %v", dirB, err)
}
if err := os.WriteFile(fileC, []byte("hello!\n"), 0o664); err != nil {
t.Fatalf("os.WriteFile(%q, hello!, 0o664) = %v", fileC, err)
}

// break directory perms
if err := os.Chmod(dirB, 0o666); err != nil {
t.Fatalf("os.Chmod(%q, 0o666) = %v", dirB, err)
}
if err := os.Chmod(dirA, 0o444); err != nil {
t.Fatalf("os.Chmod(%q, 0o444) = %v", dirA, err)
}

if err := hardRemoveAll(dirA); err != nil {
t.Errorf("hardRemoveAll(%q) = %v", dirA, err)
}
if _, err := os.Stat(dirA); !errors.Is(err, fs.ErrNotExist) {
t.Errorf("os.Stat(%q) = %v, want %v", dirA, err, fs.ErrNotExist)
}
}
57 changes: 57 additions & 0 deletions internal/job/remove_all_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
//go:build unix

package job

import (
"errors"
"os"
"path/filepath"

"golang.org/x/sys/unix"
)

// hardRemoveAll tries very hard to remove all items from the directory at path.
// In addition to calling os.RemoveAll, it fixes missing +x bits on directories.
func hardRemoveAll(path string) error {
// Only try to fix the permissions on 10000 directories.
// Any more and we could be here all day.
for range 10000 {
err := os.RemoveAll(path)
if err == nil { // If os.RemoveAll worked, then exit early.
return nil
}
// os.RemoveAll documents its only non-nil error as *os.PathError.
pathErr, ok := err.(*os.PathError)
if !ok {
return err
}

// Did we not have permission to open something within a directory?
if pathErr.Err != unix.EACCES {
return err
}
dir := filepath.Dir(pathErr.Path)

// Check that the EACCES was caused by mode on the directory.
// (Note that this is a TOCTOU race, but we're not changing
// owner uid/gid, and if something else is concurrently writing
// files they can probably chmod +wx their files themselves)
di, statErr := os.Lstat(dir)
if statErr != nil {
return statErr
}
if !di.IsDir() {
return err
}
if unix.Faccessat(0, dir, unix.W_OK|unix.X_OK, unix.AT_EACCESS) != unix.EACCES {
// Some other failure?
return err
}
// Try to fix it with chmod +x dir
if err := os.Chmod(dir, 0o777); err != nil {
return err
}
// Now retry os.RemoveAll.
}
return errors.New("too many inaccessible directories")
}