diff --git a/integration/image/remove_unix_test.go b/integration/image/remove_unix_test.go new file mode 100644 index 0000000000000..428a312742e2c --- /dev/null +++ b/integration/image/remove_unix_test.go @@ -0,0 +1,93 @@ +// +build !windows + +package image // import "github.com/docker/docker/integration/image" + +import ( + "context" + "io" + "io/ioutil" + "os" + "strconv" + "strings" + "syscall" + "testing" + "unsafe" + + "github.com/docker/docker/api/types" + "github.com/docker/docker/internal/test/daemon" + "github.com/docker/docker/internal/test/fakecontext" + "gotest.tools/assert" + "gotest.tools/skip" +) + +// This is a regression test for #38488 +// It ensures that orphan layers can be found and cleaned up +// after unsuccessful image removal +func TestRemoveImageGarbageCollector(t *testing.T) { + // This test uses very platform specific way to prevent + // daemon for remove image layer. + skip.If(t, testEnv.DaemonInfo.OSType != "linux") + skip.If(t, os.Getenv("DOCKER_ENGINE_GOARCH") != "amd64") + + // Create daemon with overlay2 graphdriver because vfs uses disk differently + // and this test case would not work with it. + d := daemon.New(t, daemon.WithStorageDriver("overlay2"), daemon.WithImageService) + d.Start(t) + defer d.Stop(t) + + ctx := context.Background() + client := d.NewClientT(t) + i := d.ImageService() + + img := "test-garbage-collector" + + // Build a image with multiple layers + dockerfile := `FROM busybox + RUN echo echo Running... > /run.sh` + source := fakecontext.New(t, "", fakecontext.WithDockerfile(dockerfile)) + defer source.Close() + resp, err := client.ImageBuild(ctx, + source.AsTarReader(t), + types.ImageBuildOptions{ + Remove: true, + ForceRemove: true, + Tags: []string{img}, + }) + assert.NilError(t, err) + _, err = io.Copy(ioutil.Discard, resp.Body) + resp.Body.Close() + assert.NilError(t, err) + image, _, err := client.ImageInspectWithRaw(ctx, img) + assert.NilError(t, err) + + // Mark latest image layer to immutable + data := image.GraphDriver.Data + file, _ := os.Open(data["UpperDir"]) + attr := 0x00000010 + fsflags := uintptr(0x40086602) + argp := uintptr(unsafe.Pointer(&attr)) + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), fsflags, argp) + assert.Equal(t, "errno 0", errno.Error()) + + // Try to remove the image, it should generate error + // but marking layer back to mutable before checking errors (so we don't break CI server) + _, err = client.ImageRemove(ctx, img, types.ImageRemoveOptions{}) + attr = 0x00000000 + argp = uintptr(unsafe.Pointer(&attr)) + _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), fsflags, argp) + assert.Equal(t, "errno 0", errno.Error()) + assert.Assert(t, err != nil) + errStr := err.Error() + if !(strings.Contains(errStr, "permission denied") || strings.Contains(errStr, "operation not permitted")) { + t.Errorf("ImageRemove error not an permission error %s", errStr) + } + + // Verify that layer remaining on disk + dir, _ := os.Stat(data["UpperDir"]) + assert.Equal(t, "true", strconv.FormatBool(dir.IsDir())) + + // Run imageService.Cleanup() and make sure that layer was removed from disk + i.Cleanup() + dir, err = os.Stat(data["UpperDir"]) + assert.ErrorContains(t, err, "no such file or directory") +} diff --git a/internal/test/daemon/daemon.go b/internal/test/daemon/daemon.go index 7ad377347989c..f5a55d4758e32 100644 --- a/internal/test/daemon/daemon.go +++ b/internal/test/daemon/daemon.go @@ -16,6 +16,7 @@ import ( "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/events" "github.com/docker/docker/client" + "github.com/docker/docker/daemon/images" "github.com/docker/docker/internal/test" "github.com/docker/docker/internal/test/request" "github.com/docker/docker/opts" @@ -77,6 +78,7 @@ type Daemon struct { init bool dockerdBinary string log logT + imageService *images.ImageService // swarm related field swarmListenAddr string @@ -720,3 +722,8 @@ func cleanupRaftDir(t testingT, rootPath string) { } } } + +// ImageService returns the Daemon's ImageService +func (d *Daemon) ImageService() *images.ImageService { + return d.imageService +} diff --git a/internal/test/daemon/ops.go b/internal/test/daemon/ops.go index ff8cd88946f9c..a8dd283b82bda 100644 --- a/internal/test/daemon/ops.go +++ b/internal/test/daemon/ops.go @@ -63,3 +63,10 @@ func WithEnvironment(e environment.Execution) func(*Daemon) { } } } + +// WithStorageDriver sets store driver option +func WithStorageDriver(driver string) func(d *Daemon) { + return func(d *Daemon) { + d.storageDriver = driver + } +} diff --git a/internal/test/daemon/ops_unix.go b/internal/test/daemon/ops_unix.go new file mode 100644 index 0000000000000..41011e87c5c09 --- /dev/null +++ b/internal/test/daemon/ops_unix.go @@ -0,0 +1,34 @@ +// +build !windows + +package daemon + +import ( + "path/filepath" + "runtime" + + "github.com/docker/docker/daemon/images" + "github.com/docker/docker/layer" + + // register graph drivers + _ "github.com/docker/docker/daemon/graphdriver/register" + "github.com/docker/docker/pkg/idtools" +) + +// WithImageService sets imageService options +func WithImageService(d *Daemon) { + layerStores := make(map[string]layer.Store) + os := runtime.GOOS + layerStores[os], _ = layer.NewStoreFromOptions(layer.StoreOptions{ + Root: d.Root, + MetadataStorePathTemplate: filepath.Join(d.RootDir(), "image", "%s", "layerdb"), + GraphDriver: d.storageDriver, + GraphDriverOptions: nil, + IDMapping: &idtools.IdentityMapping{}, + PluginGetter: nil, + ExperimentalEnabled: false, + OS: os, + }) + d.imageService = images.NewImageService(images.ImageServiceConfig{ + LayerStores: layerStores, + }) +} diff --git a/layer/filestore.go b/layer/filestore.go index 208a0c3a859a4..a1726a5a404c4 100644 --- a/layer/filestore.go +++ b/layer/filestore.go @@ -14,7 +14,7 @@ import ( "github.com/docker/distribution" "github.com/docker/docker/pkg/ioutils" - "github.com/opencontainers/go-digest" + digest "github.com/opencontainers/go-digest" "github.com/pkg/errors" "github.com/sirupsen/logrus" ) @@ -305,6 +305,55 @@ func (fms *fileMetadataStore) GetMountParent(mount string) (ChainID, error) { return ChainID(dgst), nil } +func (fms *fileMetadataStore) getOrphan() ([]roLayer, error) { + var orphanLayers []roLayer + for _, algorithm := range supportedAlgorithms { + fileInfos, err := ioutil.ReadDir(filepath.Join(fms.root, string(algorithm))) + if err != nil { + if os.IsNotExist(err) { + continue + } + return nil, err + } + + for _, fi := range fileInfos { + if !fi.IsDir() || !strings.HasSuffix(fi.Name(), "-removing") { + continue + } + // At this stage, fi.Name value looks like --removing + // Split on '-' to get the digest value. + nameSplit := strings.Split(fi.Name(), "-") + dgst := digest.NewDigestFromEncoded(algorithm, nameSplit[0]) + if err := dgst.Validate(); err != nil { + logrus.WithError(err).WithField("digest", string(algorithm)+":"+nameSplit[0]).Debug("ignoring invalid digest") + continue + } + + chainFile := filepath.Join(fms.root, string(algorithm), fi.Name(), "cache-id") + contentBytes, err := ioutil.ReadFile(chainFile) + if err != nil { + if !os.IsNotExist(err) { + logrus.WithError(err).WithField("digest", dgst).Error("failed to read cache ID") + } + continue + } + cacheID := strings.TrimSpace(string(contentBytes)) + if cacheID == "" { + logrus.Error("invalid cache ID") + continue + } + + l := &roLayer{ + chainID: ChainID(dgst), + cacheID: cacheID, + } + orphanLayers = append(orphanLayers, *l) + } + } + + return orphanLayers, nil +} + func (fms *fileMetadataStore) List() ([]ChainID, []string, error) { var ids []ChainID for _, algorithm := range supportedAlgorithms { @@ -346,8 +395,39 @@ func (fms *fileMetadataStore) List() ([]ChainID, []string, error) { return ids, mounts, nil } -func (fms *fileMetadataStore) Remove(layer ChainID) error { - return os.RemoveAll(fms.getLayerDirectory(layer)) +// Remove layerdb folder if that is marked for removal +func (fms *fileMetadataStore) Remove(layer ChainID, cache string) error { + dgst := digest.Digest(layer) + files, err := ioutil.ReadDir(filepath.Join(fms.root, string(dgst.Algorithm()))) + if err != nil { + return err + } + for _, f := range files { + if !strings.HasSuffix(f.Name(), "-removing") || !strings.HasPrefix(f.Name(), dgst.String()) { + continue + } + + // Make sure that we only remove layerdb folder which points to + // requested cacheID + dir := filepath.Join(fms.root, string(dgst.Algorithm()), f.Name()) + chainFile := filepath.Join(dir, "cache-id") + contentBytes, err := ioutil.ReadFile(chainFile) + if err != nil { + logrus.WithError(err).WithField("file", chainFile).Error("cannot get cache ID") + continue + } + cacheID := strings.TrimSpace(string(contentBytes)) + if cacheID != cache { + continue + } + logrus.Debugf("Removing folder: %s", dir) + err = os.RemoveAll(dir) + if err != nil && !os.IsNotExist(err) { + logrus.WithError(err).WithField("name", f.Name()).Error("cannot remove layer") + continue + } + } + return nil } func (fms *fileMetadataStore) RemoveMount(mount string) error { diff --git a/layer/filestore_test.go b/layer/filestore_test.go index 498379e37fe2e..878dc54e3616e 100644 --- a/layer/filestore_test.go +++ b/layer/filestore_test.go @@ -10,6 +10,7 @@ import ( "syscall" "testing" + "github.com/docker/docker/pkg/stringid" "github.com/opencontainers/go-digest" ) @@ -102,3 +103,50 @@ func TestStartTransactionFailure(t *testing.T) { t.Fatal(err) } } + +func TestGetOrphan(t *testing.T) { + fms, td, cleanup := newFileMetadataStore(t) + defer cleanup() + + layerRoot := filepath.Join(td, "sha256") + if err := os.MkdirAll(layerRoot, 0755); err != nil { + t.Fatal(err) + } + + tx, err := fms.StartTransaction() + if err != nil { + t.Fatal(err) + } + + layerid := randomLayerID(5) + err = tx.Commit(layerid) + if err != nil { + t.Fatal(err) + } + layerPath := fms.getLayerDirectory(layerid) + if err := ioutil.WriteFile(filepath.Join(layerPath, "cache-id"), []byte(stringid.GenerateRandomID()), 0644); err != nil { + t.Fatal(err) + } + + orphanLayers, err := fms.getOrphan() + if err != nil { + t.Fatal(err) + } + if len(orphanLayers) != 0 { + t.Fatalf("Expected to have zero orphan layers") + } + + layeridSplit := strings.Split(layerid.String(), ":") + newPath := filepath.Join(layerRoot, fmt.Sprintf("%s-%s-removing", layeridSplit[1], stringid.GenerateRandomID())) + err = os.Rename(layerPath, newPath) + if err != nil { + t.Fatal(err) + } + orphanLayers, err = fms.getOrphan() + if err != nil { + t.Fatal(err) + } + if len(orphanLayers) != 1 { + t.Fatalf("Expected to have one orphan layer") + } +} diff --git a/layer/layer_store.go b/layer/layer_store.go index 81730e9d92116..7b8c011f4c07c 100644 --- a/layer/layer_store.go +++ b/layer/layer_store.go @@ -5,6 +5,8 @@ import ( "fmt" "io" "io/ioutil" + "os" + "path/filepath" "sync" "github.com/docker/distribution" @@ -415,11 +417,24 @@ func (ls *layerStore) Map() map[ChainID]Layer { } func (ls *layerStore) deleteLayer(layer *roLayer, metadata *Metadata) error { + // Rename layer digest folder first so we detect orphan layer(s) + // if ls.driver.Remove fails + dir := ls.store.getLayerDirectory(layer.chainID) + for { + dgst := digest.Digest(layer.chainID) + tmpID := fmt.Sprintf("%s-%s-removing", dgst.Hex(), stringid.GenerateRandomID()) + dir := filepath.Join(ls.store.root, string(dgst.Algorithm()), tmpID) + err := os.Rename(ls.store.getLayerDirectory(layer.chainID), dir) + if os.IsExist(err) { + continue + } + break + } err := ls.driver.Remove(layer.cacheID) if err != nil { return err } - err = ls.store.Remove(layer.chainID) + err = os.RemoveAll(dir) if err != nil { return err } @@ -452,12 +467,14 @@ func (ls *layerStore) releaseLayer(l *roLayer) ([]Metadata, error) { if l.hasReferences() { panic("cannot delete referenced layer") } + // Remove layer from layer map first so it is not considered to exist + // when if ls.deleteLayer fails. + delete(ls.layerMap, l.chainID) + var metadata Metadata if err := ls.deleteLayer(l, &metadata); err != nil { return nil, err } - - delete(ls.layerMap, l.chainID) removed = append(removed, metadata) if l.parent == nil { @@ -743,6 +760,23 @@ func (ls *layerStore) assembleTarTo(graphID string, metadata io.ReadCloser, size } func (ls *layerStore) Cleanup() error { + orphanLayers, err := ls.store.getOrphan() + if err != nil { + logrus.Errorf("Cannot get orphan layers: %v", err) + } + logrus.Debugf("found %v orphan layers", len(orphanLayers)) + for _, orphan := range orphanLayers { + logrus.Debugf("removing orphan layer, chain ID: %v , cache ID: %v", orphan.chainID, orphan.cacheID) + err = ls.driver.Remove(orphan.cacheID) + if err != nil && !os.IsNotExist(err) { + logrus.WithError(err).WithField("cache-id", orphan.cacheID).Error("cannot remove orphan layer") + continue + } + err = ls.store.Remove(orphan.chainID, orphan.cacheID) + if err != nil { + logrus.WithError(err).WithField("chain-id", orphan.chainID).Error("cannot remove orphan layer metadata") + } + } return ls.driver.Cleanup() }