From bc4a0959e4b3e5e681f83751152f3a529d701a07 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 26 Aug 2024 08:34:13 +0000 Subject: [PATCH 01/13] Add support for imagefs and fix multi stage cache probing --- pkg/executor/build.go | 108 +++++++++++++----- pkg/executor/build_test.go | 33 +++++- pkg/imagefs/imagefs.go | 218 +++++++++++++++++++++++++++++++++++++ pkg/util/util.go | 16 +++ 4 files changed, 343 insertions(+), 32 deletions(-) create mode 100644 pkg/imagefs/imagefs.go diff --git a/pkg/executor/build.go b/pkg/executor/build.go index 889da2a5b1..a41e0fd2c4 100644 --- a/pkg/executor/build.go +++ b/pkg/executor/build.go @@ -17,7 +17,10 @@ limitations under the License. package executor import ( + "archive/tar" "fmt" + "io" + "path" "path/filepath" "runtime" "sort" @@ -44,6 +47,7 @@ import ( "github.com/GoogleContainerTools/kaniko/pkg/filesystem" image_util "github.com/GoogleContainerTools/kaniko/pkg/image" "github.com/GoogleContainerTools/kaniko/pkg/image/remote" + "github.com/GoogleContainerTools/kaniko/pkg/imagefs" "github.com/GoogleContainerTools/kaniko/pkg/snapshot" "github.com/GoogleContainerTools/kaniko/pkg/timing" "github.com/GoogleContainerTools/kaniko/pkg/util" @@ -731,9 +735,10 @@ func (s *stageBuilder) saveLayerToImage(layer v1.Layer, createdBy string) error return err } -func CalculateDependencies(stages []config.KanikoStage, opts *config.KanikoOptions, stageNameToIdx map[string]string) (map[int][]string, error) { +func CalculateDependencies(stages []config.KanikoStage, opts *config.KanikoOptions, stageNameToIdx map[string]string) (map[int][]string, map[string][]string, error) { images := []v1.Image{} - depGraph := map[int][]string{} + stageDepGraph := map[int][]string{} + imageDepGraph := map[string][]string{} for _, s := range stages { ba := dockerfile.NewBuildArgs(opts.BuildArgs) ba.AddMetaArgs(s.MetaArgs) @@ -746,12 +751,12 @@ func CalculateDependencies(stages []config.KanikoStage, opts *config.KanikoOptio } else { image, err = image_util.RetrieveSourceImage(s, opts) if err != nil { - return nil, err + return nil, nil, err } } cfg, err := initializeConfig(image, opts) if err != nil { - return nil, err + return nil, nil, err } cmds, err := dockerfile.GetOnBuildInstructions(&cfg.Config, stageNameToIdx) @@ -761,29 +766,30 @@ func CalculateDependencies(stages []config.KanikoStage, opts *config.KanikoOptio switch cmd := c.(type) { case *instructions.CopyCommand: if cmd.From != "" { - i, err := strconv.Atoi(cmd.From) - if err != nil { - continue - } resolved, err := util.ResolveEnvironmentReplacementList(cmd.SourcesAndDest.SourcePaths, ba.ReplacementEnvs(cfg.Config.Env), true) if err != nil { - return nil, err + return nil, nil, err + } + i, err := strconv.Atoi(cmd.From) + if err == nil { + stageDepGraph[i] = append(stageDepGraph[i], resolved...) + } else { + imageDepGraph[cmd.From] = append(imageDepGraph[cmd.From], resolved...) } - depGraph[i] = append(depGraph[i], resolved...) 
} case *instructions.EnvCommand: if err := util.UpdateConfigEnv(cmd.Env, &cfg.Config, ba.ReplacementEnvs(cfg.Config.Env)); err != nil { - return nil, err + return nil, nil, err } image, err = mutate.Config(image, cfg.Config) if err != nil { - return nil, err + return nil, nil, err } case *instructions.ArgCommand: for _, arg := range cmd.Args { k, v, err := commands.ParseArg(arg.Key, arg.Value, cfg.Config.Env, ba) if err != nil { - return nil, err + return nil, nil, err } ba.AddArg(k, v) } @@ -791,7 +797,7 @@ func CalculateDependencies(stages []config.KanikoStage, opts *config.KanikoOptio } images = append(images, image) } - return depGraph, nil + return stageDepGraph, imageDepGraph, nil } // DoBuild executes building the Dockerfile @@ -816,15 +822,17 @@ func DoBuild(opts *config.KanikoOptions) (v1.Image, error) { return nil, err } - // Some stages may refer to other random images, not previous stages - if err := fetchExtraStages(kanikoStages, opts); err != nil { - return nil, err - } - crossStageDependencies, err := CalculateDependencies(kanikoStages, opts, stageNameToIdx) + crossStageDependencies, imageDependencies, err := CalculateDependencies(kanikoStages, opts, stageNameToIdx) if err != nil { return nil, err } logrus.Infof("Built cross stage deps: %v", crossStageDependencies) + logrus.Infof("Built image deps: %v", imageDependencies) + + // Some stages may refer to other random images, not previous stages + if err := fetchExtraStages(kanikoStages, opts, false, imageDependencies); err != nil { + return nil, err + } var args *dockerfile.BuildArgs @@ -959,15 +967,16 @@ func DoCacheProbe(opts *config.KanikoOptions) (v1.Image, error) { return nil, err } - // Some stages may refer to other random images, not previous stages - if err := fetchExtraStages(kanikoStages, opts); err != nil { - return nil, err - } - crossStageDependencies, err := CalculateDependencies(kanikoStages, opts, stageNameToIdx) + crossStageDependencies, imageDependencies, err := CalculateDependencies(kanikoStages, opts, stageNameToIdx) if err != nil { return nil, err } logrus.Infof("Built cross stage deps: %v", crossStageDependencies) + logrus.Infof("Built image deps: %v", imageDependencies) + // Some stages may refer to other random images, not previous stages + if err := fetchExtraStages(kanikoStages, opts, true, imageDependencies); err != nil { + return nil, err + } var args *dockerfile.BuildArgs @@ -1021,6 +1030,19 @@ func DoCacheProbe(opts *config.KanikoOptions) (v1.Image, error) { digestToCacheKey[d.String()] = sb.finalCacheKey logrus.Infof("Mapping digest %v to cachekey %v", d.String(), sb.finalCacheKey) + if filesToCache, ok := crossStageDependencies[sb.stage.Index]; ok { + ifs, err := imagefs.New( + filesystem.FS, + filepath.Join(config.KanikoDir, strconv.Itoa(sb.stage.Index)), + sourceImage, + filesToCache, + ) + if err != nil { + return nil, errors.Wrap(err, "could not create image filesystem") + } + filesystem.SetFS(ifs) + } + if stage.Final { sourceImage, err = mutateCanonicalWithoutLayerEdit(sourceImage) if err != nil { @@ -1143,7 +1165,7 @@ func deduplicatePaths(paths []string) []string { return deduped } -func fetchExtraStages(stages []config.KanikoStage, opts *config.KanikoOptions) error { +func fetchExtraStages(stages []config.KanikoStage, opts *config.KanikoOptions, cacheProbe bool, imageDependencies map[string][]string) error { t := timing.Start("Fetching Extra Stages") defer timing.DefaultRun.Stop(t) @@ -1177,8 +1199,21 @@ func fetchExtraStages(stages []config.KanikoStage, opts *config.KanikoOptions) e 
if err := saveStageAsTarball(c.From, sourceImage); err != nil { return err } - if err := extractImageToDependencyDir(c.From, sourceImage); err != nil { - return err + if !cacheProbe { + if err := extractImageToDependencyDir(c.From, sourceImage); err != nil { + return err + } + } else { + ifs, err := imagefs.New( + filesystem.FS, + filepath.Join(config.KanikoDir, c.From), + sourceImage, + imageDependencies[c.From], + ) + if err != nil { + return errors.Wrap(err, "could not create image filesystem") + } + filesystem.SetFS(ifs) } } // Store the name of the current stage in the list with names, if applicable. @@ -1210,6 +1245,25 @@ func extractImageToDependencyDir(name string, image v1.Image) error { return err } +func extractImageFilesToStageDir(stage int, image v1.Image, files []string) error { + t := timing.Start("Extracting Image Files to Stage Dir") + defer timing.DefaultRun.Stop(t) + stageDir := filepath.Join(config.KanikoDir, fmt.Sprintf("%d", stage)) + if err := filesystem.MkdirAll(stageDir, 0o755); err != nil { + return err + } + _, err := util.GetFSFromImage(stageDir, image, func(dest string, hdr *tar.Header, cleanedName string, tr io.Reader) error { + for _, f := range files { + if ok, err := path.Match(f, "/"+cleanedName); ok && err == nil { + logrus.Infof("Extracting %s", cleanedName) + return util.ExtractFile(dest, hdr, cleanedName, tr) + } + } + return nil + }) + return err +} + func saveStageAsTarball(path string, image v1.Image) error { t := timing.Start("Saving stage as tarball") defer timing.DefaultRun.Stop(t) diff --git a/pkg/executor/build_test.go b/pkg/executor/build_test.go index fadf66ad7c..02db6e76cd 100644 --- a/pkg/executor/build_test.go +++ b/pkg/executor/build_test.go @@ -216,9 +216,10 @@ func TestCalculateDependencies(t *testing.T) { mockInitConfig func(partial.WithConfigFile, *config.KanikoOptions) (*v1.ConfigFile, error) } tests := []struct { - name string - args args - want map[int][]string + name string + args args + want map[int][]string + wantImage map[string][]string }{ { name: "no deps", @@ -359,9 +360,27 @@ COPY --from=second /bar /bat 1: {"/bar"}, }, }, + { + name: "dependency from image", + args: args{ + dockerfile: ` +FROM scratch as target +COPY --from=alpine /etc/alpine-release /etc/alpine-release +`, + }, + wantImage: map[string][]string{ + "alpine": {"/etc/alpine-release"}, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + if tt.want == nil { + tt.want = map[int][]string{} + } + if tt.wantImage == nil { + tt.wantImage = map[string][]string{} + } if tt.args.mockInitConfig != nil { original := initializeConfig defer func() { initializeConfig = original }() @@ -385,14 +404,18 @@ COPY --from=second /bar /bat } stageNameToIdx := ResolveCrossStageInstructions(kanikoStages) - got, err := CalculateDependencies(kanikoStages, opts, stageNameToIdx) + got, gotImage, err := CalculateDependencies(kanikoStages, opts, stageNameToIdx) if err != nil { t.Errorf("got error: %s,", err) } if !reflect.DeepEqual(got, tt.want) { diff := cmp.Diff(got, tt.want) - t.Errorf("CalculateDependencies() = %v, want %v, diff %v", got, tt.want, diff) + t.Errorf("CalculateDependencies() crossStageDependencies = %v, want %v, diff %v", got, tt.want, diff) + } + if !reflect.DeepEqual(gotImage, tt.wantImage) { + diff := cmp.Diff(gotImage, tt.wantImage) + t.Errorf("CalculateDependencies() imageDependencies = %v, wantImage %v, diff %v", gotImage, tt.wantImage, diff) } }) } diff --git a/pkg/imagefs/imagefs.go b/pkg/imagefs/imagefs.go new file mode 100644 index 
0000000000..e24c65640a --- /dev/null +++ b/pkg/imagefs/imagefs.go @@ -0,0 +1,218 @@ +package imagefs + +import ( + "archive/tar" + "crypto/md5" + "fmt" + "io" + "io/fs" + "os" + "path/filepath" + "strconv" + "strings" + "syscall" + + v1 "github.com/google/go-containerregistry/pkg/v1" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "github.com/twpayne/go-vfs/v5" + + "github.com/GoogleContainerTools/kaniko/pkg/util" +) + +type imageFS struct { + vfs.FS + image map[string]v1.Image + dirs map[string]*cachedDir + files map[string]*cachedFileInfo +} + +func New(parent vfs.FS, root string, image v1.Image, filesToCache []string) (vfs.FS, error) { + var ifs *imageFS + + // Multiple layers of imageFS might get confusing, enable delayering. + if fs, ok := parent.(*imageFS); ok { + if _, ok := fs.image[root]; ok { + return nil, fmt.Errorf("imagefs: root already exists: %s", root) + } + fs.image[root] = image + ifs = fs + } else { + ifs = &imageFS{ + FS: vfs.NewReadOnlyFS(parent), + image: map[string]v1.Image{root: image}, + dirs: make(map[string]*cachedDir), + files: make(map[string]*cachedFileInfo), + } + } + + // Walk the image and cache file info and hash of the requested files. + _, err := util.GetFSFromImage(root, image, func(dest string, hdr *tar.Header, cleanedName string, tr io.Reader) error { + for _, f := range filesToCache { + dest := filepath.Join(root, cleanedName) + + // Check if the file matches the requested file. + if ok, err := filepath.Match(f, "/"+cleanedName); ok && err == nil { + logrus.Debugf("imagefs: Found cacheable file %q (%s) (%d:%d)", f, dest, hdr.Uid, hdr.Gid) + + f := newCachedFileInfo(hdr) + + // Hash the file, implementation must match util.CacheHasher. + h := md5.New() + h.Write([]byte(f.Mode().String())) + h.Write([]byte(strconv.FormatUint(uint64(hdr.Uid), 36))) + h.Write([]byte(",")) + h.Write([]byte(strconv.FormatUint(uint64(hdr.Gid), 36))) + if f.Mode().IsRegular() { + if _, err := io.Copy(h, tr); err != nil { + return err + } + } else if f.Mode()&os.ModeSymlink == os.ModeSymlink { + h.Write([]byte(hdr.Linkname)) + } + f.md5sum = h.Sum(nil) + + ifs.files[dest] = f + + return nil + } + + // Parent directories are needed for lookup. 
+ if cleanedName == "/" || strings.HasPrefix(f, "/"+cleanedName+"/") { + logrus.Debugf("imagefs: Found cacheable file parent %q (%s)", f, dest) + + ifs.files[dest] = newCachedFileInfo(hdr) + } + } + return nil + }) + if err != nil { + return nil, errors.Wrap(err, "failed to walk image") + } + + for dir, d := range ifs.files { + if !d.IsDir() { + continue + } + ifs.dirs[dir] = &cachedDir{FileInfo: d} + for name, fi := range ifs.files { + if filepath.Dir(name) == dir { + ifs.dirs[dir].entry = append(ifs.dirs[dir].entry, fi) + } + } + } + + return ifs, nil +} + +func (ifs *imageFS) Open(name string) (fs.File, error) { + logrus.Debugf("imagefs: Open file %s", name) + if f, err := ifs.FS.Open(name); err == nil { + return f, nil + } + if ifs.files[name] != nil { + logrus.Debugf("imagefs: Open cached file %s", name) + return ifs.files[name], nil + } + return nil, fs.ErrNotExist +} + +func (ifs *imageFS) Lstat(name string) (fs.FileInfo, error) { + logrus.Debugf("imagefs: Lstat file %s", name) + if fi, err := ifs.FS.Lstat(name); err == nil { + return fi, nil + } + if ifs.files[name] != nil { + logrus.Debugf("imagefs: Lstat cached file %s", name) + return ifs.files[name], nil + } + return nil, fs.ErrNotExist +} + +func (ifs *imageFS) Stat(name string) (fs.FileInfo, error) { + logrus.Debugf("imagefs: Stat file %s", name) + if fi, err := ifs.FS.Stat(name); err == nil { + return fi, nil + } + if ifs.files[name] != nil { + logrus.Debugf("imagefs: Stat cached file %s", name) + return ifs.files[name], nil + } + return nil, fs.ErrNotExist +} + +func (ifs *imageFS) ReadDir(name string) ([]fs.DirEntry, error) { + logrus.Debugf("imagefs: Reading directory %s", name) + if de, err := ifs.FS.ReadDir(name); err == nil { + return de, nil + } + for dir, d := range ifs.dirs { + if ok, err := filepath.Match(name, dir); ok && err == nil { + logrus.Debugf("imagefs: Reading cached directory %s", name) + return d.entry, nil + } + } + return nil, fs.ErrNotExist +} + +type cachedDir struct { + fs.FileInfo + entry []fs.DirEntry +} + +type cachedFileInfo struct { + fs.FileInfo + hdr *tar.Header + sys interface{} + md5sum []byte +} + +// Ensure that cachedFileInfo implements the CacheHasherFileInfoSum interface. 
+var _ util.CacheHasherFileInfoSum = &cachedFileInfo{} + +func newCachedFileInfo(hdr *tar.Header) *cachedFileInfo { + fi := hdr.FileInfo() + return &cachedFileInfo{ + FileInfo: fi, + hdr: hdr, + sys: &syscall.Stat_t{ + Mode: uint32(fi.Mode()), + Uid: uint32(hdr.Uid), + Gid: uint32(hdr.Gid), + }, + } +} + +func (cf *cachedFileInfo) Sys() interface{} { + logrus.Debugf("imagefs: Sys cached file %s", cf.Name()) + return cf.sys +} + +func (cf *cachedFileInfo) Stat() (fs.FileInfo, error) { + logrus.Debugf("imagefs: Stat cached file %s", cf.Name()) + return cf, nil +} + +func (cf *cachedFileInfo) Read(p []byte) (n int, err error) { + panic("imagefs: Read cached file is not allowed") +} + +func (cf *cachedFileInfo) MD5Sum() ([]byte, error) { + logrus.Debugf("imagefs: MD5Sum cached file %s", cf.Name()) + return cf.md5sum, nil +} + +func (cf *cachedFileInfo) Type() fs.FileMode { + logrus.Debugf("imagefs: Type cached file %s", cf.Name()) + return cf.Mode() +} + +func (cf *cachedFileInfo) Info() (fs.FileInfo, error) { + logrus.Debugf("imagefs: Info cached file %s", cf.Name()) + return cf, nil +} + +func (cf *cachedFileInfo) Close() error { + logrus.Debugf("imagefs: Close cached file %s", cf.Name()) + return nil +} diff --git a/pkg/util/util.go b/pkg/util/util.go index 6c6acc4ee9..7501984fbe 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -88,6 +88,13 @@ func Hasher() func(string) (string, error) { return hasher } +// CacheHasherFileInfoSum is an interface for getting the MD5 sum of a file. +// This can be implemented by the concrete fs.FileInfo type to avoid reading the +// file contents. +type CacheHasherFileInfoSum interface { + MD5Sum() ([]byte, error) +} + // CacheHasher takes into account everything the regular hasher does except for mtime func CacheHasher() func(string) (string, error) { hasher := func(p string) (string, error) { @@ -96,6 +103,15 @@ func CacheHasher() func(string) (string, error) { if err != nil { return "", err } + + if fh, ok := fi.(CacheHasherFileInfoSum); ok { + b, err := fh.MD5Sum() + if err != nil { + return "", err + } + return hex.EncodeToString(b), nil + } + h.Write([]byte(fi.Mode().String())) // Cian: this is a disgusting hack, but it removes the need for the From b98e5408fef2765d268c7246a7c0244937534e83 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 26 Aug 2024 12:16:28 +0000 Subject: [PATCH 02/13] blurb --- pkg/imagefs/imagefs.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pkg/imagefs/imagefs.go b/pkg/imagefs/imagefs.go index e24c65640a..fd40e709df 100644 --- a/pkg/imagefs/imagefs.go +++ b/pkg/imagefs/imagefs.go @@ -1,3 +1,19 @@ +/* +Copyright 2018 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + package imagefs import ( From e2ad0e67022f2e7c6e8bafd85b8247bf9eaca625 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 26 Aug 2024 12:28:35 +0000 Subject: [PATCH 03/13] remove unused mode --- pkg/imagefs/imagefs.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pkg/imagefs/imagefs.go b/pkg/imagefs/imagefs.go index fd40e709df..1907a4dcf8 100644 --- a/pkg/imagefs/imagefs.go +++ b/pkg/imagefs/imagefs.go @@ -192,9 +192,8 @@ func newCachedFileInfo(hdr *tar.Header) *cachedFileInfo { FileInfo: fi, hdr: hdr, sys: &syscall.Stat_t{ - Mode: uint32(fi.Mode()), - Uid: uint32(hdr.Uid), - Gid: uint32(hdr.Gid), + Uid: uint32(hdr.Uid), + Gid: uint32(hdr.Gid), }, } } From 83510bf9281434288a6f93029765c5a8bb80b123 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 26 Aug 2024 12:44:55 +0000 Subject: [PATCH 04/13] cleanup --- pkg/executor/build.go | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/pkg/executor/build.go b/pkg/executor/build.go index a41e0fd2c4..42c02a7124 100644 --- a/pkg/executor/build.go +++ b/pkg/executor/build.go @@ -17,10 +17,7 @@ limitations under the License. package executor import ( - "archive/tar" "fmt" - "io" - "path" "path/filepath" "runtime" "sort" @@ -1245,25 +1242,6 @@ func extractImageToDependencyDir(name string, image v1.Image) error { return err } -func extractImageFilesToStageDir(stage int, image v1.Image, files []string) error { - t := timing.Start("Extracting Image Files to Stage Dir") - defer timing.DefaultRun.Stop(t) - stageDir := filepath.Join(config.KanikoDir, fmt.Sprintf("%d", stage)) - if err := filesystem.MkdirAll(stageDir, 0o755); err != nil { - return err - } - _, err := util.GetFSFromImage(stageDir, image, func(dest string, hdr *tar.Header, cleanedName string, tr io.Reader) error { - for _, f := range files { - if ok, err := path.Match(f, "/"+cleanedName); ok && err == nil { - logrus.Infof("Extracting %s", cleanedName) - return util.ExtractFile(dest, hdr, cleanedName, tr) - } - } - return nil - }) - return err -} - func saveStageAsTarball(path string, image v1.Image) error { t := timing.Start("Saving stage as tarball") defer timing.DefaultRun.Stop(t) From e585662a2a10cea3de41f1f2fb3a265ceb4bb1d0 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 26 Aug 2024 13:17:58 +0000 Subject: [PATCH 05/13] Fix code review comments --- pkg/imagefs/imagefs.go | 74 +++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 27 deletions(-) diff --git a/pkg/imagefs/imagefs.go b/pkg/imagefs/imagefs.go index 1907a4dcf8..cfec3039fd 100644 --- a/pkg/imagefs/imagefs.go +++ b/pkg/imagefs/imagefs.go @@ -40,25 +40,31 @@ type imageFS struct { vfs.FS image map[string]v1.Image dirs map[string]*cachedDir - files map[string]*cachedFileInfo + files map[string]imageFSFile +} + +type imageFSFile interface { + fs.File + fs.FileInfo + fs.DirEntry } func New(parent vfs.FS, root string, image v1.Image, filesToCache []string) (vfs.FS, error) { var ifs *imageFS // Multiple layers of imageFS might get confusing, enable delayering. 
- if fs, ok := parent.(*imageFS); ok { - if _, ok := fs.image[root]; ok { + if pfs, ok := parent.(*imageFS); ok { + if _, ok := pfs.image[root]; ok { return nil, fmt.Errorf("imagefs: root already exists: %s", root) } - fs.image[root] = image - ifs = fs + pfs.image[root] = image + ifs = pfs } else { ifs = &imageFS{ FS: vfs.NewReadOnlyFS(parent), image: map[string]v1.Image{root: image}, dirs: make(map[string]*cachedDir), - files: make(map[string]*cachedFileInfo), + files: make(map[string]imageFSFile), } } @@ -71,7 +77,7 @@ func New(parent vfs.FS, root string, image v1.Image, filesToCache []string) (vfs if ok, err := filepath.Match(f, "/"+cleanedName); ok && err == nil { logrus.Debugf("imagefs: Found cacheable file %q (%s) (%d:%d)", f, dest, hdr.Uid, hdr.Gid) - f := newCachedFileInfo(hdr) + f := newCachedFileInfo(dest, hdr) // Hash the file, implementation must match util.CacheHasher. h := md5.New() @@ -86,9 +92,8 @@ func New(parent vfs.FS, root string, image v1.Image, filesToCache []string) (vfs } else if f.Mode()&os.ModeSymlink == os.ModeSymlink { h.Write([]byte(hdr.Linkname)) } - f.md5sum = h.Sum(nil) - ifs.files[dest] = f + ifs.files[dest] = newCachedFileInfoWithMD5Sum(f, h.Sum(nil)) return nil } @@ -177,21 +182,21 @@ type cachedDir struct { } type cachedFileInfo struct { + path string fs.FileInfo - hdr *tar.Header - sys interface{} - md5sum []byte + hdr *tar.Header + sys interface{} } -// Ensure that cachedFileInfo implements the CacheHasherFileInfoSum interface. -var _ util.CacheHasherFileInfoSum = &cachedFileInfo{} - -func newCachedFileInfo(hdr *tar.Header) *cachedFileInfo { +func newCachedFileInfo(path string, hdr *tar.Header) *cachedFileInfo { fi := hdr.FileInfo() return &cachedFileInfo{ FileInfo: fi, + path: path, hdr: hdr, sys: &syscall.Stat_t{ + // NOTE(mafredri): We only set the fields that are used by kaniko. + // This is not a complete implementation of syscall.Stat_t. 
Uid: uint32(hdr.Uid), Gid: uint32(hdr.Gid), }, @@ -199,35 +204,50 @@ func newCachedFileInfo(hdr *tar.Header) *cachedFileInfo { } func (cf *cachedFileInfo) Sys() interface{} { - logrus.Debugf("imagefs: Sys cached file %s", cf.Name()) + logrus.Debugf("imagefs: Sys cached file: %s", cf.path) return cf.sys } func (cf *cachedFileInfo) Stat() (fs.FileInfo, error) { - logrus.Debugf("imagefs: Stat cached file %s", cf.Name()) + logrus.Debugf("imagefs: Stat cached file: %s", cf.path) return cf, nil } func (cf *cachedFileInfo) Read(p []byte) (n int, err error) { - panic("imagefs: Read cached file is not allowed") -} - -func (cf *cachedFileInfo) MD5Sum() ([]byte, error) { - logrus.Debugf("imagefs: MD5Sum cached file %s", cf.Name()) - return cf.md5sum, nil + return 0, fmt.Errorf("imagefs: Read cached file is not allowed: %s", cf.path) } func (cf *cachedFileInfo) Type() fs.FileMode { - logrus.Debugf("imagefs: Type cached file %s", cf.Name()) + logrus.Debugf("imagefs: Type cached file: %s", cf.path) return cf.Mode() } func (cf *cachedFileInfo) Info() (fs.FileInfo, error) { - logrus.Debugf("imagefs: Info cached file %s", cf.Name()) + logrus.Debugf("imagefs: Info cached file: %s", cf.path) return cf, nil } func (cf *cachedFileInfo) Close() error { - logrus.Debugf("imagefs: Close cached file %s", cf.Name()) + logrus.Debugf("imagefs: Close cached file: %s", cf.path) return nil } + +type cachedFileInfoWithMD5Sum struct { + *cachedFileInfo + md5sum []byte +} + +func newCachedFileInfoWithMD5Sum(fi *cachedFileInfo, md5sum []byte) *cachedFileInfoWithMD5Sum { + return &cachedFileInfoWithMD5Sum{ + cachedFileInfo: fi, + md5sum: md5sum, + } +} + +// Ensure that cachedFileInfo implements the CacheHasherFileInfoSum interface. +var _ util.CacheHasherFileInfoSum = &cachedFileInfoWithMD5Sum{} + +func (cf *cachedFileInfoWithMD5Sum) MD5Sum() ([]byte, error) { + logrus.Debugf("imagefs: MD5Sum cached file: %s", cf.path) + return cf.md5sum, nil +} From 32937a1bb4698b29e4e044f77a4d1535195758c4 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 27 Aug 2024 09:11:30 +0000 Subject: [PATCH 06/13] pr fixes --- pkg/executor/build.go | 6 ++++-- pkg/imagefs/imagefs.go | 43 +++++++++++++++++++++++++----------------- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/pkg/executor/build.go b/pkg/executor/build.go index 42c02a7124..299b3519d0 100644 --- a/pkg/executor/build.go +++ b/pkg/executor/build.go @@ -735,6 +735,8 @@ func (s *stageBuilder) saveLayerToImage(layer v1.Layer, createdBy string) error func CalculateDependencies(stages []config.KanikoStage, opts *config.KanikoOptions, stageNameToIdx map[string]string) (map[int][]string, map[string][]string, error) { images := []v1.Image{} stageDepGraph := map[int][]string{} + // imageDepGraph tracks stage dependencies from non-stage + // images for use by imagefs to avoid extraction. 
imageDepGraph := map[string][]string{} for _, s := range stages { ba := dockerfile.NewBuildArgs(opts.BuildArgs) @@ -828,7 +830,7 @@ func DoBuild(opts *config.KanikoOptions) (v1.Image, error) { // Some stages may refer to other random images, not previous stages if err := fetchExtraStages(kanikoStages, opts, false, imageDependencies); err != nil { - return nil, err + return nil, errors.Wrap(err, "fetch extra stages failed") } var args *dockerfile.BuildArgs @@ -972,7 +974,7 @@ func DoCacheProbe(opts *config.KanikoOptions) (v1.Image, error) { logrus.Infof("Built image deps: %v", imageDependencies) // Some stages may refer to other random images, not previous stages if err := fetchExtraStages(kanikoStages, opts, true, imageDependencies); err != nil { - return nil, err + return nil, errors.Wrap(err, "fetch extra stages failed") } var args *dockerfile.BuildArgs diff --git a/pkg/imagefs/imagefs.go b/pkg/imagefs/imagefs.go index cfec3039fd..20e805a685 100644 --- a/pkg/imagefs/imagefs.go +++ b/pkg/imagefs/imagefs.go @@ -77,23 +77,13 @@ func New(parent vfs.FS, root string, image v1.Image, filesToCache []string) (vfs if ok, err := filepath.Match(f, "/"+cleanedName); ok && err == nil { logrus.Debugf("imagefs: Found cacheable file %q (%s) (%d:%d)", f, dest, hdr.Uid, hdr.Gid) - f := newCachedFileInfo(dest, hdr) - - // Hash the file, implementation must match util.CacheHasher. - h := md5.New() - h.Write([]byte(f.Mode().String())) - h.Write([]byte(strconv.FormatUint(uint64(hdr.Uid), 36))) - h.Write([]byte(",")) - h.Write([]byte(strconv.FormatUint(uint64(hdr.Gid), 36))) - if f.Mode().IsRegular() { - if _, err := io.Copy(h, tr); err != nil { - return err - } - } else if f.Mode()&os.ModeSymlink == os.ModeSymlink { - h.Write([]byte(hdr.Linkname)) + sum, err := hashFile(hdr, tr) + if err != nil { + return errors.Wrap(err, "imagefs: hash file failed") } - ifs.files[dest] = newCachedFileInfoWithMD5Sum(f, h.Sum(nil)) + f := newCachedFileInfo(dest, hdr) + ifs.files[dest] = newCachedFileInfoWithMD5Sum(f, sum) return nil } @@ -102,13 +92,13 @@ func New(parent vfs.FS, root string, image v1.Image, filesToCache []string) (vfs if cleanedName == "/" || strings.HasPrefix(f, "/"+cleanedName+"/") { logrus.Debugf("imagefs: Found cacheable file parent %q (%s)", f, dest) - ifs.files[dest] = newCachedFileInfo(hdr) + ifs.files[dest] = newCachedFileInfo(dest, hdr) } } return nil }) if err != nil { - return nil, errors.Wrap(err, "failed to walk image") + return nil, errors.Wrap(err, "imagefs: walk image failed") } for dir, d := range ifs.files { @@ -251,3 +241,22 @@ func (cf *cachedFileInfoWithMD5Sum) MD5Sum() ([]byte, error) { logrus.Debugf("imagefs: MD5Sum cached file: %s", cf.path) return cf.md5sum, nil } + +// hashFile hashes the gievn file, implementation must match util.CacheHasher. 
+func hashFile(hdr *tar.Header, r io.Reader) ([]byte, error) { + fi := hdr.FileInfo() + + h := md5.New() + h.Write([]byte(fi.Mode().String())) + h.Write([]byte(strconv.FormatUint(uint64(fi.Sys().(*syscall.Stat_t).Uid), 36))) + h.Write([]byte(",")) + h.Write([]byte(strconv.FormatUint(uint64(fi.Sys().(*syscall.Stat_t).Gid), 36))) + if fi.Mode().IsRegular() { + if _, err := io.Copy(h, r); err != nil { + return nil, errors.Wrap(err, "imagefs: copy file content failed") + } + } else if fi.Mode()&os.ModeSymlink == os.ModeSymlink { + h.Write([]byte(hdr.Linkname)) + } + return h.Sum(nil), nil +} From a298d5a71e8d22c61afc0fdb90f8475f1d72f4cf Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 27 Aug 2024 09:35:14 +0000 Subject: [PATCH 07/13] cleanup sys --- pkg/imagefs/imagefs.go | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/pkg/imagefs/imagefs.go b/pkg/imagefs/imagefs.go index 20e805a685..c6cfebf024 100644 --- a/pkg/imagefs/imagefs.go +++ b/pkg/imagefs/imagefs.go @@ -27,6 +27,7 @@ import ( "strconv" "strings" "syscall" + "time" v1 "github.com/google/go-containerregistry/pkg/v1" "github.com/pkg/errors" @@ -179,17 +180,11 @@ type cachedFileInfo struct { } func newCachedFileInfo(path string, hdr *tar.Header) *cachedFileInfo { - fi := hdr.FileInfo() return &cachedFileInfo{ - FileInfo: fi, + FileInfo: hdr.FileInfo(), path: path, hdr: hdr, - sys: &syscall.Stat_t{ - // NOTE(mafredri): We only set the fields that are used by kaniko. - // This is not a complete implementation of syscall.Stat_t. - Uid: uint32(hdr.Uid), - Gid: uint32(hdr.Gid), - }, + sys: tarHeaderToStat_t(hdr), } } @@ -242,6 +237,24 @@ func (cf *cachedFileInfoWithMD5Sum) MD5Sum() ([]byte, error) { return cf.md5sum, nil } +// tarHeaderToStat_t converts a tar.Header to a syscall.Stat_t. +func tarHeaderToStat_t(hdr *tar.Header) *syscall.Stat_t { + fi := hdr.FileInfo() + return &syscall.Stat_t{ + Mode: uint32(fi.Mode()), + Uid: uint32(hdr.Uid), + Gid: uint32(hdr.Gid), + Size: fi.Size(), + Atim: timespec(hdr.AccessTime), + Ctim: timespec(hdr.ChangeTime), + Mtim: timespec(fi.ModTime()), + } +} + +func timespec(t time.Time) syscall.Timespec { + return syscall.Timespec{Sec: t.Unix(), Nsec: int64(t.Nanosecond())} +} + // hashFile hashes the gievn file, implementation must match util.CacheHasher. func hashFile(hdr *tar.Header, r io.Reader) ([]byte, error) { fi := hdr.FileInfo() From 0f87b47e6bc844ce550ce0041004bb6a9b17b4d0 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 27 Aug 2024 09:40:17 +0000 Subject: [PATCH 08/13] protect maps with mutex --- pkg/imagefs/imagefs.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pkg/imagefs/imagefs.go b/pkg/imagefs/imagefs.go index c6cfebf024..13f9c7610a 100644 --- a/pkg/imagefs/imagefs.go +++ b/pkg/imagefs/imagefs.go @@ -26,6 +26,7 @@ import ( "path/filepath" "strconv" "strings" + "sync" "syscall" "time" @@ -39,6 +40,8 @@ import ( type imageFS struct { vfs.FS + + mu sync.RWMutex // Protects following. image map[string]v1.Image dirs map[string]*cachedDir files map[string]imageFSFile @@ -55,6 +58,9 @@ func New(parent vfs.FS, root string, image v1.Image, filesToCache []string) (vfs // Multiple layers of imageFS might get confusing, enable delayering. 
if pfs, ok := parent.(*imageFS); ok { + pfs.mu.Lock() + defer pfs.mu.Unlock() + if _, ok := pfs.image[root]; ok { return nil, fmt.Errorf("imagefs: root already exists: %s", root) } @@ -122,6 +128,9 @@ func (ifs *imageFS) Open(name string) (fs.File, error) { if f, err := ifs.FS.Open(name); err == nil { return f, nil } + + ifs.mu.RLock() + defer ifs.mu.RUnlock() if ifs.files[name] != nil { logrus.Debugf("imagefs: Open cached file %s", name) return ifs.files[name], nil @@ -134,6 +143,9 @@ func (ifs *imageFS) Lstat(name string) (fs.FileInfo, error) { if fi, err := ifs.FS.Lstat(name); err == nil { return fi, nil } + + ifs.mu.RLock() + defer ifs.mu.RUnlock() if ifs.files[name] != nil { logrus.Debugf("imagefs: Lstat cached file %s", name) return ifs.files[name], nil @@ -146,6 +158,9 @@ func (ifs *imageFS) Stat(name string) (fs.FileInfo, error) { if fi, err := ifs.FS.Stat(name); err == nil { return fi, nil } + + ifs.mu.RLock() + defer ifs.mu.RUnlock() if ifs.files[name] != nil { logrus.Debugf("imagefs: Stat cached file %s", name) return ifs.files[name], nil @@ -158,6 +173,9 @@ func (ifs *imageFS) ReadDir(name string) ([]fs.DirEntry, error) { if de, err := ifs.FS.ReadDir(name); err == nil { return de, nil } + + ifs.mu.RLock() + defer ifs.mu.RUnlock() for dir, d := range ifs.dirs { if ok, err := filepath.Match(name, dir); ok && err == nil { logrus.Debugf("imagefs: Reading cached directory %s", name) From da6fccdf3e883e9e13b991b17fd795a4b097b6bf Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 27 Aug 2024 09:42:15 +0000 Subject: [PATCH 09/13] cleanup --- pkg/imagefs/imagefs.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/imagefs/imagefs.go b/pkg/imagefs/imagefs.go index 13f9c7610a..19de8f5026 100644 --- a/pkg/imagefs/imagefs.go +++ b/pkg/imagefs/imagefs.go @@ -191,10 +191,10 @@ type cachedDir struct { } type cachedFileInfo struct { - path string fs.FileInfo - hdr *tar.Header - sys interface{} + path string + hdr *tar.Header + sys *syscall.Stat_t } func newCachedFileInfo(path string, hdr *tar.Header) *cachedFileInfo { From 0ef352dd12fdd9bd207fda485ec51b2a1eb2774b Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 27 Aug 2024 11:28:18 +0000 Subject: [PATCH 10/13] fix cleanup --- pkg/imagefs/imagefs.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/imagefs/imagefs.go b/pkg/imagefs/imagefs.go index 19de8f5026..95e9f2005c 100644 --- a/pkg/imagefs/imagefs.go +++ b/pkg/imagefs/imagefs.go @@ -279,9 +279,9 @@ func hashFile(hdr *tar.Header, r io.Reader) ([]byte, error) { h := md5.New() h.Write([]byte(fi.Mode().String())) - h.Write([]byte(strconv.FormatUint(uint64(fi.Sys().(*syscall.Stat_t).Uid), 36))) + h.Write([]byte(strconv.FormatUint(uint64(hdr.Uid), 36))) h.Write([]byte(",")) - h.Write([]byte(strconv.FormatUint(uint64(fi.Sys().(*syscall.Stat_t).Gid), 36))) + h.Write([]byte(strconv.FormatUint(uint64(hdr.Gid), 36))) if fi.Mode().IsRegular() { if _, err := io.Copy(h, r); err != nil { return nil, errors.Wrap(err, "imagefs: copy file content failed") From 7ed3388f2ef29ef3994d947c5300e7c878e3b174 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 27 Aug 2024 16:01:07 +0300 Subject: [PATCH 11/13] Apply suggestions from code review Co-authored-by: Cian Johnston --- pkg/executor/build.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/executor/build.go b/pkg/executor/build.go index 299b3519d0..596b2be4a9 100644 --- a/pkg/executor/build.go +++ b/pkg/executor/build.go @@ -830,7 +830,7 @@ func 
DoBuild(opts *config.KanikoOptions) (v1.Image, error) { // Some stages may refer to other random images, not previous stages if err := fetchExtraStages(kanikoStages, opts, false, imageDependencies); err != nil { - return nil, errors.Wrap(err, "fetch extra stages failed") + return nil, errors.Wrap(err, "fetch extra stages failed") } var args *dockerfile.BuildArgs @@ -974,7 +974,7 @@ func DoCacheProbe(opts *config.KanikoOptions) (v1.Image, error) { logrus.Infof("Built image deps: %v", imageDependencies) // Some stages may refer to other random images, not previous stages if err := fetchExtraStages(kanikoStages, opts, true, imageDependencies); err != nil { - return nil, errors.Wrap(err, "fetch extra stages failed") + return nil, errors.Wrap(err, "fetch extra stages failed") } var args *dockerfile.BuildArgs From 5f3faef8390bf3f214a4fb401388226aa86e043a Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Thu, 29 Aug 2024 09:18:49 +0000 Subject: [PATCH 12/13] Enable multi stage test for cache probe --- pkg/executor/cache_probe_test.go | 6 +++--- pkg/imagefs/imagefs.go | 8 ++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pkg/executor/cache_probe_test.go b/pkg/executor/cache_probe_test.go index ce13a16c71..e028db9813 100644 --- a/pkg/executor/cache_probe_test.go +++ b/pkg/executor/cache_probe_test.go @@ -165,8 +165,6 @@ COPY foo/baz.txt copied/ }) t.Run("MultiStage", func(t *testing.T) { - t.Skip("TODO: https://github.com/coder/envbuilder/issues/230") - // Share cache between both builds. regCache := setupCacheRegistry(t) @@ -175,10 +173,12 @@ COPY foo/baz.txt copied/ dockerFile := ` FROM scratch as first COPY foo/bam.txt copied/ + COPY foo/bam.link copied/ ENV test test From scratch as second - COPY --from=first copied/bam.txt output/bam.txt` + COPY --from=first copied/bam.txt output/bam.txt + COPY --from=first copied/bam.link output/bam.link` err := filesystem.WriteFile(filepath.Join(testDir, "workspace", "Dockerfile"), []byte(dockerFile), 0o755) testutil.CheckNoError(t, err) opts := &config.KanikoOptions{ diff --git a/pkg/imagefs/imagefs.go b/pkg/imagefs/imagefs.go index 95e9f2005c..05fe13896c 100644 --- a/pkg/imagefs/imagefs.go +++ b/pkg/imagefs/imagefs.go @@ -77,11 +77,15 @@ func New(parent vfs.FS, root string, image v1.Image, filesToCache []string) (vfs // Walk the image and cache file info and hash of the requested files. _, err := util.GetFSFromImage(root, image, func(dest string, hdr *tar.Header, cleanedName string, tr io.Reader) error { + // Trim prefix for consistent path. + cleanedName = strings.TrimPrefix(cleanedName, "/") + for _, f := range filesToCache { dest := filepath.Join(root, cleanedName) + f = strings.TrimPrefix(f, "/") // Check if the file matches the requested file. - if ok, err := filepath.Match(f, "/"+cleanedName); ok && err == nil { + if ok, err := filepath.Match(f, cleanedName); ok && err == nil { logrus.Debugf("imagefs: Found cacheable file %q (%s) (%d:%d)", f, dest, hdr.Uid, hdr.Gid) sum, err := hashFile(hdr, tr) @@ -96,7 +100,7 @@ func New(parent vfs.FS, root string, image v1.Image, filesToCache []string) (vfs } // Parent directories are needed for lookup. 
- if cleanedName == "/" || strings.HasPrefix(f, "/"+cleanedName+"/") { + if cleanedName == "" || strings.HasPrefix(f, cleanedName+"/") { logrus.Debugf("imagefs: Found cacheable file parent %q (%s)", f, dest) ifs.files[dest] = newCachedFileInfo(dest, hdr) From d60cb015fe2f779436d9eb0d309ec6557a44c533 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Thu, 29 Aug 2024 11:32:37 +0000 Subject: [PATCH 13/13] Restore fs after cache probe --- pkg/executor/build.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/executor/build.go b/pkg/executor/build.go index 596b2be4a9..794e0a9d1d 100644 --- a/pkg/executor/build.go +++ b/pkg/executor/build.go @@ -947,6 +947,12 @@ func DoBuild(opts *config.KanikoOptions) (v1.Image, error) { // cache without modifying the filesystem. // Returns an error if any layers are missing from build cache. func DoCacheProbe(opts *config.KanikoOptions) (v1.Image, error) { + // Restore the filesystem after we're done since we're using imagefs. + origFS := filesystem.FS + defer func() { + filesystem.SetFS(origFS) + }() + digestToCacheKey := make(map[string]string) stageIdxToDigest := make(map[string]string)
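
Note (not part of the patches above): the series ends with DoCacheProbe swapping in an imagefs-backed filesystem and restoring the original afterwards. The piece that makes this pay off is the CacheHasherFileInfoSum interface added to pkg/util/util.go — when the active filesystem returns a FileInfo that already carries a precomputed MD5 sum (as imagefs's cachedFileInfoWithMD5Sum does, hashed while the image layers are streamed), CacheHasher can return that sum directly instead of reading file contents from disk. The sketch below is illustrative only; fakeFileInfo and cacheKey are hypothetical stand-ins for imagefs's cached file info and util.CacheHasher, written so the example compiles on its own without the kaniko tree.

	// Sketch of the CacheHasherFileInfoSum fast path; stand-in types, not kaniko code.
	package main

	import (
		"crypto/md5"
		"encoding/hex"
		"fmt"
		"io/fs"
		"time"
	)

	// CacheHasherFileInfoSum mirrors the interface added to pkg/util/util.go.
	type CacheHasherFileInfoSum interface {
		MD5Sum() ([]byte, error)
	}

	// fakeFileInfo stands in for imagefs's cachedFileInfoWithMD5Sum: metadata taken
	// from a tar header plus a hash computed while streaming the image, so the file
	// never has to exist on disk.
	type fakeFileInfo struct {
		name   string
		md5sum []byte
	}

	func (f *fakeFileInfo) Name() string       { return f.name }
	func (f *fakeFileInfo) Size() int64        { return 0 }
	func (f *fakeFileInfo) Mode() fs.FileMode  { return 0o644 }
	func (f *fakeFileInfo) ModTime() time.Time { return time.Time{} }
	func (f *fakeFileInfo) IsDir() bool        { return false }
	func (f *fakeFileInfo) Sys() interface{}   { return nil }

	// MD5Sum satisfies CacheHasherFileInfoSum with the precomputed hash.
	func (f *fakeFileInfo) MD5Sum() ([]byte, error) { return f.md5sum, nil }

	// cacheKey mimics the early-return branch the patch adds to util.CacheHasher:
	// prefer the precomputed sum, otherwise fall back to hashing metadata/contents.
	func cacheKey(fi fs.FileInfo) (string, error) {
		if fh, ok := fi.(CacheHasherFileInfoSum); ok {
			b, err := fh.MD5Sum()
			if err != nil {
				return "", err
			}
			return hex.EncodeToString(b), nil
		}
		h := md5.New()
		h.Write([]byte(fi.Mode().String()))
		return hex.EncodeToString(h.Sum(nil)), nil
	}

	func main() {
		sum := md5.Sum([]byte("alpine-release contents"))
		fi := &fakeFileInfo{name: "alpine-release", md5sum: sum[:]}
		key, err := cacheKey(fi)
		if err != nil {
			panic(err)
		}
		// Same key the cache probe would compute, without reading the file from disk.
		fmt.Println(key)
	}

In the real flow, DoCacheProbe and fetchExtraStages reach this branch by calling filesystem.SetFS with the filesystem returned by imagefs.New, so cache keys for COPY --from sources such as /etc/alpine-release can be computed without extracting anything under config.KanikoDir.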