From 56c0981f1ecabc129e78a970a06e487b3a9b7a28 Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Wed, 18 Aug 2021 10:26:08 -0700 Subject: [PATCH 1/4] Follow links in includedPaths to resolve incorrect caching when source path is behind symlink As discussed in #2300, includedPaths does not resolve symlinks when looking up the source path in the prefix tree. If the user requests a path that involves symlinks (for example, /a/foo when a symlink /a -> /b exists), includedPaths will not find it, and will expect nothing to be copied. This does not match the actual copy behavior implemented in fsutil, which will follow symlinks in prefix components of a given path, so it can end up caching an empty result even though the copy will produce a non-empty result, which is quite bad. To fix this, use getFollowLinks to resolve the path before walking it. In the wildcard case, this is done to the non-wildcard prefix of the path (if any), which matches the behavior in fsutil. Fixes the repro case here: https://gist.github.com/aaronlehmann/64054c9a2cff0d27e200cc107bba3d69 Fixes #2300 Signed-off-by: Aaron Lehmann --- cache/contenthash/checksum.go | 119 ++++++++++++++++++++++++++--- cache/contenthash/checksum_test.go | 39 +++++++--- 2 files changed, 140 insertions(+), 18 deletions(-) diff --git a/cache/contenthash/checksum.go b/cache/contenthash/checksum.go index dbc2d9187cb0..78d0bcec7e9c 100644 --- a/cache/contenthash/checksum.go +++ b/cache/contenthash/checksum.go @@ -506,21 +506,77 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o txn := cc.tree.Txn() root = txn.Root() var ( - updated bool - iter *iradix.Iterator - k []byte - kOk bool + updated bool + iter *iradix.Iterator + k []byte + kOk bool + origPrefix string + resolvedPrefix string ) iter = root.Iterator() if opts.Wildcard { - k, _, kOk = iter.Next() + // For consistency with what the copy implementation in fsutil + // does: split pattern into non-wildcard prefix and rest of + // pattern, then follow symlinks when resolving the non-wildcard + // prefix. + + d1, d2 := splitWildcards(p) + if d1 != "/" { + origPrefix = d1 + k = convertPathToKey([]byte(d1)) + linksWalked := 0 + if d2 != "" { + // getFollowLinks only handles symlinks in path + // components before the last component, so + // handle last component in d1 specially. + for { + v, ok := root.Get(k) + + if !ok || v.(*CacheRecord).Type != CacheRecordTypeSymlink { + break + } + + linksWalked++ + if linksWalked > 255 { + return nil, errors.Errorf("too many links") + } + + dirPath := path.Clean(d1) + if dirPath == "." || dirPath == "/" { + dirPath = "" + } + d1 = path.Clean(v.(*CacheRecord).Linkname) + if !path.IsAbs(d1) { + d1 = path.Clean(path.Join("/", path.Join(path.Dir(dirPath), d1))) + } + k = convertPathToKey([]byte(d1)) + } + } + } } else { - k = convertPathToKey([]byte(p)) - if _, kOk = root.Get(k); kOk { + origPrefix = p + k = convertPathToKey([]byte(origPrefix)) + } + + if origPrefix != "" { + // We need to resolve symlinks here, in case the base path + // involves a symlink. That will match fsutil behavior of + // calling functions such as stat and walk. + var cr *CacheRecord + k, cr, err = getFollowLinks(root, k, true) + if err != nil { + return nil, err + } + + if kOk = (cr != nil); kOk { iter.SeekLowerBound(append(append([]byte{}, k...), 0)) } + + resolvedPrefix = string(convertKeyToPath(k)) + } else { + k, _, kOk = iter.Next() } var ( @@ -531,6 +587,20 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o for kOk { fn := string(convertKeyToPath(k)) + // Convert the path prefix from what we found in the prefix + // tree to what the argument specified. + // + // For example, if the original 'p' argument was /a/b and there + // is a symlink a->c, we want fn to be /a/b/foo rather than + // /c/b/foo. This is necessary to ensure correct pattern + // matching. + // + // When wildcards are enabled, this translation applies to the + // portion of 'p' before any wildcards. + if strings.HasPrefix(fn, resolvedPrefix) { + fn = origPrefix + strings.TrimPrefix(fn, resolvedPrefix) + } + for len(parentDirHeaders) != 0 { lastParentDir := parentDirHeaders[len(parentDirHeaders)-1] if strings.HasPrefix(fn, lastParentDir.Path+"/") { @@ -570,8 +640,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o } } else { if !strings.HasPrefix(fn+"/", p+"/") { - k, _, kOk = iter.Next() - continue + break } shouldInclude, err = shouldIncludePath(strings.TrimSuffix(strings.TrimPrefix(fn+"/", p+"/"), "/"), includePatternMatcher, excludePatternMatcher) @@ -648,6 +717,38 @@ func shouldIncludePath( return true, nil } +func splitWildcards(p string) (d1, d2 string) { + parts := strings.Split(path.Join(p), "/") + var p1, p2 []string + var found bool + for _, p := range parts { + if !found && containsWildcards(p) { + found = true + } + if p == "" { + p = "/" + } + if !found { + p1 = append(p1, p) + } else { + p2 = append(p2, p) + } + } + return filepath.Join(p1...), filepath.Join(p2...) +} + +func containsWildcards(name string) bool { + for i := 0; i < len(name); i++ { + ch := name[i] + if ch == '\\' { + i++ + } else if ch == '*' || ch == '?' || ch == '[' { + return true + } + } + return false +} + func (cc *cacheContext) checksumNoFollow(ctx context.Context, m *mount, p string) (*CacheRecord, error) { p = keyPath(p) diff --git a/cache/contenthash/checksum_test.go b/cache/contenthash/checksum_test.go index c83b3c02c61a..13046799a201 100644 --- a/cache/contenthash/checksum_test.go +++ b/cache/contenthash/checksum_test.go @@ -639,8 +639,6 @@ func TestChecksumIncludeDoubleStar(t *testing.T) { } func TestChecksumIncludeSymlink(t *testing.T) { - t.Skip("Test will fail until https://github.com/moby/buildkit/issues/2300 is fixed") - t.Parallel() tmpdir, err := ioutil.TempDir("", "buildkit-state") require.NoError(t, err) @@ -654,9 +652,10 @@ func TestChecksumIncludeSymlink(t *testing.T) { ch := []string{ "ADD data dir", "ADD data/d1 dir", + "ADD data/d1/d2 dir", "ADD mnt dir", "ADD mnt/data symlink ../data", - "ADD data/d1/foo file abc", + "ADD data/d1/d2/foo file abc", } ref := createRef(t, cm, ch) @@ -664,20 +663,42 @@ func TestChecksumIncludeSymlink(t *testing.T) { cc, err := newCacheContext(ref.Metadata(), nil) require.NoError(t, err) - dgst, err := cc.Checksum(context.TODO(), ref, "data/d1", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil) + dgstD1, err := cc.Checksum(context.TODO(), ref, "data/d1", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil) + require.NoError(t, err) + // File should be included + require.NotEqual(t, digest.FromBytes([]byte{}), dgstD1) + + dgstMntD1, err := cc.Checksum(context.TODO(), ref, "mnt/data/d1", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil) + require.NoError(t, err) + // File should be included despite symlink + require.Equal(t, dgstD1, dgstMntD1) + + dgstD2, err := cc.Checksum(context.TODO(), ref, "data/d1/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil) require.NoError(t, err) // File should be included - require.NotEqual(t, digest.FromBytes([]byte{}), dgst) + require.NotEqual(t, digest.FromBytes([]byte{}), dgstD2) - dgst, err = cc.Checksum(context.TODO(), ref, "mnt/data/d1", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil) + dgstMntD2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d1/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil) require.NoError(t, err) // File should be included despite symlink - require.NotEqual(t, digest.FromBytes([]byte{}), dgst) + require.Equal(t, dgstD2, dgstMntD2) // Same, with Wildcard = true - dgst, err = cc.Checksum(context.TODO(), ref, "mnt/data/d1", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil) + dgstMntD1Wildcard, err := cc.Checksum(context.TODO(), ref, "mnt/data/d1", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil) + require.NoError(t, err) + require.Equal(t, dgstD1, dgstMntD1Wildcard) + + dgstMntD1Wildcard2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d*", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil) + require.NoError(t, err) + require.Equal(t, dgstD1, dgstMntD1Wildcard2) + + dgstMntD2Wildcard, err := cc.Checksum(context.TODO(), ref, "mnt/data/d1/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil) + require.NoError(t, err) + require.Equal(t, dgstD2, dgstMntD2Wildcard) + + dgstMntD2Wildcard2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d1/d*", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil) require.NoError(t, err) - require.NotEqual(t, digest.FromBytes([]byte{}), dgst) + require.Equal(t, dgstD2, dgstMntD2Wildcard2) } func TestHandleChange(t *testing.T) { From ddd18de18ea4cb6fb340db334bda83aabffa02e2 Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Tue, 7 Sep 2021 13:40:39 -0700 Subject: [PATCH 2/4] Add test case for symlink which is not final path component before wildcard Signed-off-by: Aaron Lehmann --- cache/contenthash/checksum_test.go | 35 +++++++++++++++++------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/cache/contenthash/checksum_test.go b/cache/contenthash/checksum_test.go index 13046799a201..bba6ee5fdf91 100644 --- a/cache/contenthash/checksum_test.go +++ b/cache/contenthash/checksum_test.go @@ -651,11 +651,12 @@ func TestChecksumIncludeSymlink(t *testing.T) { ch := []string{ "ADD data dir", - "ADD data/d1 dir", - "ADD data/d1/d2 dir", + "ADD data/d0 dir", + "ADD data/d0/d1 dir", + "ADD data/d0/d1/d2 dir", "ADD mnt dir", "ADD mnt/data symlink ../data", - "ADD data/d1/d2/foo file abc", + "ADD data/d0/d1/d2/foo file abc", } ref := createRef(t, cm, ch) @@ -663,42 +664,46 @@ func TestChecksumIncludeSymlink(t *testing.T) { cc, err := newCacheContext(ref.Metadata(), nil) require.NoError(t, err) - dgstD1, err := cc.Checksum(context.TODO(), ref, "data/d1", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil) + dgstD0, err := cc.Checksum(context.TODO(), ref, "data/d0", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil) require.NoError(t, err) // File should be included - require.NotEqual(t, digest.FromBytes([]byte{}), dgstD1) + require.NotEqual(t, digest.FromBytes([]byte{}), dgstD0) - dgstMntD1, err := cc.Checksum(context.TODO(), ref, "mnt/data/d1", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil) + dgstMntD0, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil) require.NoError(t, err) // File should be included despite symlink - require.Equal(t, dgstD1, dgstMntD1) + require.Equal(t, dgstD0, dgstMntD0) - dgstD2, err := cc.Checksum(context.TODO(), ref, "data/d1/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil) + dgstD2, err := cc.Checksum(context.TODO(), ref, "data/d0/d1/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil) require.NoError(t, err) // File should be included require.NotEqual(t, digest.FromBytes([]byte{}), dgstD2) - dgstMntD2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d1/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil) + dgstMntD2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0/d1/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil) require.NoError(t, err) // File should be included despite symlink require.Equal(t, dgstD2, dgstMntD2) // Same, with Wildcard = true - dgstMntD1Wildcard, err := cc.Checksum(context.TODO(), ref, "mnt/data/d1", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil) + dgstMntD0Wildcard, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil) require.NoError(t, err) - require.Equal(t, dgstD1, dgstMntD1Wildcard) + require.Equal(t, dgstD0, dgstMntD0Wildcard) - dgstMntD1Wildcard2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d*", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil) + dgstMntD0Wildcard2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d*", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil) require.NoError(t, err) - require.Equal(t, dgstD1, dgstMntD1Wildcard2) + require.Equal(t, dgstD0, dgstMntD0Wildcard2) - dgstMntD2Wildcard, err := cc.Checksum(context.TODO(), ref, "mnt/data/d1/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil) + dgstMntD2Wildcard, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0/d1/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil) require.NoError(t, err) require.Equal(t, dgstD2, dgstMntD2Wildcard) - dgstMntD2Wildcard2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d1/d*", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil) + dgstMntD2Wildcard2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0/d1/d*", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil) require.NoError(t, err) require.Equal(t, dgstD2, dgstMntD2Wildcard2) + + dgstMntInnerWildcard, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0/d*/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil) + require.NoError(t, err) + require.Equal(t, dgstD2, dgstMntInnerWildcard) } func TestHandleChange(t *testing.T) { From 98f54ff22c03d5f861f81125e2e48b1eb39ddbc1 Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Tue, 7 Sep 2021 16:46:00 -0700 Subject: [PATCH 3/4] Handle the case of multiple path component symlinks (including last component) in wildcard prefix Signed-off-by: Aaron Lehmann --- cache/contenthash/checksum.go | 113 +++++++++++++++++------------ cache/contenthash/checksum_test.go | 5 ++ 2 files changed, 70 insertions(+), 48 deletions(-) diff --git a/cache/contenthash/checksum.go b/cache/contenthash/checksum.go index 78d0bcec7e9c..65019d075051 100644 --- a/cache/contenthash/checksum.go +++ b/cache/contenthash/checksum.go @@ -517,50 +517,14 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o iter = root.Iterator() if opts.Wildcard { - // For consistency with what the copy implementation in fsutil - // does: split pattern into non-wildcard prefix and rest of - // pattern, then follow symlinks when resolving the non-wildcard - // prefix. - - d1, d2 := splitWildcards(p) - if d1 != "/" { - origPrefix = d1 - k = convertPathToKey([]byte(d1)) - linksWalked := 0 - if d2 != "" { - // getFollowLinks only handles symlinks in path - // components before the last component, so - // handle last component in d1 specially. - for { - v, ok := root.Get(k) - - if !ok || v.(*CacheRecord).Type != CacheRecordTypeSymlink { - break - } - - linksWalked++ - if linksWalked > 255 { - return nil, errors.Errorf("too many links") - } - - dirPath := path.Clean(d1) - if dirPath == "." || dirPath == "/" { - dirPath = "" - } - d1 = path.Clean(v.(*CacheRecord).Linkname) - if !path.IsAbs(d1) { - d1 = path.Clean(path.Join("/", path.Join(path.Dir(dirPath), d1))) - } - k = convertPathToKey([]byte(d1)) - } - } + origPrefix, k, kOk, err = wildcardPrefix(root, p) + if err != nil { + return nil, err } } else { origPrefix = p k = convertPathToKey([]byte(origPrefix)) - } - if origPrefix != "" { // We need to resolve symlinks here, in case the base path // involves a symlink. That will match fsutil behavior of // calling functions such as stat and walk. @@ -569,8 +533,11 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o if err != nil { return nil, err } + kOk = (cr != nil) + } - if kOk = (cr != nil); kOk { + if origPrefix != "" { + if kOk { iter.SeekLowerBound(append(append([]byte{}, k...), 0)) } @@ -717,6 +684,50 @@ func shouldIncludePath( return true, nil } +func wildcardPrefix(root *iradix.Node, p string) (string, []byte, bool, error) { + // For consistency with what the copy implementation in fsutil + // does: split pattern into non-wildcard prefix and rest of + // pattern, then follow symlinks when resolving the non-wildcard + // prefix. + + d1, d2 := splitWildcards(p) + if d1 == "/" { + return "", nil, false, nil + } + + k, cr, err := getFollowLinks(root, convertPathToKey([]byte(d1)), true) + if err != nil { + return "", k, false, err + } + + if d2 != "" && cr != nil && cr.Type == CacheRecordTypeSymlink { + // getFollowLinks only handles symlinks in path + // components before the last component, so + // handle last component in d1 specially. + linksWalked := 0 + resolved := string(convertKeyToPath(k)) + for { + v, ok := root.Get(k) + + if !ok { + return d1, k, false, nil + } + if v.(*CacheRecord).Type != CacheRecordTypeSymlink { + break + } + + linksWalked++ + if linksWalked > 255 { + return "", k, false, errors.Errorf("too many links") + } + + resolved := cleanLink(resolved, v.(*CacheRecord).Linkname) + k = convertPathToKey([]byte(resolved)) + } + } + return d1, k, cr != nil, nil +} + func splitWildcards(p string) (d1, d2 string) { parts := strings.Split(path.Join(p), "/") var p1, p2 []string @@ -1030,14 +1041,8 @@ func getFollowLinksWalk(root *iradix.Node, k []byte, follow bool, linksWalked *i if *linksWalked > 255 { return nil, nil, errors.Errorf("too many links") } - dirPath := path.Clean(string(convertKeyToPath(dir))) - if dirPath == "." || dirPath == "/" { - dirPath = "" - } - link := path.Clean(parent.Linkname) - if !path.IsAbs(link) { - link = path.Join("/", path.Join(path.Dir(dirPath), link)) - } + + link := cleanLink(string(convertKeyToPath(dir)), parent.Linkname) return getFollowLinksWalk(root, append(convertPathToKey([]byte(link)), file...), follow, linksWalked) } } @@ -1049,6 +1054,18 @@ func getFollowLinksWalk(root *iradix.Node, k []byte, follow bool, linksWalked *i return k, nil, nil } +func cleanLink(dir, linkname string) string { + dirPath := path.Clean(dir) + if dirPath == "." || dirPath == "/" { + dirPath = "" + } + link := path.Clean(linkname) + if !path.IsAbs(link) { + return path.Join("/", path.Join(path.Dir(dirPath), link)) + } + return link +} + func prepareDigest(fp, p string, fi os.FileInfo) (digest.Digest, error) { h, err := NewFileHash(fp, fi) if err != nil { diff --git a/cache/contenthash/checksum_test.go b/cache/contenthash/checksum_test.go index bba6ee5fdf91..a674b7ef49a8 100644 --- a/cache/contenthash/checksum_test.go +++ b/cache/contenthash/checksum_test.go @@ -657,6 +657,7 @@ func TestChecksumIncludeSymlink(t *testing.T) { "ADD mnt dir", "ADD mnt/data symlink ../data", "ADD data/d0/d1/d2/foo file abc", + "ADD data/symlink-to-d0 symlink d0", } ref := createRef(t, cm, ch) @@ -704,6 +705,10 @@ func TestChecksumIncludeSymlink(t *testing.T) { dgstMntInnerWildcard, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0/d*/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil) require.NoError(t, err) require.Equal(t, dgstD2, dgstMntInnerWildcard) + + dgstMntInnerWildcard2, err := cc.Checksum(context.TODO(), ref, "mnt/data/symlink-to-d0/d*/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil) + require.NoError(t, err) + require.Equal(t, dgstD2, dgstMntInnerWildcard2) } func TestHandleChange(t *testing.T) { From e9e6cec838963ba2a47f4fd9ea27d0be976801e2 Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Wed, 8 Sep 2021 09:14:48 -0700 Subject: [PATCH 4/4] Use getFollowLinksWalked Signed-off-by: Aaron Lehmann --- cache/contenthash/checksum.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cache/contenthash/checksum.go b/cache/contenthash/checksum.go index 65019d075051..e505d05eac23 100644 --- a/cache/contenthash/checksum.go +++ b/cache/contenthash/checksum.go @@ -695,7 +695,8 @@ func wildcardPrefix(root *iradix.Node, p string) (string, []byte, bool, error) { return "", nil, false, nil } - k, cr, err := getFollowLinks(root, convertPathToKey([]byte(d1)), true) + linksWalked := 0 + k, cr, err := getFollowLinksWalk(root, convertPathToKey([]byte(d1)), true, &linksWalked) if err != nil { return "", k, false, err } @@ -704,7 +705,6 @@ func wildcardPrefix(root *iradix.Node, p string) (string, []byte, bool, error) { // getFollowLinks only handles symlinks in path // components before the last component, so // handle last component in d1 specially. - linksWalked := 0 resolved := string(convertKeyToPath(k)) for { v, ok := root.Get(k)