From 4b822e976298b65c819af18951f3083611fc772e Mon Sep 17 00:00:00 2001 From: Tonis Tiigi Date: Thu, 10 Jul 2025 00:07:58 -0700 Subject: [PATCH 1/3] debug: allow debug from remote cache config Signed-off-by: Tonis Tiigi --- cache/remotecache/v1/cachestorage.go | 27 ++++++++++- cmd/buildkitd/debug.go | 70 ++++++++++++++++++++++++---- util/cachedigest/digest.go | 17 ++++--- util/cachestore/store.go | 2 +- 4 files changed, 100 insertions(+), 16 deletions(-) diff --git a/cache/remotecache/v1/cachestorage.go b/cache/remotecache/v1/cachestorage.go index 3b40e1401885..6cd5e941a5da 100644 --- a/cache/remotecache/v1/cachestorage.go +++ b/cache/remotecache/v1/cachestorage.go @@ -106,7 +106,12 @@ func (cs *cacheKeyStorage) Exists(id string) bool { return ok } -func (cs *cacheKeyStorage) Walk(func(id string) error) error { +func (cs *cacheKeyStorage) Walk(cb func(id string) error) error { + for id := range cs.byID { + if err := cb(id); err != nil { + return err + } + } return nil } @@ -142,6 +147,26 @@ func (cs *cacheKeyStorage) Release(resultID string) error { func (cs *cacheKeyStorage) AddLink(id string, link solver.CacheInfoLink, target string) error { return nil } + +func (cs *cacheKeyStorage) WalkLinksAll(id string, fn func(id string, link solver.CacheInfoLink) error) error { + it, ok := cs.byID[id] + if !ok { + return nil + } + for nl, ids := range it.links { + for _, id2 := range ids { + if err := fn(id2, solver.CacheInfoLink{ + Input: solver.Index(nl.input), + Selector: digest.Digest(nl.selector), + Digest: nl.dgst, + }); err != nil { + return err + } + } + } + return nil +} + func (cs *cacheKeyStorage) WalkLinks(id string, link solver.CacheInfoLink, fn func(id string) error) error { it, ok := cs.byID[id] if !ok { diff --git a/cmd/buildkitd/debug.go b/cmd/buildkitd/debug.go index 546361ddedef..1f67a0565680 100644 --- a/cmd/buildkitd/debug.go +++ b/cmd/buildkitd/debug.go @@ -2,6 +2,7 @@ package main import ( "context" + "encoding/binary" "encoding/json" "expvar" "fmt" @@ 
-15,6 +16,7 @@ import ( "strings" "time" + cacheimport "github.com/moby/buildkit/cache/remotecache/v1" "github.com/moby/buildkit/solver" "github.com/moby/buildkit/util/bklog" "github.com/moby/buildkit/util/cachedigest" @@ -40,6 +42,7 @@ func setupDebugHandlers(addr string) error { m.Handle("/debug/cache/all", http.HandlerFunc(handleCacheAll)) m.Handle("/debug/cache/lookup", http.HandlerFunc(handleCacheLookup)) m.Handle("/debug/cache/store", http.HandlerFunc(handleDebugCacheStore)) + m.Handle("POST /debug/cache/load", http.HandlerFunc(handleCacheLoad)) m.Handle("/debug/gc", http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { runtime.GC() @@ -139,9 +142,13 @@ func printCacheRecord(record *cachedigest.Record, w io.Writer) { case cachedigest.FrameIDData: w.Write([]byte(" " + frame.ID.String() + ": " + string(frame.Data) + "\n")) case cachedigest.FrameIDSkip: - w.Write([]byte(" skipping " + string(frame.Data) + " bytes\n")) + fmt.Fprintf(w, " skipping %d bytes\n", binary.LittleEndian.Uint32(frame.Data)) } } + for _, subRec := range record.SubRecords { + w.Write([]byte("\n")) + printCacheRecord(subRec, w) + } } func cacheRecordLookup(ctx context.Context, dgst digest.Digest) (*cachedigest.Record, error) { @@ -216,18 +223,70 @@ func loadCacheAll(ctx context.Context) ([]*cachedigest.Record, error) { return records, nil } +func handleCacheLoad(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + if r.Body == nil { + http.Error(w, "body is required", http.StatusBadRequest) + return + } + defer r.Body.Close() + + recs, err := loadCacheFromReader(r.Context(), r.Body) + if err != nil { + http.Error(w, "failed to load cache: "+err.Error(), http.StatusInternalServerError) + return + } + writeCacheRecordsResponse(w, r, recs) +} + +func loadCacheFromReader(ctx context.Context, rdr io.Reader) ([]*recordWithDebug, error) { + dt, err := io.ReadAll(rdr) + if err != 
nil { + return nil, errors.Wrap(err, "failed to read body") + } + + allLayers := cacheimport.DescriptorProvider{} + cc := cacheimport.NewCacheChains() + if err := cacheimport.Parse(dt, allLayers, cc); err != nil { + return nil, err + } + + keyStorage, _, err := cacheimport.NewCacheKeyStorage(cc, nil) + if err != nil { + return nil, err + } + + recs, err := debugCacheStore(ctx, keyStorage) + if err != nil { + return nil, errors.Wrap(err, "failed to debug cache store") + } + + return recs, nil +} + func handleDebugCacheStore(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodGet { http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) return } - recs, err := debugCacheStore(r.Context()) + store := cacheStoreForDebug + if store == nil { + http.Error(w, "Cache store is not initialized for debug", http.StatusInternalServerError) + } + + recs, err := debugCacheStore(r.Context(), store) if err != nil { http.Error(w, "Failed to debug cache store: "+err.Error(), http.StatusInternalServerError) return } + writeCacheRecordsResponse(w, r, recs) +} +func writeCacheRecordsResponse(w http.ResponseWriter, r *http.Request, recs []*recordWithDebug) { w.WriteHeader(http.StatusOK) switch r.Header.Get("Accept") { @@ -287,12 +346,7 @@ type recordWithDebug struct { Debug []*cachedigest.Record `json:"debug,omitempty"` } -func debugCacheStore(ctx context.Context) ([]*recordWithDebug, error) { - store := cacheStoreForDebug - if store == nil { - return nil, errors.New("cache store is not initialized for debug") - } - +func debugCacheStore(ctx context.Context, store solver.CacheKeyStorage) ([]*recordWithDebug, error) { recs, err := cachestore.Records(ctx, store) if err != nil { return nil, errors.Wrap(err, "failed to get cache records") diff --git a/util/cachedigest/digest.go b/util/cachedigest/digest.go index ec92ee2c12a9..7407eddee0a6 100644 --- a/util/cachedigest/digest.go +++ b/util/cachedigest/digest.go @@ -67,11 +67,11 @@ func (h *Hash) WriteNoDebug(p []byte) 
(n int, err error) { if n > 0 && h.db != nil { if len(h.frames) > 0 && h.frames[len(h.frames)-1].ID == FrameIDSkip { last := &h.frames[len(h.frames)-1] - prevLen := binary.BigEndian.Uint32(last.Data) - binary.BigEndian.PutUint32(last.Data, prevLen+uint32(n)) + prevLen := binary.LittleEndian.Uint32(last.Data) + binary.LittleEndian.PutUint32(last.Data, prevLen+uint32(n)) } else { lenBytes := make([]byte, 4) - binary.BigEndian.PutUint32(lenBytes, uint32(n)) + binary.LittleEndian.PutUint32(lenBytes, uint32(n)) h.frames = append(h.frames, Frame{ID: FrameIDSkip, Data: lenBytes}) } } @@ -94,7 +94,7 @@ type Record struct { Digest digest.Digest `json:"digest"` Type Type `json:"type"` Data []Frame `json:"data,omitempty"` - SubRecords []Record `json:"subRecords,omitempty"` + SubRecords []*Record `json:"subRecords,omitempty"` } var shaRegexpOnce = sync.OnceValue(func() *regexp.Regexp { @@ -149,11 +149,16 @@ func (r *Record) LoadSubRecords(loader func(d digest.Digest) (Type, []Frame, err bklog.L.Warnf("failed to load sub-record for %s: %v", dgst, err) continue } - r.SubRecords = append(r.SubRecords, Record{ + rr := &Record{ Digest: digest.Digest(dgst), Type: typ, Data: frames, - }) + } + if err := rr.LoadSubRecords(loader); err != nil { + return err + } + + r.SubRecords = append(r.SubRecords, rr) } return nil } diff --git a/util/cachestore/store.go b/util/cachestore/store.go index 49731576084b..e4165dfc47be 100644 --- a/util/cachestore/store.go +++ b/util/cachestore/store.go @@ -35,7 +35,7 @@ type storeWithLinks interface { func Records(ctx context.Context, store solver.CacheKeyStorage) ([]*Record, error) { swl, ok := store.(storeWithLinks) if !ok { - return nil, errors.New("cache store does not support walkin all links") + return nil, errors.New("cache store does not support walking all links") } roots := []string{} From f3eabafa6295e1bb8045ef1a813fd3b42a839f84 Mon Sep 17 00:00:00 2001 From: Tonis Tiigi Date: Thu, 10 Jul 2025 16:41:01 -0700 Subject: [PATCH 2/3] use output 
specific digest in boltdb debug This adds an extra level of indirection to the cache keys debug, making it more verbose but making sure that same digests are reported for remote cache. Signed-off-by: Tonis Tiigi --- solver/bboltcachestorage/storage.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/solver/bboltcachestorage/storage.go b/solver/bboltcachestorage/storage.go index 893b41bc0a12..db35f107b602 100644 --- a/solver/bboltcachestorage/storage.go +++ b/solver/bboltcachestorage/storage.go @@ -366,6 +366,8 @@ func (s *Store) WalkLinksAll(id string, fn func(id string, link solver.CacheInfo if err := json.Unmarshal(parts[0], &link); err != nil { return err } + // make digest relative to output as not all backends store output separately + link.Digest = digest.FromBytes(fmt.Appendf(nil, "%s@%d", link.Digest, link.Output)) links = append(links, linkEntry{ id: string(parts[1]), link: link, From 854eace665d6895ee04b0e774a501cacb2d5717a Mon Sep 17 00:00:00 2001 From: Tonis Tiigi Date: Thu, 10 Jul 2025 17:24:45 -0700 Subject: [PATCH 3/3] debug: combine links from parents instead of children Grouping the link digests to parent side (links pointing to current node) makes more sense than children side (links pointing to next nodes). This also reduces the duplicates in the output as previously same digest link could need plaintext under multiple nodes. 
Signed-off-by: Tonis Tiigi --- cmd/buildkitd/debug.go | 27 +++++++++++------- util/cachedigest/db_test.go | 2 +- util/cachestore/store.go | 57 +++++++++++++++++++++---------------- 3 files changed, 50 insertions(+), 36 deletions(-) diff --git a/cmd/buildkitd/debug.go b/cmd/buildkitd/debug.go index 1f67a0565680..bebc0197b62b 100644 --- a/cmd/buildkitd/debug.go +++ b/cmd/buildkitd/debug.go @@ -1,6 +1,7 @@ package main import ( + "cmp" "context" "encoding/binary" "encoding/json" @@ -309,24 +310,28 @@ func writeCacheRecordsResponse(w http.ResponseWriter, r *http.Request, recs []*r if rec.Digest != "" { fmt.Fprintf(w, "Digest: %s\n", rec.Digest) } + if len(rec.Parents) > 0 { fmt.Fprintln(w, "Parents:") - for input := range rec.Parents { - ids := slices.Collect(maps.Keys(rec.ParentIDs[input])) - s := make([]string, len(ids)) - for i, id := range ids { - s[i] = fmt.Sprintf("%d", id) + slices.SortStableFunc(rec.Parents, func(i, j cachestore.Link) int { + return cmp.Or(cmp.Compare(i.Input, j.Input), cmp.Compare(i.Digest, j.Digest)) + }) + for _, parent := range rec.Parents { + fmt.Fprintf(w, " Input %d:\t%d\t%s\n", parent.Input, parent.Record.ID, parent.Digest) + if parent.Selector != "" { + fmt.Fprintf(w, " Selector: %s\n", parent.Selector) } - fmt.Fprintf(w, " Input %d:\t %s\n", input, strings.Join(s, ", ")) } } if len(rec.Children) > 0 { fmt.Fprintln(w, "Children:") - for _, child := range rec.Children { - fmt.Fprintf(w, " %d %s (input %d, output %d)\n", child.Record.ID, child.Digest, child.Input, child.Output) - if child.Selector != "" { - fmt.Fprintf(w, " Selector: %s\n", child.Selector) + for input := range rec.Children { + ids := slices.Collect(maps.Keys(rec.ChildIDs[input])) + s := make([]string, len(ids)) + for i, id := range ids { + s[i] = fmt.Sprintf("%d", id) } + fmt.Fprintf(w, " Input %d:\t %s\n", input, strings.Join(s, ", ")) } } if len(rec.Debug) > 0 { @@ -361,7 +366,7 @@ func debugCacheStore(ctx context.Context, store solver.CacheKeyStorage) ([]*reco if 
rec.Digest != "" { m[rec.Digest] = nil } - for _, link := range rec.Children { + for _, link := range rec.Parents { m[link.Digest] = nil if link.Selector != "" { m[link.Selector] = nil diff --git a/util/cachedigest/db_test.go b/util/cachedigest/db_test.go index f5a6fbdd0424..36d0468125e8 100644 --- a/util/cachedigest/db_test.go +++ b/util/cachedigest/db_test.go @@ -95,7 +95,7 @@ func TestNewHashAndGet(t *testing.T) { dataFrames = append(dataFrames, f.Data) case FrameIDSkip: require.Len(t, f.Data, 4) - skipLens = append(skipLens, uint32(f.Data[0])<<24|uint32(f.Data[1])<<16|uint32(f.Data[2])<<8|uint32(f.Data[3])) + skipLens = append(skipLens, uint32(f.Data[3])<<24|uint32(f.Data[2])<<16|uint32(f.Data[1])<<8|uint32(f.Data[0])) } } require.Len(t, dataFrames, len(inputs)) diff --git a/util/cachestore/store.go b/util/cachestore/store.go index e4165dfc47be..ac445e203396 100644 --- a/util/cachestore/store.go +++ b/util/cachestore/store.go @@ -1,7 +1,10 @@ package cachestore import ( + "cmp" "context" + "maps" + "slices" "strings" "github.com/moby/buildkit/solver" @@ -10,12 +13,12 @@ import ( ) type Record struct { - ID int `json:"id"` - Parents map[int]map[*Record]struct{} `json:"-"` - Children []Link `json:"children,omitempty"` - Digest digest.Digest `json:"digest,omitempty"` - Random bool `json:"random,omitempty"` - ParentIDs map[int]map[int]struct{} `json:"parents,omitempty"` + ID int `json:"id"` + Parents []Link `json:"parents,omitempty"` + Children map[int]map[*Record]struct{} `json:"-"` + Digest digest.Digest `json:"digest,omitempty"` + Random bool `json:"random,omitempty"` + ChildIDs map[int]map[int]struct{} `json:"children,omitempty"` } type Link struct { @@ -72,16 +75,16 @@ func Records(ctx context.Context, store solver.CacheKeyStorage) ([]*Record, erro } func setLinkIDs(rec *Record) { - for i, child := range rec.Children { - child.ID = child.Record.ID - rec.Children[i] = child + for i, parent := range rec.Parents { + parent.ID = parent.Record.ID + rec.Parents[i] = 
parent } - if rec.Parents != nil { - rec.ParentIDs = make(map[int]map[int]struct{}) - for input, m := range rec.Parents { - rec.ParentIDs[input] = make(map[int]struct{}) - for parent := range m { - rec.ParentIDs[input][parent.ID] = struct{}{} + if rec.Children != nil { + rec.ChildIDs = make(map[int]map[int]struct{}) + for input, m := range rec.Children { + rec.ChildIDs[input] = make(map[int]struct{}) + for child := range m { + rec.ChildIDs[input][child.ID] = struct{}{} } } } @@ -93,8 +96,14 @@ func setIndex(rec *Record, arr []*Record) []*Record { } arr = append(arr, rec) rec.ID = len(arr) - for _, child := range rec.Children { - arr = setIndex(child.Record, arr) + for _, links := range rec.Children { + recs := slices.Collect(maps.Keys(links)) + slices.SortFunc(recs, func(i, j *Record) int { + return cmp.Compare(i.Digest, j.Digest) + }) + for _, child := range recs { + arr = setIndex(child, arr) + } } return arr } @@ -122,23 +131,23 @@ func loadRecord(ctx context.Context, store storeWithLinks, id string, out map[st if err != nil { return errors.Wrapf(err, "failed to load link %s for %s", linkID, id) } - rec.Children = append(rec.Children, Link{ + child.Parents = append(child.Parents, Link{ Input: int(link.Input), Output: int(link.Output), Selector: link.Selector, - Record: child, + Record: rec, Digest: link.Digest, }) - if child.Parents == nil { - child.Parents = make(map[int]map[*Record]struct{}) + if rec.Children == nil { + rec.Children = make(map[int]map[*Record]struct{}) } - m, ok := child.Parents[int(link.Input)] + m, ok := rec.Children[int(link.Output)] if !ok { m = make(map[*Record]struct{}) - child.Parents[int(link.Input)] = m + rec.Children[int(link.Output)] = m } - m[rec] = struct{}{} + m[child] = struct{}{} return nil }) if err != nil {