From 0b98b9e02b372b67652b1dd35f04b916ca161526 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Fri, 13 Oct 2023 12:14:19 -0500 Subject: [PATCH 1/5] rpm: add failure case for package tests Signed-off-by: Hank Donnay --- rpm/packagescanner_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rpm/packagescanner_test.go b/rpm/packagescanner_test.go index d605539c0..36e001380 100644 --- a/rpm/packagescanner_test.go +++ b/rpm/packagescanner_test.go @@ -106,6 +106,9 @@ func (tc PackageTestcase) Run(ctx context.Context, a *test.CachedArena) func(*te t.Error(err) } t.Logf("found %d packages", len(got)) + if len(got) == 0 { + t.FailNow() + } if !cmp.Equal(got, want, rpmtest.Options) { t.Error(cmp.Diff(got, want, rpmtest.Options)) } From 91562a429105d983d4323d13b1872fae0539fc53 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Tue, 24 Oct 2023 16:34:40 -0500 Subject: [PATCH 2/5] fetch: remove http.Client argument With the `http.DefaultClient` now being poisoned, there's no "good" http.Client for a test to have. This change removes the argument and uses a single, package-internal client. Test-Fail: OK Signed-off-by: Hank Donnay --- test/fetch/layer.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/fetch/layer.go b/test/fetch/layer.go index 294cd94db..bb434c0e6 100644 --- a/test/fetch/layer.go +++ b/test/fetch/layer.go @@ -35,8 +35,12 @@ var registry = map[string]*client{ "registry.access.redhat.com": {Root: "https://registry.access.redhat.com/"}, } +var fetchClient = &http.Client{ + Transport: &http.Transport{}, +} + // Layer returns the specified layer contents, cached in the global layer cache. -func Layer(ctx context.Context, t testing.TB, c *http.Client, from, repo string, blob claircore.Digest, opt ...Option) (*os.File, error) { +func Layer(ctx context.Context, t testing.TB, from, repo string, blob claircore.Digest, opt ...Option) (*os.File, error) { t.Helper() opts := make(map[Option]bool) for _, o := range opt { @@ -63,14 +67,11 @@ func Layer(ctx context.Context, t testing.TB, c *http.Client, from, repo string, }) t.Logf("fetching layer into: %s", cachefile) - if c == nil { - c = http.DefaultClient - } client, ok := registry[from] if !ok { return nil, fmt.Errorf("unknown registry: %q", from) } - rc, err := client.Blob(ctx, c, repo, blob) + rc, err := client.Blob(ctx, fetchClient, repo, blob) if err != nil { return nil, err } From 075e23c0e53e33f680985bdcbb1a5ac19bf3a814 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Tue, 24 Oct 2023 16:35:29 -0500 Subject: [PATCH 3/5] periodic: update for `fetch` changes Test-Fail: OK Signed-off-by: Hank Donnay --- test/periodic/periodic_test.go | 2 +- test/periodic/rpm_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/periodic/periodic_test.go b/test/periodic/periodic_test.go index b5ac47a9c..1dd3963b1 100644 --- a/test/periodic/periodic_test.go +++ b/test/periodic/periodic_test.go @@ -10,7 +10,7 @@ import ( ) var ( - pkgClient = &http.Client{} + pkgClient = &http.Client{Transport: &http.Transport{}} fp driver.Fingerprint ) diff --git a/test/periodic/rpm_test.go b/test/periodic/rpm_test.go index 7388f70c0..80bf43f9b 100644 --- a/test/periodic/rpm_test.go +++ b/test/periodic/rpm_test.go @@ -226,7 +226,7 @@ func (doc hydraDoc) Run(dir string) func(*testing.T) { for _, ld := range image.Data[0].Parsed.Layers { // TODO(hank) Need a way to use the nicer API, but pass the // Integration bypass. 
- n, err := fetch.Layer(ctx, t, pkgClient, doc.Registry, doc.Repository, ld, fetch.IgnoreIntegration) + n, err := fetch.Layer(ctx, t, doc.Registry, doc.Repository, ld, fetch.IgnoreIntegration) if err != nil { t.Fatal(err) } From 356515920983ec8c0f3a32ffaec3ad125f17c920 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Tue, 24 Oct 2023 16:35:59 -0500 Subject: [PATCH 4/5] test: update for `fetch` changes Signed-off-by: Hank Donnay --- test/fetcher.go | 3 +-- test/layer.go | 2 +- test/packagescanner.go | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/test/fetcher.go b/test/fetcher.go index de79d4ecb..34ab9d51d 100644 --- a/test/fetcher.go +++ b/test/fetcher.go @@ -3,7 +3,6 @@ package test import ( "context" "fmt" - "net/http" "net/url" "os" "path/filepath" @@ -47,7 +46,7 @@ func (a *CachedArena) LoadLayerFromRegistry(ctx context.Context, t testing.TB, r if err != nil { t.Fatal(err) } - _, err = fetch.Layer(ctx, t, http.DefaultClient, ref.Registry, ref.Name, d) + _, err = fetch.Layer(ctx, t, ref.Registry, ref.Name, d) if err != nil { t.Fatal(err) } diff --git a/test/layer.go b/test/layer.go index e71f11691..9649d6440 100644 --- a/test/layer.go +++ b/test/layer.go @@ -125,7 +125,7 @@ func RealizeLayers(ctx context.Context, t *testing.T, refs ...LayerRef) []clairc t.Error(err) continue } - f, err := fetch.Layer(ctx, t, nil, refs[n].Registry, refs[n].Name, id) + f, err := fetch.Layer(ctx, t, refs[n].Registry, refs[n].Name, id) if err != nil { t.Error(err) continue diff --git a/test/packagescanner.go b/test/packagescanner.go index 7807912db..000caa664 100644 --- a/test/packagescanner.go +++ b/test/packagescanner.go @@ -2,7 +2,6 @@ package test import ( "context" - "net/http" "sort" "strings" "testing" @@ -113,7 +112,7 @@ func (tc ScannerTestcase) RunSubset(ctx context.Context, n int) func(*testing.T) func (tc *ScannerTestcase) getLayer(ctx context.Context, t *testing.T) *claircore.Layer { d := tc.Digest() - n, err := fetch.Layer(ctx, t, http.DefaultClient, tc.Domain, tc.Name, d) + n, err := fetch.Layer(ctx, t, tc.Domain, tc.Name, d) if err != nil { t.Fatal(err) } From 8f53edd1be7a21bcc9d389d4c6a7c39481ae6703 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Thu, 26 Oct 2023 13:50:29 -0500 Subject: [PATCH 5/5] tarfs: rewrite to support random access archives This change adds support for some schemes of encoding a table of contents and allowing individual files to be accessed independently. It breaks the API by adding a "size" parameter (like the archive/zip package) but attempts to autodetect when given a nonsense value. 
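For example, the layer.go call site changes roughly like so (see the diff
below; callers are now also responsible for closing the returned FS):

	// before
	sys, err := tarfs.New(r)

	// after: a negative size asks New to autodetect it, and a nil *os.File
	// spool is accepted until a backing file is actually needed.
	sys, err := tarfs.New(ctx, r, -1, nil)
	...
	l.cleanup = append(l.cleanup, sys)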
Signed-off-by: Hank Donnay --- go.mod | 2 +- layer.go | 3 +- pkg/tarfs/file.go | 173 ++++++++---- pkg/tarfs/fs.go | 284 +++++++++++++++++++ pkg/tarfs/metrics.go | 37 +++ pkg/tarfs/parse.go | 363 +++++++++++++----------- pkg/tarfs/pool.go | 60 ++++ pkg/tarfs/randomaccess.go | 203 ++++++++++++++ pkg/tarfs/seekable_test.go | 122 ++++++++ pkg/tarfs/srv.go | 553 +++++++++++++++++++++++++++++++++++++ pkg/tarfs/tarfs.go | 508 ++-------------------------------- pkg/tarfs/tarfs_test.go | 501 ++++++++++++++++++++++----------- 12 files changed, 1943 insertions(+), 866 deletions(-) create mode 100644 pkg/tarfs/fs.go create mode 100644 pkg/tarfs/metrics.go create mode 100644 pkg/tarfs/pool.go create mode 100644 pkg/tarfs/randomaccess.go create mode 100644 pkg/tarfs/seekable_test.go create mode 100644 pkg/tarfs/srv.go diff --git a/go.mod b/go.mod index 2970ea268..c5ef1a7d2 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,7 @@ require ( github.com/rs/zerolog v1.30.0 github.com/ulikunitz/xz v0.5.11 go.opentelemetry.io/otel v1.19.0 + go.opentelemetry.io/otel/metric v1.19.0 go.opentelemetry.io/otel/trace v1.19.0 golang.org/x/crypto v0.14.0 golang.org/x/sync v0.4.0 @@ -56,7 +57,6 @@ require ( github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.11.1 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect - go.opentelemetry.io/otel/metric v1.19.0 // indirect golang.org/x/mod v0.12.0 // indirect google.golang.org/protobuf v1.31.0 // indirect lukechampine.com/uint128 v1.2.0 // indirect diff --git a/layer.go b/layer.go index 7ad61e744..fa8db62a8 100644 --- a/layer.go +++ b/layer.go @@ -110,13 +110,14 @@ func (l *Layer) Init(ctx context.Context, desc *LayerDescription, r io.ReaderAt) `application/vnd.oci.image.layer.nondistributable.v1.tar`, `application/vnd.oci.image.layer.nondistributable.v1.tar+gzip`, `application/vnd.oci.image.layer.nondistributable.v1.tar+zstd`: - sys, err := tarfs.New(r) + sys, err := tarfs.New(ctx, r, -1, nil) switch { case errors.Is(err, nil): default: return fmt.Errorf("claircore: layer %v: unable to create fs.FS: %w", desc.Digest, err) } l.sys = sys + l.cleanup = append(l.cleanup, sys) default: return fmt.Errorf("claircore: layer %v: unknown MediaType %q", desc.Digest, desc.MediaType) } diff --git a/pkg/tarfs/file.go b/pkg/tarfs/file.go index 0aca24ee7..0a04b6a36 100644 --- a/pkg/tarfs/file.go +++ b/pkg/tarfs/file.go @@ -1,84 +1,141 @@ package tarfs import ( - "archive/tar" "io" "io/fs" - "path/filepath" + "path" "strings" + "time" ) -var _ fs.File = (*file)(nil) - -// File implements fs.File. -type file struct { - h *tar.Header - r *tar.Reader +// Entry is an entry describing a file or a file chunk. +// +// This is the concrete type backing [fs.FileInfo] interfaces returned by +// this package. +type Entry struct { + Xattrs map[string]string `json:"xattrs"` + Type string `json:"type"` + Name string `json:"name"` // NB This is actually the path. 
+ Linkname string `json:"linkName"` + Digest string `json:"digest"` + ChunkDigest string `json:"chunkDigest"` + UserName string `json:"userName"` // eStargz only + GroupName string `json:"groupName"` // eStargz only + ModTime time.Time `json:"modtime"` + AccessTime time.Time `json:"accesstime"` // Zstd chunked only + ChangeTime time.Time `json:"changetime"` // Zstd chunked only + Mode int64 `json:"mode"` + Size int64 `json:"size"` + Devmajor int64 `json:"devMajor"` + Devminor int64 `json:"devMinor"` + Offset int64 `json:"offset"` + EndOffset int64 `json:"endOffset"` // Zstd chunked only + ChunkSize int64 `json:"chunkSize"` + ChunkOffset int64 `json:"chunkOffset"` + UID int `json:"uid"` + GID int `json:"gid"` } -func (f *file) Close() error { - return nil -} +// Entry types. +const ( + typeDir = `dir` + typeReg = `reg` + typeSymlink = `symlink` + typeHardlink = `hardlink` + typeChar = `char` + typeBlock = `block` + typeFifo = `fifo` + typeChunk = `chunk` +) -func (f *file) Read(b []byte) (int, error) { - return f.r.Read(b) +// NewEntryDir returns a new Entry describing a directory at the path "n". +func newEntryDir(n string) Entry { + return Entry{ + Name: n, + Mode: int64(fs.ModeDir | 0o644), + Type: typeDir, + } } -func (f *file) Stat() (fs.FileInfo, error) { - return f.h.FileInfo(), nil +// SortDirent returns a function suitable to pass to [sort.Slice] as a "cmp" +// function. +// +// This is needed because the [io/fs] interfaces specify that [fs.DirEntry] +// slices returned by the ReadDir methods are sorted lexically. +func sortDirent(s []fs.DirEntry) func(i, j int) bool { + return func(i, j int) bool { + return strings.Compare(s[i].Name(), s[j].Name()) == -1 + } } -var _ fs.ReadDirFile = (*dir)(nil) +// Dirent implements [fs.DirEntry] using a backing [*Entry]. +type dirent struct{ *Entry } -// Dir implements fs.ReadDirFile. -type dir struct { - h *tar.Header - es []fs.DirEntry - pos int -} +// Interface assertion for dirent. +var _ fs.DirEntry = dirent{} -func (*dir) Close() error { return nil } -func (*dir) Read(_ []byte) (int, error) { return 0, io.EOF } -func (d *dir) Stat() (fs.FileInfo, error) { return d.h.FileInfo(), nil } -func (d *dir) ReadDir(n int) ([]fs.DirEntry, error) { - es := d.es[d.pos:] - if len(es) == 0 { - if n == -1 { - return nil, nil - } - return nil, io.EOF - } - end := min(len(es), n) - if n == -1 { - end = len(es) - } - d.pos += end - return es[:end], nil +// Name implements [fs.DirEntry]. +func (d dirent) Name() string { return path.Base(d.Entry.Name) } + +// IsDir implements [fs.DirEntry]. +func (d dirent) IsDir() bool { return d.Entry.Type == typeDir } + +// Type implements [fs.DirEntry]. +func (d dirent) Type() fs.FileMode { return fs.FileMode(d.Entry.Mode) & fs.ModeType } + +// Info implements [fs.DirEntry]. +func (d dirent) Info() (fs.FileInfo, error) { + return &inode{Entry: d.Entry}, nil } -func min(a, b int) int { - if a < b { - return a - } - return b +// File implements [fs.File] and [fs.ReadDirFile]. +// +// The ReadDir method errors if called on a non-dir file. +// The Read methods are implemented by a shared 0-size SectionReader for dir files. +type file struct { + inode + *io.SectionReader + dirent []fs.DirEntry + dirpos int } -type dirent struct{ *tar.Header } +// Interface assertions for file. +var ( + _ fs.ReadDirFile = (*file)(nil) + _ fs.File = (*file)(nil) -var _ fs.DirEntry = dirent{} + // Extra interfaces that we don't *need* to implement, but do for certain + // important use cases (namely reading sqlite databases). 
+ _ io.Seeker = (*file)(nil) + _ io.ReaderAt = (*file)(nil) +) -func (d dirent) Name() string { return filepath.Base(d.Header.Name) } -func (d dirent) IsDir() bool { return d.Header.FileInfo().IsDir() } -func (d dirent) Type() fs.FileMode { return d.Header.FileInfo().Mode() & fs.ModeType } -func (d dirent) Info() (fs.FileInfo, error) { return d.FileInfo(), nil } +// Close implements [fs.File]. +func (f *file) Close() error { return nil } -// SortDirent returns a function suitable to pass to sort.Slice as a "cmp" -// function. -// -// This is needed because the fs interfaces specify that DirEntry slices -// returned by the ReadDir methods are sorted lexically. -func sortDirent(s []fs.DirEntry) func(i, j int) bool { - return func(i, j int) bool { - return strings.Compare(s[i].Name(), s[j].Name()) == -1 +// Stat implements [fs.File]. +func (f *file) Stat() (fs.FileInfo, error) { return &f.inode, nil } + +// ReadDir implements [fs.ReadDirFile]. +func (f *file) ReadDir(n int) ([]fs.DirEntry, error) { + if f.Type != `dir` { + return nil, &fs.PathError{ + Op: `readdir`, + Path: f.Entry.Name, + Err: fs.ErrInvalid, + } } + es := f.dirent[f.dirpos:] + end := min(len(es), n) + switch { + case len(es) == 0 && n <= 0: + return nil, nil + case len(es) == 0 && n > 0: + return nil, io.EOF + case n <= 0: + end = len(es) + default: + } + f.dirpos += end + return es[:end], nil } diff --git a/pkg/tarfs/fs.go b/pkg/tarfs/fs.go new file mode 100644 index 000000000..aca2d8e1c --- /dev/null +++ b/pkg/tarfs/fs.go @@ -0,0 +1,284 @@ +package tarfs + +import ( + "bytes" + "context" + "encoding/binary" + "errors" + "fmt" + "io" + "io/fs" + "os" + "runtime" + + "github.com/quay/zlog" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/metric" +) + +// FS implements [fs.FS] over an optionally compressed tar file. +// +// FS buffers contents as needed. +// [FS.Close] must be called to release any held resources. +type FS struct { + cleanup io.Closer + srv +} + +// Decompressor is an interface that abstracts over the exact compression scheme used to compress chunks. +type decompressor interface { + io.Reader + io.WriterTo + Reset(io.Reader) error +} + +// A bunch of magic constants for zstd. +const ( + zstdFrame = 0xFD2FB528 + zstdSkippableMask = 0xFFFFFFF0 + zstdSkippableFrame = 0x184D2A50 + zstdChunkedFrameMagic = 0x78556E496C556E47 +) + +// Fixed bytes for the gzip member containing the eStargz's TOC. +var gzipHeader = []byte{0x1f, 0x8b, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF} + +// New implements a filesystem abstraction over an [io.ReaderAt] containing: +// +// - An eStargz-compatible tar +// - A zstd:chunked-compatible tar +// - An optionally compressed tar +// +// See the links in the package documentation for descriptions of the "eStargz" and "zstd:chunked" schemes. +// Prioritized files specified by eStargz are ignored by this implementation; all file contents are fetched lazily. +// Contents may be compressed via gzip or zstd. +// +// As an optimization, if "r" is an [os.File] containing an uncompressed tar, it will be used directly without internal buffering. +// If the passed "size" is less than zero, the size of the underlying data will attempt to be automatically determined. +// A nil "buf" can be used, but [ErrFileNeeded] will be returned if a backing file is needed. +func New(ctx context.Context, r io.ReaderAt, size int64, buf *os.File) (*FS, error) { + var ( + // Returned FS + sys FS + // Does this constructor need to bail out? 
Also used for metrics. + bail = true + // Some metrics: + seekable = false + compressionKind = `unknown` + ) + ctx = zlog.ContextWithValues(ctx, "component", `pkg/tarfs.New`) + ctx, span := tracer.Start(ctx, "New") + defer func() { + // This is gigantic, sorry. + attrs := []attribute.KeyValue{ + attribute.String("compression", compressionKind), + attribute.Bool("seekable", seekable), + attribute.Bool("success", !bail), + } + span.SetAttributes(attrs...) + + if bail { + span.SetStatus(codes.Error, "unsuccessful tarfs creation") + if sys.cleanup != nil { + if err := sys.cleanup.Close(); err != nil { + zlog.Warn(ctx). + AnErr("cleanup", err). + Msg("errors encountered during error return") + span.RecordError(err) + } + } + } else { + span.SetStatus(codes.Ok, "successful tarfs creation") + } + fsCounter.Add(ctx, 1, metric.WithAttributes(attrs...)) + span.End() + }() + + // If passed in a negative number, try to autodetect the size of the reader. + if size < 0 { + switch v := r.(type) { + case interface{ Size() int64 }: + size = v.Size() + case interface{ Stat() (fs.FileInfo, error) }: + fi, err := v.Stat() + if err != nil { + return nil, fmt.Errorf("tarfs: unable to stat: %w", err) + } + size = fi.Size() + case io.Seeker: + var err error + size, err = v.Seek(0, io.SeekEnd) + if err != nil { + return nil, fmt.Errorf("tarfs: unable to seek: %w", err) + } + if _, err := v.Seek(0, io.SeekStart); err != nil { + return nil, fmt.Errorf("tarfs: unable to seek: %w", err) + } + default: + return nil, errors.New("tarfs: unable to determine size of ReaderAt") + } + } + + footer := make([]byte, 64) // Current maximum size for our supported schemes is 51 bytes. + n, err := r.ReadAt(footer, size-int64(len(footer))) + switch { + case errors.Is(err, nil): + case n == len(footer) && errors.Is(err, io.EOF): + default: + return nil, fmt.Errorf("tarfs: unable to read footer: %w", err) + } + + var toc *toc + var dec decompressor + // Examine the footer: + zframe := footer[len(footer)-48:] + isZstd := zstdSkippableFrame == (binary.LittleEndian.Uint32(zframe)&zstdSkippableMask) && + zstdChunkedFrameMagic == binary.LittleEndian.Uint64(zframe[len(zframe)-8:]) + gframe := footer[len(footer)-51:] + switch { + case isZstd: + z := getZstd() + dec = z + compressionKind = `zstd` + seekable = true + toc, err = extractZstdTOC(ctx, r, z, zframe) + case bytes.Equal(gzipHeader, gframe[:len(gzipHeader)]): // isGzip + z := getGzip() + dec = z + compressionKind = `gzip` + seekable = true + toc, err = extractGzipTOC(ctx, r, z, gframe) + default: + // So this isn't a seekable variant we're aware of. + // + // To be extremely cool, try reading a block and see if we can make + // sense of what's there and handle it as a "normal" tar. + header := make([]byte, 512) + switch n, err := r.ReadAt(header, 0); { + case errors.Is(err, nil): // OK + case n == len(header) && errors.Is(err, io.EOF): // Single member? Odd, but not _not_ OK. + default: + return nil, fmt.Errorf("tarfs: unable to read header: %w", err) + } + var unz bool + Loop: + switch { + case bytes.Equal(header[magicOff:][:8], magicOldGNU) || + bytes.Equal(header[magicOff:][:6], magicGNU) || + bytes.Equal(header[magicOff:][:6], magicPAX): + if dec == nil { + compressionKind = `none` + } + case !unz && dec != nil: + // A previous time around this switch populated a decompressor, so + // load a new block. 
+ b := make([]byte, 512) + dec.Reset(io.NewSectionReader(r, 0, -1)) + if _, err = io.ReadFull(dec, b); err != nil { + // err is set + break + } + header = b + unz = true + goto Loop + case unz: + err = parseErr("unknown kind") + case zstdFrame == binary.LittleEndian.Uint32(header): + dec = getZstd() + compressionKind = `zstd` + goto Loop + case bytes.Equal(gzipHeader[:2], header[:2]): // See RFC1952 2.3.1 for why "2". + dec = getGzip() + compressionKind = `gzip` + goto Loop + } + if err != nil { + return nil, fmt.Errorf("tarfs: error examining standard tar: %w", err) + } + // Now that we're here, the following invariants hold: + // + // - The uncompressed data from the passed-in ReaderAt *is* a tar, to + // our satisfaction. + // - We need to construct the TOC. + // + // Since we need to do a linear read to construct the TOC (as there's no + // way to tell if the compression frames are amenable to our access + // pattern), we may as well buffer the whole thing. This is + // special-cased so that we can read through the stream as it's being + // decompressed. + toc, buf, err = buildTOC(ctx, r, dec, buf) + if err != nil { + return nil, fmt.Errorf("tarfs: unable to build TOC: %w", err) + } + err = sys.init(buf, toc.Entry, inodeIdent) + } + if err != nil { + return nil, fmt.Errorf("tarfs: error initializing FS: %w", err) + } + if toc.Version != 1 { + return nil, errors.New("unsupported version") + } + + if seekable { + if buf == nil { + return nil, ErrFileNeeded + } + d, err := newDiskBuf(r, dec, sys.inodeByIdx, buf) + if err != nil { + return nil, fmt.Errorf("tarfs: unable to create disk buffer: %w", err) + } + sys.cleanup = d + if err := sys.init(d.buf, toc.Entry, d.fetchFile); err != nil { + return nil, fmt.Errorf("tarfs: error initializing FS: %w", err) + } + } + + _, file, line, _ := runtime.Caller(1) + runtime.SetFinalizer(&sys, func(sys *FS) { + panic(fmt.Sprintf("%s:%d: FS not closed", file, line)) + }) + profile.Add(&sys, 1) + bail = false + return &sys, nil +} + +// Close releases any held resources and reports errors encountered while doing so. +// +// Failing to call Close or calling Close on an instance that was not returned by [New] may result in the program panicing. +func (s *FS) Close() error { + runtime.SetFinalizer(s, nil) + profile.Remove(s) + if s.cleanup != nil { + return s.cleanup.Close() + } + return nil +} + +// Interface assertions for FS. +var ( + _ fs.FS = (*FS)(nil) + _ fs.GlobFS = (*FS)(nil) + _ fs.ReadDirFS = (*FS)(nil) + _ fs.ReadFileFS = (*FS)(nil) + _ fs.StatFS = (*FS)(nil) + // Skipped implementing [fs.SubFS], as sharing the backing buffer + // would be complicated and probably end up needing a file lock on it. +) + +// Assert the two openFunc implementations fulfill the type. +var ( + _ openFunc = (*diskBuf)(nil).fetchFile + _ openFunc = inodeIdent +) + +// InodeIdent is an [openFunc] that uses the values in the passed [inode]. +func inodeIdent(r inode) (off, sz int64, err error) { + return r.Offset, r.Entry.Size, nil +} + +// Toc is a table of contents. +type toc struct { + Entry []Entry `json:"entries"` + Version int `json:"version"` +} diff --git a/pkg/tarfs/metrics.go b/pkg/tarfs/metrics.go new file mode 100644 index 000000000..62a59b94f --- /dev/null +++ b/pkg/tarfs/metrics.go @@ -0,0 +1,37 @@ +package tarfs + +import ( + "runtime/pprof" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/trace" +) + +// Metrics singletons. 
+var ( + tracer trace.Tracer + meter metric.Meter +) + +// FsCounter is the metrics for the [New] function. +var fsCounter metric.Int64Counter + +// Profile is a [pprof.Profile] for tracking FS objects. +var profile *pprof.Profile + +func init() { + const pkgname = `github.com/quay/claircore/pkg/tarfs` + tracer = otel.Tracer(pkgname) + meter = otel.Meter(pkgname) + profile = pprof.NewProfile(pkgname + ".FS") + + var err error + fsCounter, err = meter.Int64Counter("fs.creation.count", + metric.WithDescription("total number of tarfs.FS objects constructed"), + metric.WithUnit("{instance}"), + ) + if err != nil { + panic(err) + } +} diff --git a/pkg/tarfs/parse.go b/pkg/tarfs/parse.go index 30c71f498..93f095c1e 100644 --- a/pkg/tarfs/parse.go +++ b/pkg/tarfs/parse.go @@ -3,203 +3,246 @@ package tarfs import ( "archive/tar" "bytes" + "context" + "encoding/binary" + "encoding/json" "errors" "fmt" "io" + "io/fs" + "os" "strconv" -) -// The value we should find in the "magic" position of the tar header. -var ( - magicPAX = []byte("ustar\x00") - magicGNU = []byte("ustar ") - magicOldGNU = []byte("ustar \x00") + "github.com/klauspost/compress/gzip" + "github.com/klauspost/compress/zstd" + "go.opentelemetry.io/otel/attribute" ) -// ErrFormat can be compared via [errors.Is] against errors reported by [New] -// to determine if the tar fail is considered well-formed. +// ErrFormat can be compared via [errors.Is] against errors reported by +// [New] to determine if the tar file or relevant footer is considered +// well-formed. var ErrFormat = errors.New("tarfs: format error reading file") -// ParseErr returns an error that .Is reports true for ErrFormat. +// ParseErr returns an error that [errors.Is] reports true for [ErrFormat]. // // The `%w` verb does not work. func parseErr(f string, v ...interface{}) error { return parseError(fmt.Sprintf(f, v...)) } +// ParseError is the concrete type out of [parseErr]. type parseError string func (e parseError) Is(tgt error) bool { return tgt == ErrFormat } func (e parseError) Error() string { return string(e) } -// FindSegments looks at a tar blockwise to establish where individual files and -// their headers are stored. Each returned segment describes a region that is -// not a complete tar file, but can have exactly one file read from it. -func findSegments(r io.ReaderAt) ([]segment, error) { - // Constants and offsets from POSIX. - const ( - blockSz = 512 - magicOff = 257 - versionOff = 263 - typeflag = 156 - sizeOff = 124 - ) - b := make([]byte, blockSz) - var ret []segment - // Start block of the current segment. - var cur int64 - // Block number being read. - var blk int64 - // Has the parser seen a zeroes block. - var zeroes bool - -Scan: - for { - off := blk * blockSz - n, err := r.ReadAt(b, off) - switch { - case errors.Is(err, nil) && n != blockSz: - // Should be impossible with a well-formed archive, so raise an - // error. Should also be impossible with a conforming [io.ReaderAt]. - return nil, parseErr("short read at offset: %d (got: %d, want: %d)", off, n, blockSz) - case errors.Is(err, nil): // OK - case errors.Is(err, io.EOF): - switch { - case n == 0: - // Early EOF on a block boundary. Let it slide. - // - // Handle this case because some layers in the wild are a single - // directory block, with no trailer. - break Scan - case n == blockSz: - // Make sure to process the read, even if EOF was returned. 
- default: - return nil, parseErr("unexpected EOF at %d: %v", off, err) - } - default: - return nil, err - } +// ErrFileNeeded is reported when an [os.File] is needed for buffering tar contents, but has not been provided. +var ErrFileNeeded = errors.New("tarfs: *os.File needed but not provided") + +// BuildTOC builds a table of contents from the optionally-compressed bytes in the [io.ReaderAt] pointed to by r. +// Because we may have to decompress the bytes and have to do a linear read anyway, immediately buffer the contents. +// +// This passes the result back out to re-use the codepath for when the TOC is separable from the archive. +// The returned [os.File] is either "r" or "buf", depending on if "r" is backed by an [os.File]. +func buildTOC(ctx context.Context, r io.ReaderAt, dec decompressor, buf *os.File) (*toc, *os.File, error) { + _, span := tracer.Start(ctx, "buildTOC") + defer span.End() + // This copies the passed in bytes, optionally eliding the copy entirely if + // an *os.File was passed in at the start. + _, isFile := r.(*os.File) + if !isFile && buf == nil { + return nil, nil, ErrFileNeeded + } - magic := b[magicOff:][:6] - zeroBlock := true - for _, b := range b { - if b != 0x00 { - zeroBlock = false - break - } + var pos io.Seeker = buf + var tr *tar.Reader + switch { + case dec != nil: + if err := dec.Reset(io.NewSectionReader(r, 0, -1)); err != nil { + return nil, nil, err } - switch { - // Tar files end with two blocks of zeroes. These two arms track that. - case !zeroes && zeroBlock: - zeroes = true - continue - case zeroes && zeroBlock: - // Check for a valid second zeroes block. - break Scan - case zeroes && !zeroBlock: - // Found the first trailer block, but not the second. - return nil, parseErr("bad block at %d: expected second trailer block", off) - // These arms are belt-and-suspenders to make sure we're reading a - // header block and not a contents block, somehow. - case bytes.Equal(b[magicOff:][:8], magicOldGNU): - // OldGNU madness. This arm matching means the headers aren't - // actually POSIX conforming, but hopefully it's not an issue. Just - // roll with it. USTAR was standardized in 1988; frankly, it's the - // creator's fault if something doesn't work right because there's - // some incompatibility. - case !bytes.Equal(magic, magicPAX) && !bytes.Equal(magic, magicGNU): - return nil, parseErr("bad block at %d: got magic %+q", off, magic) - case !bytes.Equal(b[versionOff:][:2], []byte("00")): - return nil, parseErr("bad block at %d: got version %+q", off, b[versionOff:][:2]) + tr = tar.NewReader(io.TeeReader(dec, buf)) + case isFile: + span.AddEvent("using ReaderAt directly") + // Use the backing File directly. + buf = r.(*os.File) + pos = buf + if _, err := pos.Seek(0, io.SeekStart); err != nil { + return nil, nil, err } - encSz := b[sizeOff:][:12] - sz, err := parseNumber(encSz) + tr = tar.NewReader(buf) + default: + span.AddEvent("no decompression") + tr = tar.NewReader(io.TeeReader(io.NewSectionReader(r, 0, -1), buf)) + } + + var toc toc + toc.Version = 1 + toc.Entry = make([]Entry, 0, 4096) // Guess at initial capacity. 
+ h, err := tr.Next() + for ; err == nil; h, err = tr.Next() { + off, err := pos.Seek(0, io.SeekCurrent) if err != nil { - return nil, parseErr("invalid number: %024x: %v", encSz, err) + return nil, nil, parseErr("tarfs: error reading header: %v", err) } - nBlk := sz / blockSz - if sz%blockSz != 0 { - nBlk++ - } - blk++ // Current header block - blk += nBlk // File contents - switch b[typeflag] { - case tar.TypeXHeader, tar.TypeGNULongLink, tar.TypeGNULongName, tar.TypeGNUSparse: - // All these are prepended to a "real" entry. - case tar.TypeBlock, tar.TypeChar, tar.TypeCont, tar.TypeDir, tar.TypeFifo, tar.TypeLink, tar.TypeReg, tar.TypeRegA, tar.TypeSymlink: - // Found a data block, emit it: - ret = append(ret, segment{start: cur * blockSz, size: (blk - cur) * blockSz}) - fallthrough + // Turn the tar.Header into an Entry + i := len(toc.Entry) + toc.Entry = append(toc.Entry, Entry{}) + e := &toc.Entry[i] + e.Offset = off + // Copy everything over: + e.Size = h.Size + // Callee does path normalization. + e.Name = h.Name + e.Linkname = h.Linkname + e.ModTime = h.ModTime + e.AccessTime = h.AccessTime + e.ChangeTime = h.ChangeTime + e.UserName = h.Uname + e.GroupName = h.Gname + e.UID = h.Uid + e.GID = h.Gid + e.Mode = h.Mode + e.Devmajor = h.Devmajor + e.Devminor = h.Devminor + switch h.Typeflag { + case tar.TypeReg: + e.Type = typeReg + case tar.TypeLink: + e.Type = typeHardlink + case tar.TypeDir: + e.Type = typeDir + e.Mode |= int64(fs.ModeDir) + case tar.TypeSymlink: + e.Type = typeSymlink + e.Mode |= int64(fs.ModeSymlink) + case tar.TypeChar: + e.Type = typeChar + e.Mode |= int64(fs.ModeDevice) + e.Mode |= int64(fs.ModeCharDevice) + case tar.TypeBlock: + e.Type = typeBlock + e.Mode |= int64(fs.ModeDevice) + case tar.TypeFifo: + e.Type = typeFifo + e.Mode |= int64(fs.ModeNamedPipe) default: - // any blocks not enumerated are not handled. - cur = blk + return nil, nil, fmt.Errorf("tarfs: unknown kind: %v", h.Typeflag) } } - return ret, nil -} + if !errors.Is(err, io.EOF) { + return nil, nil, parseErr("tarfs: error reading header: %v", err) + } + span.SetAttributes(attribute.Int("entries", len(toc.Entry))) -// Segment describes one file in a tar, including relevant headers. -type segment struct { - start int64 - size int64 + return &toc, buf, nil } -// ParseNumber extracts a number from the encoded form in the tar header. -// -// This is based on the internal version in archive/tar. -func parseNumber(b []byte) (int64, error) { - // If in binary format, decode it. - if len(b) > 0 && b[0]&0x80 != 0 { - // See also: src/archive/tar/strconv.go - // Handling negative numbers relies on the following identity: - // -a-1 == ^a - // - // If the number is negative, we use an inversion mask to invert the - // data bytes and treat the value as an unsigned number. - var inv byte // 0x00 if positive or zero, 0xff if negative - if b[0]&0x40 != 0 { - inv = 0xff - } +/* +NOTE(hank) The eStargz format differs from the stargz format in where the tar headers are placed relative to the gzip headers. +In stargz, the tar headers are inside the gzip header, so readers can extract individual members knowing only the offset. +The eStargz format implemented here has gzip headers "inside" the tar members, and so the length needs to be known beforehand. +This format results in every member having at least one extra tar block describing the next member, except for the first (just a header) and last (just data). +The TOC is specially defined to be a whole tar by itself. 
+The zstd chunked scheme is largely the same, but places the TOC in a skippable frame (a concept nonexistent in gzip) instead of as a tar member. +*/ - var x uint64 - for i, c := range b { - c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing - if i == 0 { - c &= 0x7f // Ignore signal bit in first byte - } - if (x >> 56) > 0 { - return 0, errors.New("integer overflow") - } - x = x<<8 | uint64(c) - } - if (x >> 63) > 0 { - return 0, errors.New("integer overflow") - } - if inv == 0xff { - return ^int64(x), nil - } - return int64(x), nil +// This limit copied out of the containers/storage code. +// 50MiB seems way too big but it's better than not having it. +// 🤷 +const bufLimit = (1 << 20) * 50 + +// ExtractZstdTOC pulls the table of contents out of the skippable frame as indicated in the footer. +func extractZstdTOC(ctx context.Context, r io.ReaderAt, z *zstd.Decoder, footer []byte) (*toc, error) { + const crfsKind = 1 + ctx, span := tracer.Start(ctx, "extractZstdTOC") + defer span.End() + + var h zstd.Header + if err := h.Decode(footer); err != nil { + return nil, err + } + if !h.Skippable || h.SkippableID != 0 { + return nil, parseErr("martian zstd frame") } - // Otherwise, it's stringified. - b = bytes.Trim(b, " \x00") - if len(b) == 0 { - return 0, nil + if h.SkippableSize != 40 { + return nil, parseErr("unexpected frame size") } - n, err := strconv.ParseUint(cstring(b), 8, 64) + + b := footer[h.HeaderSize:] + offset := int64(binary.LittleEndian.Uint64(b[0:8])) + length := binary.LittleEndian.Uint64(b[8:16]) + if length > bufLimit { + return nil, errors.New("manifest too big") + } + lengthUncompressed := binary.LittleEndian.Uint64(b[16:24]) + if lengthUncompressed > bufLimit { + return nil, errors.New("manifest too big") + } + if kind := binary.LittleEndian.Uint64(b[24:32]); kind != crfsKind { + return nil, parseErr("invalid manifest kind") + } + + src, dst := make([]byte, length), make([]byte, 0, lengthUncompressed) + n, err := r.ReadAt(src, offset) + switch { + case errors.Is(err, nil): + case n == len(src) && errors.Is(err, io.EOF): + default: + return nil, fmt.Errorf("tarfs: unable to read manifest: %w", err) + } + + dst, err = z.DecodeAll(src, dst) if err != nil { - return 0, err + return nil, fmt.Errorf("tarfs: unable to decompress manifest: %w", err) } - return int64(n), nil + + var toc toc + if err := json.Unmarshal(dst, &toc); err != nil { + return nil, fmt.Errorf("tarfs: unable to decode manifest: %w", err) + } + span.SetAttributes(attribute.Int("entries", len(toc.Entry))) + return &toc, nil } -// Cstring interprets the byte slice as a C string. If there is no NULL, it -// returns the entire slice as a string. -// -// The entire-slice behavior handles the case where a fixed size header field is -// fully populated. -func cstring(b []byte) string { - if i := bytes.IndexByte(b, 0); i >= 0 { - return string(b[:i]) +// ExtractGzipTOC pulls the table of contents out of the tar archive as indicated in the footer. +func extractGzipTOC(ctx context.Context, r io.ReaderAt, z *gzip.Reader, footer []byte) (*toc, error) { + ctx, span := tracer.Start(ctx, "extractGzipTOC") + defer span.End() + if err := z.Reset(bytes.NewReader(footer)); err != nil { + return nil, err + } + + b := z.Extra + // The main deviation from the stargz footer is the addition of the following 4 bytes to make the extra data RFC1952 compliant. + // The go "gzip" package doesn't bother with this framing, so that's probably where the initial (mis)use came from. 
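+	// Rough sketch of the extra-field layout, inferred from the checks below:
+	//   b[0:2]   subfield ID "SG"
+	//   b[2:4]   subfield length (22, little-endian)
+	//   b[4:20]  TOC offset, 16 hex digits
+	//   b[20:26] the literal "STARGZ"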
+ if b[0] != 'S' || b[1] != 'G' || + binary.LittleEndian.Uint16(b[2:4]) != 22 || + !bytes.Equal(b[20:26], []byte("STARGZ")) { + return nil, parseErr("invalid extra field") + } + offset, err := strconv.ParseInt(string(b[4:20]), 16, 64) + if err != nil { + return nil, parseErr("unable to parse offset: %v", err) + } + + // Abuse a section reader to get a cursor over the ReaderAt. + if err := z.Reset(io.NewSectionReader(r, offset, bufLimit)); err != nil { + return nil, err + } + tr := tar.NewReader(z) + h, err := tr.Next() + switch { + case err != nil: + return nil, err + case h.Name != `stargz.index.json`: + return nil, parseErr("found unexpected file: %v", h.Name) + } + + var toc toc + if err := json.NewDecoder(tr).Decode(&toc); err != nil { + return nil, fmt.Errorf("tarfs: unable to decode manifest: %w", err) } - return string(b) + span.SetAttributes(attribute.Int("entries", len(toc.Entry))) + return &toc, nil } diff --git a/pkg/tarfs/pool.go b/pkg/tarfs/pool.go new file mode 100644 index 000000000..cd2aef005 --- /dev/null +++ b/pkg/tarfs/pool.go @@ -0,0 +1,60 @@ +package tarfs + +import ( + "fmt" + "sync" + + "github.com/klauspost/compress/gzip" + "github.com/klauspost/compress/zstd" +) + +// TODO(hank) Remove these wrapper functions when a generic sync.Pool lands. + +// GetCopyBuf pulls a buffer from the pool. +func getCopyBuf() []byte { + b := bufpool.Get() + if b == nil { + // Allocate 1 MiB buffers to start. + // This is much too big for small files and much too small for big files. + return make([]byte, 1024*1024) + } + return b.([]byte) +} + +// PutCopyBuf returns a buffer to the pool. +func putCopyBuf(b []byte) { bufpool.Put(b) } + +// Getzstd pulls an initialized decoder from the pool. +func getZstd() *zstd.Decoder { + d := zstdpool.Get() + if d == nil { + var err error + if d, err = zstd.NewReader(nil); err != nil { + // Should *never* happen -- a nil Reader causes only internal setup allocations. + panic(fmt.Sprintf("error creating zstd reader: %v", err)) + } + } + return d.(*zstd.Decoder) +} + +// PutZstd returns a decoder to the pool. +func putZstd(d *zstd.Decoder) { zstdpool.Put(d) } + +// GetGzip pulls an initialized reader from the pool. +func getGzip() *gzip.Reader { + r := gzippool.Get() + if r == nil { + return new(gzip.Reader) + } + return r.(*gzip.Reader) +} + +// PutGzip returns a reader to the pool. +func putGzip(r *gzip.Reader) { gzippool.Put(r) } + +// Package-level pools for the respective objects. +var ( + bufpool sync.Pool + zstdpool sync.Pool + gzippool sync.Pool +) diff --git a/pkg/tarfs/randomaccess.go b/pkg/tarfs/randomaccess.go new file mode 100644 index 000000000..60829805a --- /dev/null +++ b/pkg/tarfs/randomaccess.go @@ -0,0 +1,203 @@ +package tarfs + +import ( + "bytes" + "context" + "crypto/sha256" + "crypto/sha512" + "encoding/hex" + "fmt" + "hash" + "io" + "io/fs" + "os" + "runtime" + "strings" + "sync" + + "github.com/klauspost/compress/gzip" + "github.com/klauspost/compress/zstd" + "golang.org/x/sync/semaphore" +) + +// DiskBuf encapsulates all the logic for random access and decompression from +// an upstream [io.ReaderAt] into a locally-backed buffer. +type diskBuf struct { + // All "buf" reads/writes are done by *At methods, meaning the file only needs + // to be locked when resizing it. + resizeMu sync.Mutex + upstream io.ReaderAt + dec decompressor + chunk func(int) inode + buf *os.File + sem *semaphore.Weighted +} + +// NewDiskBuf allocates a disk-backed buffer. +// +// The passed-in [os.File] is not closed with the diskBuf. 
+func newDiskBuf(r io.ReaderAt, z decompressor, chunk func(int) inode, spool *os.File) (*diskBuf, error) { + d := diskBuf{ + upstream: r, + dec: z, + chunk: chunk, + buf: spool, + sem: semaphore.NewWeighted(int64(runtime.GOMAXPROCS(0))), + } + return &d, nil +} + +// FetchFile reads compressed data from the backing [io.ReaderAt] and decompresses +// it into a contiguous section of the backing buffer. The buffer has no maximum +// size and is never re-used; that is to say, the maximum size is about the same +// as if the data were downloaded and decompressed in its entirety. +// +// The buffering could be implemented a few different ways transparently: +// +// - Each opened file could get its own backing file buffer that's removed once all +// opened [fs.File] implementations for it are Closed. +// - The backing file could be used as a ring buffer once it hits some size, +// at the cost of inserting file headers and then needing to seek to the next open +// section. +// - The ring buffer idea could also shrink down, if a large file pushed it over +// the target size and was then closed. +// +// All of these would require some reference counting and in the case of the ring-buffer, +// a scavenger routine to collapse and potentially move segments. The execution time +// may be able to be mitigated with fs-specific optimizations, but the need for any +// of these strategies should be demonstrated first. +// +// Multi-chunk files are handled transparently, but not implemented in the zstd:chunked +// format as implemented by the github.com/containers modules. +func (d *diskBuf) fetchFile(r inode) (off, sz int64, err error) { + const op = `fetchfile` + d.sem.Acquire(context.Background(), 1) + defer d.sem.Release(1) + + // NB Weirdness for correct locking without extra scope. + d.resizeMu.Lock() + off, err = d.buf.Seek(0, io.SeekCurrent) + if err == nil { + err = d.buf.Truncate(off + int64(r.Entry.Size)) + } + if err == nil { + _, err = d.buf.Seek(0, io.SeekEnd) + } + d.resizeMu.Unlock() + if err != nil { + return -1, -1, &fs.PathError{ + Op: op, + Path: r.Name(), + Err: err, + } + } + sz = int64(r.Entry.Size) + + fhash := io.Discard + cksum, hasher, err := toCheck(r.Entry.Digest) + // Any parse error from this optional property should be ignored. + // Any use of "cksum" or "hasher" needs to be checked that they're + // not nil. + if err == nil { + fhash = hasher + } + // From here on out, reset the file cursor on error cases. + chunks := []inode{r} + for _, ei := range r.chunk { + chunks = append(chunks, d.chunk(ei)) + } + + // Using a bigger buffer here allows for the underlying ReaderAt to be + // dumber. This is expected to be backed by making HTTP requests, where the + // latency is much worse than disk. As such, it makes sense to do fewer, + // bigger reads rather than many tiny reads. A bigger buffer here results + // in bigger range requests to the underlying ReaderAt, and means it + // (hopefully) won't need prefetching logic in an HTTP-backed + // implementation. + cp := getCopyBuf() + defer putCopyBuf(cp) + + // This src, dst pair works by swapping out what compressed bytes are being + // fed into the decoder. Every chunk resets the reader to pull from a new + // section of compressed source, then opportunistically validates the + // decompressed data chunk-wise. 
+ src := io.TeeReader(io.LimitReader(d.dec, sz), fhash) + dst := io.NewOffsetWriter(d.buf, off) + for n, e := range chunks { + zsrc := io.NewSectionReader(d.upstream, e.Offset, -1) // abuse a section reader to get a cursor + if err := d.dec.Reset(zsrc); err != nil { + return -1, -1, &fs.PathError{ + Op: op, + Path: r.Name(), + Err: err, + } + } + hasher := io.Discard + cksum, hash, err := toCheck(e.ChunkDigest) + if err == nil { // Zstd:chunked doesn't treat this as a mandatory property. + hasher = hash + } + if _, err := io.CopyBuffer(dst, io.TeeReader(src, hasher), cp); err != nil { + return -1, -1, &fs.PathError{ + Op: op, + Path: r.Name(), + Err: err, + } + } + if cksum != nil && !bytes.Equal(cksum, hash.Sum(nil)) { + return -1, -1, &fs.PathError{ + Op: op, + Path: r.Name(), + Err: fmt.Errorf("failed to validate chunk %d: got: %q, want: %q", + n, hex.EncodeToString(hash.Sum(nil)), hex.EncodeToString(cksum)), + } + } + } + + if cksum != nil && !bytes.Equal(cksum, hasher.Sum(nil)) { + return -1, -1, &fs.PathError{ + Op: op, + Path: r.Name(), + Err: fmt.Errorf("failed to validate file: got: %q, want: %q", + hex.EncodeToString(hasher.Sum(nil)), hex.EncodeToString(cksum)), + } + } + + return off, sz, nil +} + +// Close puts decompressors back into their pools and closes the buffer file. +func (d *diskBuf) Close() error { + switch z := d.dec.(type) { + case *gzip.Reader: + putGzip(z) + case *zstd.Decoder: + putZstd(z) + default: + panic(fmt.Sprintf("programmer error: unknown decompressor type %T", z)) + } + return nil +} + +// ToCheck takes an OCI-like digest string and returns the binary checksum and a +// hasher using the same algorithm. +func toCheck(d string) ([]byte, hash.Hash, error) { + alg, enc, ok := strings.Cut(d, ":") + if !ok { + return nil, nil, fmt.Errorf("invalid digest %q", d) + } + var h hash.Hash + switch alg { + case `sha256`: + h = sha256.New() + case `sha512`: + h = sha512.New() + default: + return nil, nil, fmt.Errorf("unknown algorithm: %q", alg) + } + b, err := hex.DecodeString(enc) + if err != nil { + return nil, nil, err + } + return b, h, nil +} diff --git a/pkg/tarfs/seekable_test.go b/pkg/tarfs/seekable_test.go new file mode 100644 index 000000000..b8407e7d6 --- /dev/null +++ b/pkg/tarfs/seekable_test.go @@ -0,0 +1,122 @@ +package tarfs_test + +// These tests need to be in a separate package in order to prevent a cycle. + +import ( + "bytes" + "context" + "io/fs" + "os" + "path/filepath" + "testing" + + "github.com/quay/zlog" + + "github.com/quay/claircore" + "github.com/quay/claircore/pkg/tarfs" + "github.com/quay/claircore/test/fetch" +) + +type seekableTestcase struct { + Name string + Registry, Namespace string + Layer claircore.Digest + Check []checkFunc +} +type checkFunc func(*testing.T, fs.FS) + +func (tc seekableTestcase) Run(ctx context.Context) func(*testing.T) { + return func(t *testing.T) { + t.Helper() + t.Parallel() + ctx = zlog.Test(ctx, t) + f, err := fetch.Layer(ctx, t, + tc.Registry, tc.Namespace, tc.Layer, + fetch.NoDecompression) + if err != nil { + t.Fatal(err) + } + defer f.Close() + fi, err := f.Stat() + if err != nil { + t.Fatal(err) + } + buf, err := os.Create(filepath.Join(t.TempDir(), filepath.Base(t.Name()))) + if err != nil { + t.Error(err) + } + t.Cleanup(func() { + if err := buf.Close(); err != nil { + t.Error(err) + } + }) + + sys, err := tarfs.New(ctx, f, fi.Size(), buf) + if err != nil { + t.Fatal(err) + } + defer sys.Close() + // Do a walk, unconditionally. 
+ fs.WalkDir(sys, ".", func(p string, d fs.DirEntry, err error) error { + if err != nil { + t.Error(err) + } + // t.Log(p) + return nil + }) + for _, f := range tc.Check { + f(t, sys) + } + } +} + +func TestSeekable(t *testing.T) { + ctx := context.Background() + t.Parallel() + for _, tc := range []seekableTestcase{ + { + Name: "eStargz", + // layer from docker://ghcr.io/stargz-containers/fedora:30-esgz + Registry: "ghcr.io", + Namespace: "stargz-containers/fedora", + Layer: claircore.MustParseDigest(`sha256:a29c6008f8735dd289a374dabb8a277f6bbb8922d921a9c89861794196d6074c`), + Check: []checkFunc{ + func(t *testing.T, sys fs.FS) { + b, err := fs.ReadFile(sys, `etc/os-release`) + if err != nil { + t.Error(err) + return + } + if !bytes.Contains(b, []byte(`CPE_NAME="cpe:/o:fedoraproject:fedora:30"`)) { + t.Logf("seemingly garbled contents: %+q", string(b)) + t.Fail() + } + t.Log("etc/os-release: OK") + }, + }, + }, + { + Name: "zstd:chunked", + // layer from docker://docker.io/gscrivano/zstd-chunked:fedora + Registry: "docker.io", + Namespace: "gscrivano/zstd-chunked", + Layer: claircore.MustParseDigest(`sha256:9970d86e7cb7a3c7ee0a3c8fc2131880b387bc5fe8022a258b456ab2cda4303f`), + Check: []checkFunc{ + func(t *testing.T, sys fs.FS) { + b, err := fs.ReadFile(sys, `etc/os-release`) + if err != nil { + t.Error(err) + return + } + if !bytes.Contains(b, []byte(`CPE_NAME="cpe:/o:fedoraproject:fedora:35"`)) { + t.Logf("seemingly garbled contents: %+q", string(b)) + t.Fail() + } + t.Log("etc/os-release: OK") + }, + }, + }, + } { + t.Run(tc.Name, tc.Run(ctx)) + } +} diff --git a/pkg/tarfs/srv.go b/pkg/tarfs/srv.go new file mode 100644 index 000000000..bb37b48c9 --- /dev/null +++ b/pkg/tarfs/srv.go @@ -0,0 +1,553 @@ +package tarfs + +import ( + "fmt" + "io" + "io/fs" + "math/big" + "path" + "path/filepath" + "sort" + "strings" + "sync" + "time" +) + +// Srv is the struct handling all the mapping for file data. +// +// I couldn't think of a good name for this, so "server" because it serves file data. +// This is meant to be embedded, then filled in with hooks depending on the format that's being read out of the backing reader. +// +// This whole dance is complicated, because we have subtly different data flows that we want to abstract away: +// - block-based "dumb" data requests +// - whole-file requests. +// +// We also need to optionally do decompresison which means in the dumb case, there's no knowing where block boundaries actually lie. +type srv struct { + // Open is a hook function that's called once for every regular file in + // order for the embedding struct to make file data available in "r" and + // report the offset and size. + open openFunc + // Raw contains uncompressed, ready-to-read file data. + raw io.ReaderAt + // Lookup is a map of strings to indexes in the following slices. + lookup map[string]int + // Mu guards the following members. + // This is only used once the object is fully initialized. + mu sync.RWMutex + // Initd is a bitset tracking initialized meta elements. + initd big.Int + // The following slices are file metadata, split into externally-visible and + // internal-only components. + entry []Entry + meta []meta +} + +// Interface assertions for srv. +var ( + _ fs.FS = (*srv)(nil) + _ fs.GlobFS = (*srv)(nil) + _ fs.ReadDirFS = (*srv)(nil) + _ fs.ReadFileFS = (*srv)(nil) + _ fs.StatFS = (*srv)(nil) + // Skipped implementing [fs.SubFS], as sharing the backing buffer would be complicated. 
+) + +// OpenFunc is the hook for embedders of srv to make data ready in the ReaderAt and report the size and offset. +type openFunc func(inode) (offset, size int64, err error) + +// Inode is a fake inode(7)-like structure for keeping track of filesystem entries. +// +// Inode implements [fs.FileInfo]. +// +// Inode ties together the immutable [Entry] and the internal book-keeping [meta] structs. +// Any given inode is not unique, but the structs pointed to are unique and are shared. +type inode struct { + *Entry + *meta + // N is the index data is kept at in the parent srv's members. + N int +} + +// Interface assertion for inode. +var _ fs.FileInfo = (*inode)(nil) + +// Name implements [fs.FileInfo]. +func (i *inode) Name() string { return path.Base(i.Entry.Name) } + +// Size implements [fs.FileInfo]. +func (i *inode) Size() int64 { return int64(i.Entry.Size) } + +// Mode implements [fs.FileInfo]. +func (i *inode) Mode() fs.FileMode { return fs.FileMode(i.Entry.Mode) } + +// ModTime implements [fs.FileInfo]. +func (i *inode) ModTime() time.Time { return i.Entry.ModTime } + +// IsDir implements [fs.FileInfo]. +func (i *inode) IsDir() bool { return i.Type == typeDir } + +// Sys implements [fs.FileInfo]. +func (i *inode) Sys() interface{} { return i.Entry } + +// Meta is internal book-keeping for file entries. +// +// This is kept alongside the entries to cut-down on duplication between the external [Entry] type and an internal type that would have the same information along with book-keeping fields. +type meta struct { + children map[int]struct{} + chunk []int + off, sz int64 +} + +// Init initializes the srv. +func (s *srv) init(r io.ReaderAt, es []Entry, open openFunc) error { + sz := len(es) + 1 + s.lookup = make(map[string]int, sz) + s.entry = make([]Entry, 0, sz) + s.meta = make([]meta, 0, sz) + s.initd.SetUint64(0) + s.initd.SetBit(&s.initd, sz, 0) + s.raw = r + s.open = open + + links := make(map[string][]string) + // Mkdir the root: + const root = `.` + i := len(s.entry) + s.entry = append(s.entry, newEntryDir(root)) + s.meta = append(s.meta, meta{ + children: make(map[int]struct{}), + }) + s.lookup[root] = i + // This loop needs to copy the Entries, because we may need to create entries. + // That shouldn't happen in the seekable variants but *does* happen in normal tars. + for _, e := range es { + e.Name = normPath(e.Name) // Normpath for good measure. + switch e.Type { + case typeDir: + // Hack to avoid going into the whole add path. + if _, ok := s.lookup[e.Name]; ok { + continue + } + case typeSymlink: + // Unsure what's allowed in this field. + if !path.IsAbs(e.Linkname) { + e.Linkname = path.Join(path.Dir(e.Name), e.Linkname) + } + fallthrough + case typeHardlink: + e.Linkname = normPath(e.Linkname) + } + if err := s.add(e, links); err != nil { + return err + } + } + // Cleanup any dangling hardlinks. + for _, rms := range links { + for _, rm := range rms { + idx := s.lookup[rm] + delete(s.lookup, rm) + p := s.meta[s.lookup[path.Dir(rm)]] + delete(p.children, idx) + } + } + return nil +} + +// Add does what it says on the tin. +// +// In addition, it creates any needed leading directory elements. +// The caller should check for the existence of an "out of order" directory, as this function attempts to follow the POSIX spec on actions when "creating" a file that already exists: +// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap01.html#tagtcjh_14 +// +// The "hardlink" map is used for deferring hardlink creation. 
+func (s *srv) add(ne Entry, hardlink map[string][]string) error { + const op = `create` + name := ne.Name +Again: + if i, ok := s.lookup[name]; ok { + e := &s.entry[i] + et, nt := e.Type, ne.Type + switch { + case nt == typeChunk: + // Chunks only exist in the seekable variants. + ni := len(s.entry) + s.entry = append(s.entry, ne) + s.meta = append(s.meta, meta{}) + m := &s.meta[i] + m.chunk = append(m.chunk, ni) + case nt != typeReg: + // If the new type isn't a regular file, fail. + return &fs.PathError{ + Op: op, + Path: name, + Err: fmt.Errorf("new type (%s) cannot replace existing type (%s): %w", nt, et, fs.ErrExist), + } + case et == typeDir: + // If the existing type is a directory, fail. + return &fs.PathError{ + Op: op, + Path: name, + Err: fmt.Errorf("new file cannot replace directory: %w", fs.ErrExist), + } + case et == typeSymlink: // Follow the link target. + name = e.Linkname + goto Again + } + // Should be OK to replace now. + // Shadow the previous inode so we don't have to renumber everything. + s.entry[i] = ne + return nil + } + + // Hardlink handling: if the target doesn't exist yet, make a note in passed-in map. + if ne.Type == typeHardlink { + tgt := ne.Linkname + if _, ok := s.lookup[tgt]; !ok { + hardlink[tgt] = append(hardlink[tgt], name) + } + } + delete(hardlink, name) + i := len(s.entry) + s.entry = append(s.entry, ne) + s.meta = append(s.meta, meta{}) + if ne.Type == typeDir { + s.meta[i].children = make(map[int]struct{}) + } + s.lookup[name] = i + + cycle := make(map[int]struct{}) + dir := filepath.Dir(name) +AddEnt: + switch dir { + case name: + // Skip + case ".": + // Add was called with a root entry, like "a" -- make sure to link this to the root directory. + root := &s.meta[s.lookup["."]] + root.children[i] = struct{}{} + default: + parent, err := s.getInode(op, dir) + if err != nil { + parent, err = s.walkTo(dir, true) + } + if err != nil { + return err + } + if _, ok := cycle[parent.N]; ok { + return &fs.PathError{ + Op: op, + Path: dir, + Err: fmt.Errorf("found cycle when resolving member: %w", fs.ErrInvalid), + } + } + cycle[parent.N] = struct{}{} + switch parent.Type { + case typeDir: + // OK + case typeHardlink: + // This is annoying -- hard linking to directories is weird + fallthrough + case typeSymlink: + dir = parent.Linkname + goto AddEnt + default: + return &fs.PathError{ + Op: op, + Path: parent.Entry.Name, + Err: fmt.Errorf("error while connecting child %q: %w", name, fs.ErrExist), + } + } + parent.children[i] = struct{}{} + } + return nil +} + +// GetInode returns an inode backing "name". +// +// The "op" parameter is used in error reporting. +func (s *srv) getInode(op, name string) (inode, error) { + if !fs.ValidPath(name) { + return inode{}, &fs.PathError{ + Op: op, + Path: name, + Err: fs.ErrInvalid, + } + } + if i, ok := s.lookup[name]; ok { + return s.inodeByIdx(i), nil + } + i, err := s.walkTo(name, false) + if err != nil { + return inode{}, &fs.PathError{ + Op: op, + Path: name, + Err: fs.ErrNotExist, + } + } + return i, nil +} + +// InodeByIdx constructs an inode without checking the provided index. +func (s *srv) inodeByIdx(i int) inode { + return inode{ + Entry: &s.entry[i], + meta: &s.meta[i], + N: i, + } +} + +// WalkTo does a walk from the root as far along the provided path as possible, resolving symlinks as necessary. +// If any segments are missing (including the final segments), they are created as directories if the "create" bool is passed. 
+func (s *srv) walkTo(p string, create bool) (inode, error) { + w := strings.Split(p, "/") + var cur inode + var err error + var b strings.Builder + + cur, err = s.getInode(`walk`, ".") + if err != nil { + return cur, err + } + i := 0 + for lim := len(w); i < lim; i++ { + n := w[i] + if i != 0 { + b.WriteByte('/') + } + b.WriteString(n) + var child inode + var found bool + for ci := range cur.children { + child = s.inodeByIdx(ci) + cn := path.Base(child.Entry.Name) + if cn != n { + continue + } + cycle := make(map[int]struct{}) + Resolve: + for { + if _, ok := cycle[ci]; ok { + return inode{}, &fs.PathError{ + Op: `walk`, + Path: b.String(), + Err: fmt.Errorf("found cycle when resolving member: %w", fs.ErrInvalid), + } + } + cycle[ci] = struct{}{} + switch child.Type { + case typeDir: + break Resolve + case typeSymlink: + tgt := child.Linkname + var ok bool + ci, ok = s.lookup[tgt] + switch { + case ok && create, ok && !create: + child = s.inodeByIdx(ci) + break Resolve + case !ok && create: + s.add(newEntryDir(tgt), nil) + ci = s.lookup[tgt] + child = s.inodeByIdx(ci) + case !ok && !create: + return inode{}, fmt.Errorf("tarfs: walk to %q, but missing segment %q", p, n) + } + case typeReg: + if i == (lim - 1) { + break Resolve + } + return inode{}, &fs.PathError{ + Op: `walk`, + Path: p, + Err: fmt.Errorf("found symlink to regular file while connecting child %q: %w", b.String(), fs.ErrExist), + } + } + } + found = true + break + } + switch { + case found && create, found && !create: + // OK + case !found && create: + fp := b.String() // Make sure to use the full path and not just the member name. + s.add(newEntryDir(fp), nil) + ci := s.lookup[fp] + child = s.inodeByIdx(ci) + case !found && !create: + return inode{}, fmt.Errorf("tarfs: walk to %q, but missing segment %q", p, b.String()) + } + cur = child + } + return cur, nil +} + +// Initialized reports whether the numbered inode is initialized or not. +// +// OpenFunc providers should use this to ensure lock invariants. +func (s *srv) initialized(i int) (ok bool) { + s.mu.RLock() + ok = s.initd.Bit(i) != 0 + s.mu.RUnlock() + return ok +} + +// Realize calls [srv.open] and updates metadata as needed. +// +// This function is a no-op if a previous call reported a nil error for an equivalent inode. +func (s *srv) realize(i inode) (err error) { + if s.initialized(i.N) { + return nil + } + var off, sz int64 + switch { + case i.Type == typeReg && i.Size() != 0: + // Nonzero regular file + off, sz, err = s.open(i) + if err != nil { + return err + } + } + s.mu.Lock() + defer s.mu.Unlock() + // Check if another goroutine has updated the meta struct and the + // initialized bitset. + if s.initd.Bit(i.N) != 0 { + return nil + } + i.meta.off = off + i.meta.sz = sz + s.initd.SetBit(&s.initd, i.N, 1) + return nil +} + +// Open implements [fs.FS]. 
+func (s *srv) Open(name string) (fs.File, error) {
+	const op = `open`
+	i, err := s.getInode(op, name)
+	if err != nil {
+		return nil, err
+	}
+	if err := s.realize(i); err != nil {
+		return nil, &fs.PathError{
+			Op:   op,
+			Path: name,
+			Err:  err,
+		}
+	}
+
+	r := file{inode: i}
+	switch i.Type {
+	case typeReg:
+		r.SectionReader = io.NewSectionReader(s.raw, i.off, i.sz)
+	case typeHardlink:
+		i, err = s.getInode(op, i.Linkname)
+		if err != nil {
+			return nil, err
+		}
+		r.inode = i
+		r.SectionReader = io.NewSectionReader(s.raw, i.off, i.sz)
+	case typeDir:
+		r.dirent = make([]fs.DirEntry, len(r.children))
+		n := 0
+		for i := range r.children {
+			e := &s.entry[i]
+			r.dirent[n] = dirent{e}
+			n++
+		}
+		sort.Slice(r.dirent, sortDirent(r.dirent))
+	case typeSymlink:
+		return s.Open(i.Linkname)
+	default:
+		// Pretend all other kinds of files don't exist.
+		return nil, &fs.PathError{
+			Op:   op,
+			Path: name,
+			Err:  fs.ErrExist,
+		}
+	}
+
+	return &r, nil
+}
+
+// Glob implements [fs.GlobFS].
+//
+// See [path.Match] for the pattern syntax.
+func (s *srv) Glob(pat string) ([]string, error) {
+	// GlobFS is implemented because it can avoid allocating for the walk.
+	//
+	// Path.Match is documented as only returning an error when the pattern is
+	// invalid, so check it here and we can avoid the check in the loop.
+	if _, err := path.Match(pat, ""); err != nil {
+		return nil, err
+	}
+	var ret []string
+	for n := range s.lookup {
+		if ok, _ := path.Match(pat, n); ok {
+			ret = append(ret, n)
+		}
+	}
+	sort.Strings(ret)
+	return ret, nil
+}
+
+// ReadFile implements [fs.ReadFileFS].
+func (s *srv) ReadFile(name string) ([]byte, error) {
+	// ReadFileFS is implemented because it can avoid allocating an intermediate
+	// "file" struct and can immediately allocate a byte slice of the correct
+	// size.
+	const op = `readfile`
+	i, err := s.getInode(op, name)
+	if err != nil {
+		return nil, err
+	}
+	if i.Type == typeSymlink {
+		return s.ReadFile(i.Linkname)
+	}
+	if err := s.realize(i); err != nil {
+		return nil, &fs.PathError{
+			Op:   op,
+			Path: name,
+			Err:  err,
+		}
+	}
+	ret := make([]byte, i.sz)
+	if _, err := io.ReadFull(io.NewSectionReader(s.raw, i.off, i.sz), ret); err != nil {
+		return nil, &fs.PathError{
+			Op:   op,
+			Path: name,
+			Err:  err,
+		}
+	}
+	return ret, nil
+}
+
+// Stat implements [fs.StatFS].
+func (s *srv) Stat(name string) (fs.FileInfo, error) {
+	// StatFS is implemented because it can avoid allocating an intermediate
+	// "file" struct.
+	const op = `stat`
+	i, err := s.getInode(op, name)
+	if err != nil {
+		return nil, err
+	}
+	return &i, nil
+}
+
+// ReadDir implements [fs.ReadDirFS].
+func (s *srv) ReadDir(name string) ([]fs.DirEntry, error) {
+	// ReadDirFS is implemented because it can avoid allocating some
+	// intermediate structs.
+	const op = `readdir`
+	i, err := s.getInode(op, name)
+	if err != nil {
+		return nil, err
+	}
+	ret := make([]fs.DirEntry, 0, len(i.children))
+	for i := range i.children {
+		ret = append(ret, dirent{&s.entry[i]})
+	}
+	sort.Slice(ret, sortDirent(ret))
+	return ret, nil
+}
diff --git a/pkg/tarfs/tarfs.go b/pkg/tarfs/tarfs.go
index 0b72609c9..26eafc7c5 100644
--- a/pkg/tarfs/tarfs.go
+++ b/pkg/tarfs/tarfs.go
@@ -1,34 +1,19 @@
-// Package tarfs implements the fs.FS interface over a tar archive.
+// Package tarfs implements the [fs.FS] interface over a tar archive.
+//
+// It also implements the [fs.FS] interface over the tar-compatible [eStargz] and [zstd:chunked] formats.
+// This package does not support newer [eStargz] layers with the `innerOffset` field. +// +// [eStargz]: https://github.com/containerd/stargz-snapshotter/blob/main/docs/estargz.md +// [zstd:chunked]: https://github.com/containers/image/pull/1084 package tarfs import ( - "archive/tar" - "fmt" - "io" - "io/fs" - "path" "path/filepath" - "sort" "strconv" "strings" "unicode/utf8" ) -// FS implements a filesystem abstraction over an io.ReaderAt containing a tar. -type FS struct { - r io.ReaderAt - lookup map[string]int - inode []inode -} - -// Inode is a fake inode(7)-like structure for keeping track of filesystem -// entries. -type inode struct { - h *tar.Header - children map[int]struct{} - off, sz int64 -} - // NormPath removes relative elements and enforces that the resulting string is // utf8-clean. // @@ -38,7 +23,7 @@ func normPath(p string) string { if utf8.ValidString(s) { return s } - // Slow path -- need to decode the string an write out escapes. + // Slow path -- need to decode the string and write out escapes. // This is roughly modeled on [strings.ToValidUTF8], but without the run // coalescing and the replacement is based on the invalid byte sequence. The // [strings.ToValidUTF8] function only cares if the encoding is valid, not @@ -69,474 +54,19 @@ func normPath(p string) string { return b.String() } -func newDir(n string) inode { - return inode{ - h: &tar.Header{ - Typeflag: tar.TypeDir, - Name: n, - Mode: int64(fs.ModeDir | 0o644), - }, - children: make(map[int]struct{}), - } -} - -// New creates an FS from the tar contained in the ReaderAt. -// -// The ReaderAt must remain valid for the entire life of the returned FS and any -// FSes returned by Sub. -func New(r io.ReaderAt) (*FS, error) { - var err error - s := FS{ - r: r, - lookup: make(map[string]int), - } - hardlink := make(map[string][]string) - if err := s.add(".", newDir("."), hardlink); err != nil { - return nil, err - } - - segs, err := findSegments(r) - if err != nil { - return nil, fmt.Errorf("tarfs: error finding segments: %w", err) - } - for _, seg := range segs { - r := io.NewSectionReader(r, seg.start, seg.size) - rd := tar.NewReader(r) - i := inode{ - off: seg.start, - sz: seg.size, - } - i.h, err = rd.Next() - if err != nil { - return nil, fmt.Errorf("tarfs: error reading header @%d(%d): %w", seg.start, seg.size, err) - } - i.h.Name = normPath(i.h.Name) - n := i.h.Name - switch i.h.Typeflag { - case tar.TypeDir: - // Has this been created this already? - if _, ok := s.lookup[n]; ok { - continue - } - i.children = make(map[int]struct{}) - case tar.TypeSymlink, tar.TypeLink: - // If an absolute path, norm the path and it should be fine. - // A symlink could dangle, but that's really weird. - if path.IsAbs(i.h.Linkname) { - i.h.Linkname = normPath(i.h.Linkname) - break - } - if i.h.Typeflag == tar.TypeSymlink { - // Assume that symlinks are relative to the directory they're - // present in. - i.h.Linkname = path.Join(path.Dir(n), i.h.Linkname) - } - i.h.Linkname = normPath(i.h.Linkname) - // Linkname should now be a full path from the root of the tar. - case tar.TypeReg: - } - if err := s.add(n, i, hardlink); err != nil { - return nil, err - } +func min(a, b int) int { + if a < b { + return a } - // Cleanup any dangling hardlinks. - // This leaves them in the inode slice, but removes them from the observable - // tree. 
- for _, rms := range hardlink { - for _, rm := range rms { - idx := s.lookup[rm] - delete(s.lookup, rm) - p := s.inode[s.lookup[path.Dir(rm)]] - delete(p.children, idx) - } - } - return &s, nil + return b } -// Add does what it says on the tin. -// -// In addition, it creates any needed leading directory elements. The caller -// should check for the existence of an "out of order" directory, as this -// function attempts to follow the POSIX spec on actions when "creating" a file -// that already exists: -// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap01.html#tagtcjh_14 -// -// The "hardlink" map is used for deferring hardlink creation. -func (f *FS) add(name string, ino inode, hardlink map[string][]string) error { - const op = `create` -Again: - if i, ok := f.lookup[name]; ok { - n := &f.inode[i] - et, nt := n.h.FileInfo().Mode()&fs.ModeType, ino.h.FileInfo().Mode()&fs.ModeType - switch { - case nt != 0: - // If the new type isn't a regular file, fail. - return &fs.PathError{ - Op: op, - Path: name, - Err: fmt.Errorf("new type (%x) cannot replace existing type (%x): %w", nt, et, fs.ErrExist), - } - case et&fs.ModeDir != 0: - // If the existing type is a directory, fail. - return &fs.PathError{ - Op: op, - Path: name, - Err: fmt.Errorf("new file cannot replace directory: %w", fs.ErrExist), - } - case et&fs.ModeSymlink != 0: - // Follow the link target. - name = n.h.Linkname - goto Again - } - // Should be OK to replace now. Shadow the previous inode so we don't - // have to renumber everything. - f.inode[i] = ino - return nil - } - - // Hardlink handling: if the target doesn't exist yet, make a note in passed-in map. - if ino.h.Typeflag == tar.TypeLink { - tgt := ino.h.Linkname - if _, ok := f.lookup[tgt]; !ok { - hardlink[tgt] = append(hardlink[tgt], name) - } - } - delete(hardlink, name) - i := len(f.inode) - f.inode = append(f.inode, ino) - f.lookup[name] = i - - cycle := make(map[*inode]struct{}) - dir := filepath.Dir(name) -AddEnt: - switch dir { - case name: - // Skip - case ".": - // Add was called with a root entry, like "a" -- make sure to link this to the root directory. - root := &f.inode[f.lookup["."]] - root.children[i] = struct{}{} - default: - parent, err := f.getInode(op, dir) - if err != nil { - parent, err = f.walkTo(dir, true) - } - if err != nil { - return err - } - if _, ok := cycle[parent]; ok { - return &fs.PathError{ - Op: op, - Path: dir, - Err: fmt.Errorf("found cycle when resolving member: %w", fs.ErrInvalid), - } - } - cycle[parent] = struct{}{} - switch parent.h.Typeflag { - case tar.TypeDir: - // OK - case tar.TypeLink: - // This is annoying -- hard linking to directories is weird - fallthrough - case tar.TypeSymlink: - dir = parent.h.Linkname - goto AddEnt - default: - return &fs.PathError{ - Op: op, - Path: parent.h.Name, - Err: fmt.Errorf("error while connecting child %q: %w", name, fs.ErrExist), - } - } - parent.children[i] = struct{}{} - } - return nil -} - -// GetInode returns the inode backing "name". -// -// The "op" parameter is used in error reporting. 
-func (f *FS) getInode(op, name string) (*inode, error) { - if !fs.ValidPath(name) { - return nil, &fs.PathError{ - Op: op, - Path: name, - Err: fs.ErrInvalid, - } - } - name = path.Clean(name) - if i, ok := f.lookup[name]; ok { - return &f.inode[i], nil - } - - i, err := f.walkTo(name, false) - if err != nil { - return nil, &fs.PathError{ - Op: op, - Path: name, - Err: fs.ErrNotExist, - } - } - return i, nil -} - -// WalkTo does a walk from the root as far along the provided path as possible, -// resolving symlinks as necesarry. If any segments are missing (including the final -// segments), they are created as directories if the "create" bool is passed. -func (f *FS) walkTo(p string, create bool) (*inode, error) { - w := strings.Split(p, "/") - var cur *inode - var b strings.Builder - - cur = &f.inode[f.lookup["."]] - i := 0 - for lim := len(w); i < lim; i++ { - n := w[i] - if i != 0 { - b.WriteByte('/') - } - b.WriteString(n) - var child *inode - var found bool - for ci := range cur.children { - child = &f.inode[ci] - cn := path.Base(child.h.Name) - if cn != n { - continue - } - cycle := make(map[int]struct{}) - Resolve: - for { - if _, ok := cycle[ci]; ok { - return nil, &fs.PathError{ - Op: `walk`, - Path: b.String(), - Err: fmt.Errorf("found cycle when resolving member: %w", fs.ErrInvalid), - } - } - cycle[ci] = struct{}{} - switch child.h.Typeflag { - case tar.TypeDir: - break Resolve - case tar.TypeSymlink: - tgt := child.h.Linkname - var ok bool - ci, ok = f.lookup[tgt] - switch { - case ok && create, ok && !create: - child = &f.inode[ci] - break Resolve - case !ok && create: - f.add(tgt, newDir(tgt), nil) - ci = f.lookup[tgt] - child = &f.inode[ci] - case !ok && !create: - return nil, fmt.Errorf("tarfs: walk to %q, but missing segment %q", p, n) - } - case tar.TypeReg: - if i == (lim - 1) { - break Resolve - } - return nil, &fs.PathError{ - Op: `walk`, - Path: p, - Err: fmt.Errorf("found symlink to regular file while connecting child %q: %w", b.String(), fs.ErrExist), - } - } - } - found = true - break - } - switch { - case found && create, found && !create: - // OK - case !found && create: - fp := b.String() // Make sure to use the full path and not just the member name. - f.add(fp, newDir(n), nil) - ci := f.lookup[fp] - child = &f.inode[ci] - case !found && !create: - return nil, fmt.Errorf("tarfs: walk to %q, but missing segment %q", p, b.String()) - } - cur = child - } - return cur, nil -} - -// Open implements fs.FS. -func (f *FS) Open(name string) (fs.File, error) { - const op = `open` - i, err := f.getInode(op, name) - if err != nil { - return nil, err - } - typ := i.h.FileInfo().Mode().Type() - var r *tar.Reader - switch { - case typ.IsRegular() && i.h.Typeflag != tar.TypeLink: - r = tar.NewReader(io.NewSectionReader(f.r, i.off, i.sz)) - case typ.IsRegular() && i.h.Typeflag == tar.TypeLink: - tgt, err := f.getInode(op, i.h.Linkname) - if err != nil { - return nil, err - } - r = tar.NewReader(io.NewSectionReader(f.r, tgt.off, tgt.sz)) - case typ.IsDir(): - d := dir{ - h: i.h, - es: make([]fs.DirEntry, len(i.children)), - } - n := 0 - for i := range i.children { - ct := &f.inode[i] - d.es[n] = dirent{ct.h} - n++ - } - sort.Slice(d.es, sortDirent(d.es)) - return &d, nil - case typ&fs.ModeSymlink != 0: // typ.IsSymlink() - return f.Open(i.h.Linkname) - default: - // Pretend all other kinds of files don't exist. 
- return nil, &fs.PathError{ - Op: op, - Path: name, - Err: fs.ErrExist, - } - } - if _, err := r.Next(); err != nil { - return nil, &fs.PathError{ - Op: op, - Path: name, - Err: err, - } - } - return &file{ - h: i.h, - r: r, - }, nil -} - -// Stat implements fs.StatFS. -func (f *FS) Stat(name string) (fs.FileInfo, error) { - // StatFS is implemented because it can avoid allocating an intermediate - // "file" struct. - const op = `stat` - i, err := f.getInode(op, name) - if err != nil { - return nil, err - } - return i.h.FileInfo(), nil -} - -// ReadDir implements fs.ReadDirFS. -func (f *FS) ReadDir(name string) ([]fs.DirEntry, error) { - // ReadDirFS is implemented because it can avoid allocating an intermediate - // "dir" struct. - const op = `readdir` - i, err := f.getInode(op, name) - if err != nil { - return nil, err - } - ret := make([]fs.DirEntry, 0, len(i.children)) - for ti := range i.children { - t := &f.inode[ti] - ret = append(ret, dirent{t.h}) - } - sort.Slice(ret, sortDirent(ret)) - return ret, nil -} - -// ReadFile implements fs.ReadFileFS. -func (f *FS) ReadFile(name string) ([]byte, error) { - // ReadFileFS is implemented because it can avoid allocating an intermediate - // "file" struct and can immediately allocate a byte slice of the correct - // size. - const op = `readfile` - i, err := f.getInode(op, name) - if err != nil { - return nil, err - } - if i.h.FileInfo().Mode().Type()&fs.ModeSymlink != 0 { - return f.ReadFile(i.h.Linkname) - } - r := tar.NewReader(io.NewSectionReader(f.r, i.off, i.sz)) - if _, err := r.Next(); err != nil { - return nil, &fs.PathError{ - Op: op, - Path: name, - Err: err, - } - } - ret := make([]byte, i.h.Size) - if _, err := io.ReadFull(r, ret); err != nil { - return nil, &fs.PathError{ - Op: op, - Path: name, - Err: err, - } - } - return ret, nil -} - -// Glob implements fs.GlobFS. -// -// See path.Match for the patten syntax. -func (f *FS) Glob(pat string) ([]string, error) { - // GlobFS is implemented because it can avoid allocating for the walk. - // - // Path.Match is documented as only returning an error when the pattern is - // invalid, so check it here and we can avoid the check in the loop. - if _, err := path.Match(pat, ""); err != nil { - return nil, err - } - var ret []string - for n := range f.lookup { - if ok, _ := path.Match(pat, n); ok { - ret = append(ret, n) - } - } - sort.Strings(ret) - return ret, nil -} - -// Sub implements fs.SubFS. -func (f *FS) Sub(dir string) (fs.FS, error) { - // SubFS is implemented because it only requires a single walk and - // conditional copy of the lookup table -- the underlying reader and inode - // slice can be shared. - const op = `sub` - n, err := f.getInode(op, dir) - if err != nil { - return nil, err - } - bp := n.h.Name - ret := FS{ - r: f.r, - inode: f.inode, - lookup: make(map[string]int), - } - for n, i := range f.lookup { - rel, err := filepath.Rel(bp, n) - if err != nil { - // Can't be made relative. - continue - } - if strings.HasPrefix(rel, "..") { - // Not in this subtree. - continue - } - ret.lookup[rel] = i - } - return &ret, nil -} - -// A bunch of static assertions for the fs interfaces. +// Possible values for the "magic" in a tar header. var ( - _ fs.FS = (*FS)(nil) - _ fs.GlobFS = (*FS)(nil) - _ fs.ReadDirFS = (*FS)(nil) - _ fs.ReadFileFS = (*FS)(nil) - _ fs.StatFS = (*FS)(nil) - _ fs.SubFS = (*FS)(nil) + magicPAX = []byte("ustar\x00") + magicGNU = []byte("ustar ") + magicOldGNU = []byte("ustar \x00") ) + +// Where the "magic" value lives. 
+const magicOff = 257 diff --git a/pkg/tarfs/tarfs_test.go b/pkg/tarfs/tarfs_test.go index 52a453b4d..2804080ab 100644 --- a/pkg/tarfs/tarfs_test.go +++ b/pkg/tarfs/tarfs_test.go @@ -1,8 +1,10 @@ -package tarfs +package tarfs_test import ( "archive/tar" + "bufio" "bytes" + "context" "crypto/sha256" "errors" "io" @@ -14,29 +16,70 @@ import ( "sync" "testing" "testing/fstest" + "time" - "github.com/quay/claircore/test/integration" + "github.com/klauspost/compress/gzip" + "github.com/klauspost/compress/zstd" + "github.com/quay/zlog" + + "github.com/quay/claircore/pkg/tarfs" + "github.com/quay/claircore/test" +) + +var ( + // ModTime reports the newest modtime for files in the current directory. + // + // TODO(hank) Replace this with [sync.OnceValue] in go1.21. + modTime time.Time ) +func init() { + dent, err := os.ReadDir(".") + if err != nil { + panic(err) + } + var r time.Time + for _, d := range dent { + fi, err := d.Info() + if err != nil { + panic(err) + } + m := fi.ModTime() + if m.After(r) { + r = m + } + } + modTime = r +} + // TestFS runs some sanity checks on a tar generated from this package's // directory. // // The tar is generated on demand and removed if tests fail, so modifying any -// file in this package *will* cause tests to fail once. Make sure to run tests -// twice if the Checksum tests fail. +// file in this package will cause tests slow down on the next run. func TestFS(t *testing.T) { - var name = filepath.Join(integration.PackageCacheDir(t), `fstest.tar`) - checktar(t, name) + ctx := zlog.Test(context.Background(), t) + t.Parallel() + name := test.GenerateFixture(t, `fstest.tar`, modTime, makeFixture("")) + test.GenerateFixture(t, `fstest.tar.gz`, modTime, makeFixture("gz")) + test.GenerateFixture(t, `fstest.tar.zstd`, modTime, makeFixture("zstd")) fileset := []string{ "file.go", + "fs.go", + "metrics.go", "parse.go", + "pool.go", + "randomaccess.go", + "seekable_test.go", + "srv.go", "tarfs.go", "tarfs_test.go", "testdata/atroot", } - t.Run("Single", func(t *testing.T) { - f, err := os.Open(name) + mkBuf := func(t *testing.T) *os.File { + t.Helper() + f, err := os.Create(filepath.Join(t.TempDir(), "buffer")) if err != nil { t.Error(err) } @@ -45,162 +88,250 @@ func TestFS(t *testing.T) { t.Error(err) } }) - sys, err := New(f) - if err != nil { - t.Error(err) - } - - if err := fstest.TestFS(sys, fileset...); err != nil { - t.Error(err) - } - }) - - t.Run("Concurrent", func(t *testing.T) { - f, err := os.Open(name) - if err != nil { - t.Error(err) - } - t.Cleanup(func() { - if err := f.Close(); err != nil { - t.Error(err) + return f + } + openFile := func(name string) func(*testing.T) *os.File { + return func(t *testing.T) *os.File { + t.Helper() + f, err := os.Open(name) + if err != nil { + t.Fatal(err) } - }) - sys, err := New(f) - if err != nil { - t.Error(err) - } - - const lim = 8 - var wg sync.WaitGroup - t.Logf("running %d goroutines", lim) - wg.Add(lim) - for i := 0; i < lim; i++ { - go func() { - defer wg.Done() - if err := fstest.TestFS(sys, fileset...); err != nil { + t.Cleanup(func() { + if err := f.Close(); err != nil { t.Error(err) } - }() - } - wg.Wait() - }) - - t.Run("Sub", func(t *testing.T) { - f, err := os.Open(name) - if err != nil { - t.Error(err) - } - t.Cleanup(func() { - if err := f.Close(); err != nil { - t.Error(err) - } - }) - sys, err := New(f) - if err != nil { - t.Error(err) + }) + return f } + } - sub, err := fs.Sub(sys, "testdata") - if err != nil { - t.Error(err) - } - if err := fstest.TestFS(sub, "atroot"); err != nil { - 
t.Error(err) - } - }) + tt := []fsTestcase{ + { + Name: "Single", + Check: func(ctx context.Context, open, mkBuf func(*testing.T) *os.File) func(*testing.T) { + return func(t *testing.T) { + ctx := zlog.Test(ctx, t) + f := open(t) + sys, err := tarfs.New(ctx, f, -1, mkBuf(t)) + if err != nil { + t.Fatal(err) + } + defer sys.Close() - t.Run("Checksum", func(t *testing.T) { - f, err := os.Open(name) - if err != nil { - t.Error(err) - } - t.Cleanup(func() { - if err := f.Close(); err != nil { - t.Error(err) - } - }) - sys, err := New(f) - if err != nil { - t.Error(err) - } - for _, n := range fileset { - name := n - t.Run(name, func(t *testing.T) { - h := sha256.New() - f, err := os.Open(name) - if err != nil { - t.Fatal(err) + if err := fstest.TestFS(sys, fileset...); err != nil { + t.Error(err) + } } - defer f.Close() - if _, err := io.Copy(h, f); err != nil { - t.Error(err) + }, + }, + { + Name: "NotAFile", + Check: func(ctx context.Context, open, mkBuf func(*testing.T) *os.File) func(*testing.T) { + return func(t *testing.T) { + ctx := zlog.Test(ctx, t) + f := open(t) + b, err := io.ReadAll(f) + if err != nil { + t.Error(err) + } + rd := bytes.NewReader(b) + sys, err := tarfs.New(ctx, rd, -1, mkBuf(t)) + if err != nil { + t.Fatal(err) + } + defer sys.Close() + + if err := fstest.TestFS(sys, fileset...); err != nil { + t.Error(err) + } } - want := h.Sum(nil) + }, + }, + { + Name: "Concurrent", + Check: func(ctx context.Context, open, mkBuf func(*testing.T) *os.File) func(*testing.T) { + return func(t *testing.T) { + ctx := zlog.Test(ctx, t) + f := open(t) + fi, err := f.Stat() + if err != nil { + t.Fatal(err) + } + sys, err := tarfs.New(ctx, f, fi.Size(), mkBuf(t)) + if err != nil { + t.Fatal(err) + } + defer sys.Close() - h.Reset() - b, err := fs.ReadFile(sys, name) - if err != nil { - t.Error(err) + const lim = 8 + var wg sync.WaitGroup + t.Logf("running %d goroutines", lim) + wg.Add(lim) + for i := 0; i < lim; i++ { + go func() { + defer wg.Done() + if err := fstest.TestFS(sys, fileset...); err != nil { + t.Error(err) + } + }() + } + wg.Wait() } - if _, err := h.Write(b); err != nil { - t.Error(err) + }, + }, + { + Name: "Sub", + Check: func(ctx context.Context, open, mkBuf func(*testing.T) *os.File) func(*testing.T) { + return func(t *testing.T) { + ctx := zlog.Test(ctx, t) + f := open(t) + sys, err := tarfs.New(ctx, f, -1, mkBuf(t)) + if err != nil { + t.Fatal(err) + } + defer sys.Close() + + sub, err := fs.Sub(sys, "testdata") + if err != nil { + t.Error(err) + } + if err := fstest.TestFS(sub, "atroot"); err != nil { + t.Error(err) + } } - got := h.Sum(nil) + }, + }, + { + Name: "Checksum", + Check: func(ctx context.Context, open, mkBuf func(*testing.T) *os.File) func(*testing.T) { + return func(t *testing.T) { + ctx := zlog.Test(ctx, t) + f := open(t) + sys, err := tarfs.New(ctx, f, -1, mkBuf(t)) + if err != nil { + t.Fatal(err) + } + defer sys.Close() + for _, n := range fileset { + name := n + t.Run(name, func(t *testing.T) { + h := sha256.New() + f, err := os.Open(name) + if err != nil { + t.Fatal(err) + } + defer f.Close() + if _, err := io.Copy(h, f); err != nil { + t.Error(err) + } + want := h.Sum(nil) - if !bytes.Equal(got, want) { - t.Errorf("got: %x, want: %x", got, want) + h.Reset() + b, err := fs.ReadFile(sys, name) + if err != nil { + t.Error(err) + } + if _, err := h.Write(b); err != nil { + t.Error(err) + } + got := h.Sum(nil) + + if !bytes.Equal(got, want) { + t.Errorf("got: %x, want: %x", got, want) + } + }) + } } - }) + }, + }, + } + + t.Run("Uncompressed", func(t 
*testing.T) { + t.Parallel() + bufHack := func(t *testing.T) *os.File { + t.Helper() + if path.Base(t.Name()) != "NotAFile" { + return nil + } + return mkBuf(t) + } + for _, tc := range tt { + t.Run(tc.Name, tc.Check(ctx, openFile(name), bufHack)) + } + }) + t.Run("Gzip", func(t *testing.T) { + t.Parallel() + for _, tc := range tt { + t.Run(tc.Name, tc.Check(ctx, openFile(name+".gz"), mkBuf)) + } + }) + t.Run("Zstd", func(t *testing.T) { + t.Parallel() + for _, tc := range tt { + t.Run(tc.Name, tc.Check(ctx, openFile(name+".zstd"), mkBuf)) } }) } -// TestEmpty tests that a wholly empty tar still creates an empty root. -func TestEmpty(t *testing.T) { - // Two zero blocks is the tar footer, so just make one up. - rd := bytes.NewReader(make([]byte, 2*512)) - sys, err := New(rd) - if err != nil { - t.Error(err) - } - if _, err := fs.Stat(sys, "."); err != nil { - t.Error(err) - } - ent, err := fs.ReadDir(sys, ".") - if err != nil { - t.Error(err) - } - for _, e := range ent { - t.Log(e) - } - if len(ent) != 0 { - t.Errorf("got: %d, want: 0", len(ent)) - } +type fsTestcase struct { + Check func(ctx context.Context, open, mkBuf func(*testing.T) *os.File) func(*testing.T) + Name string } -func checktar(t *testing.T, name string) { - t.Helper() - out, err := os.Create(name) - if err != nil { - t.Fatal(err) - } - defer out.Close() - tw := tar.NewWriter(out) - defer tw.Close() +// MakeFixture makes the expected tar for TestFS, with the compression "cmp". +// +// "Cmp" must be one of: +// - "" +// - gz +// - zstd +func makeFixture(cmp string) func(testing.TB, *os.File) { + return func(t testing.TB, f *os.File) { + var w io.Writer + buf := bufio.NewWriter(f) + defer func() { + if err := buf.Flush(); err != nil { + t.Error(err) + } + }() + switch cmp { + case "": + w = buf + case "gz": + z := gzip.NewWriter(buf) + defer z.Close() + w = z + case "zstd": + z, err := zstd.NewWriter(buf) + if err != nil { + t.Fatal(err) + } + defer z.Close() + w = z + default: + t.Fatalf("unknown compression scheme: %q", cmp) + } - in := os.DirFS(".") - if err := fs.WalkDir(in, ".", mktar(t, in, tw)); err != nil { - t.Fatal(err) + tw := tar.NewWriter(w) + defer tw.Close() + in := os.DirFS(".") + if err := fs.WalkDir(in, ".", mktar(t, filepath.Base(f.Name()), in, tw)); err != nil { + t.Fatal(err) + } } } -func mktar(t *testing.T, in fs.FS, tw *tar.Writer) fs.WalkDirFunc { +// Mktar is a [fs.WalkDirFunc] to copy files from "in" to "tw". +// +// "Name" is supplied for logging only. +func mktar(t testing.TB, name string, in fs.FS, tw *tar.Writer) fs.WalkDirFunc { return func(p string, d fs.DirEntry, err error) error { if err != nil { return err } - switch { - case filepath.Ext(d.Name()) == ".tar": + switch ext := path.Ext(d.Name()); { + case ext == ".tar" || ext == ".gz" || ext == ".zstd": + // Skip all these. return nil case d.Name() == "." && d.IsDir(): return nil @@ -208,7 +339,7 @@ func mktar(t *testing.T, in fs.FS, tw *tar.Writer) fs.WalkDirFunc { return fs.SkipDir default: } - t.Logf("adding %q", p) + t.Logf("%s: adding %q", name, p) i, err := d.Info() if err != nil { return err @@ -236,33 +367,80 @@ func mktar(t *testing.T, in fs.FS, tw *tar.Writer) fs.WalkDirFunc { } } +// TestEmpty tests that a wholly empty tar still creates an empty root. 
+func TestEmpty(t *testing.T) { + t.Parallel() + ctx := zlog.Test(context.Background(), t) + f, err := os.Create(filepath.Join(t.TempDir(), filepath.Base(t.Name()))) + if err != nil { + t.Error(err) + } + t.Cleanup(func() { + if err := f.Close(); err != nil { + t.Error(err) + } + }) + // Two zero blocks is the tar footer, so just make one up. + if err := f.Truncate(2 * 512); err != nil { + t.Error(err) + } + sys, err := tarfs.New(ctx, f, -1, nil) + if err != nil { + t.Fatal(err) + } + defer sys.Close() + if _, err := fs.Stat(sys, "."); err != nil { + t.Error(err) + } + ent, err := fs.ReadDir(sys, ".") + if err != nil { + t.Error(err) + } + for _, e := range ent { + t.Log(e) + } + if len(ent) != 0 { + t.Errorf("got: %d, want: 0", len(ent)) + } +} + func TestSymlinks(t *testing.T) { - tmp := t.TempDir() + t.Parallel() run := func(openErr bool, hs []tar.Header, chk func(*testing.T, fs.FS)) func(*testing.T) { return func(t *testing.T) { + ctx := zlog.Test(context.Background(), t) t.Helper() - // This is a perfect candidate for using test.GenerateFixture, but - // creates an import cycle. - f, err := os.Create(filepath.Join(tmp, path.Base(t.Name()))) - if err != nil { - t.Fatal(err) - } - defer f.Close() - - tw := tar.NewWriter(f) - for i := range hs { - if err := tw.WriteHeader(&hs[i]); err != nil { + name := test.GenerateFixture(t, path.Base(t.Name())+".tar", modTime, func(t testing.TB, f *os.File) { + tw := tar.NewWriter(f) + for i := range hs { + if err := tw.WriteHeader(&hs[i]); err != nil { + t.Error(err) + } + } + if err := tw.Close(); err != nil { t.Error(err) } + }) + f, err := os.Open(name) + if err != nil { + t.Fatal(err) } - if err := tw.Close(); err != nil { - t.Error(err) + fi, err := f.Stat() + if err != nil { + t.Fatal(err) } - sys, err := New(f) + sys, err := tarfs.New(ctx, f, fi.Size(), nil) t.Log(err) + t.Cleanup(func() { + if sys != nil { + if err := sys.Close(); err != nil { + t.Error(err) + } + } + }) if (err != nil) != openErr { - t.Fail() + t.FailNow() } if chk != nil { @@ -461,6 +639,8 @@ func TestSymlinks(t *testing.T) { } func TestKnownLayers(t *testing.T) { + t.Parallel() + ctx := zlog.Test(context.Background(), t) ents, err := os.ReadDir(`testdata/known`) if err != nil { t.Fatal(err) @@ -471,15 +651,17 @@ func TestKnownLayers(t *testing.T) { continue } t.Run(n, func(t *testing.T) { + ctx := zlog.Test(ctx, t) f, err := os.Open(filepath.Join(`testdata/known`, n)) if err != nil { t.Fatal(err) } defer f.Close() - sys, err := New(f) + sys, err := tarfs.New(ctx, f, -1, nil) if err != nil { t.Fatal(err) } + defer sys.Close() if err := fs.WalkDir(sys, ".", func(p string, d fs.DirEntry, err error) error { if err != nil { t.Error(err) @@ -534,6 +716,8 @@ func TestKnownLayers(t *testing.T) { // `var/run/console/algo.txt` and `log.txt` can be accessed via `run/logs/log.txt` // or `var/run/logs/log.txt`. 
func TestTarConcatenate(t *testing.T) { + t.Parallel() + ctx := zlog.Test(context.Background(), t) tests := []struct { expectedFS map[string]bool testFile string @@ -562,10 +746,11 @@ func TestTarConcatenate(t *testing.T) { if err != nil { t.Fatalf("failed to open test tar: %v", err) } - sys, err := New(f) + sys, err := tarfs.New(ctx, f, -1, nil) if err != nil { t.Fatalf("failed to create tarfs: %v", err) } + defer sys.Close() if err := fs.WalkDir(sys, ".", func(path string, d fs.DirEntry, err error) error { if err != nil { return err @@ -593,12 +778,14 @@ func TestTarConcatenate(t *testing.T) { } func TestInvalidName(t *testing.T) { + t.Parallel() + ctx := zlog.Test(context.Background(), t) f, err := os.Open(`testdata/bad_name.tar`) if err != nil { t.Fatalf("failed to open test tar: %v", err) } defer f.Close() - sys, err := New(f) + sys, err := tarfs.New(ctx, f, -1, nil) if err != nil { t.Fatalf("failed to create tarfs: %v", err) }