From 0b98b9e02b372b67652b1dd35f04b916ca161526 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Fri, 13 Oct 2023 12:14:19 -0500 Subject: [PATCH 1/5] rpm: add failure case for package tests Signed-off-by: Hank Donnay --- rpm/packagescanner_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rpm/packagescanner_test.go b/rpm/packagescanner_test.go index d605539c0..36e001380 100644 --- a/rpm/packagescanner_test.go +++ b/rpm/packagescanner_test.go @@ -106,6 +106,9 @@ func (tc PackageTestcase) Run(ctx context.Context, a *test.CachedArena) func(*te t.Error(err) } t.Logf("found %d packages", len(got)) + if len(got) == 0 { + t.FailNow() + } if !cmp.Equal(got, want, rpmtest.Options) { t.Error(cmp.Diff(got, want, rpmtest.Options)) } From 91562a429105d983d4323d13b1872fae0539fc53 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Tue, 24 Oct 2023 16:34:40 -0500 Subject: [PATCH 2/5] fetch: remove http.Client argument With the `http.DefaultClient` now being poisoned, there's no "good" http.Client for a test to have. This change removes the argument and uses a single, package-internal client. Test-Fail: OK Signed-off-by: Hank Donnay --- test/fetch/layer.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/fetch/layer.go b/test/fetch/layer.go index 294cd94db..bb434c0e6 100644 --- a/test/fetch/layer.go +++ b/test/fetch/layer.go @@ -35,8 +35,12 @@ var registry = map[string]*client{ "registry.access.redhat.com": {Root: "https://registry.access.redhat.com/"}, } +var fetchClient = &http.Client{ + Transport: &http.Transport{}, +} + // Layer returns the specified layer contents, cached in the global layer cache. -func Layer(ctx context.Context, t testing.TB, c *http.Client, from, repo string, blob claircore.Digest, opt ...Option) (*os.File, error) { +func Layer(ctx context.Context, t testing.TB, from, repo string, blob claircore.Digest, opt ...Option) (*os.File, error) { t.Helper() opts := make(map[Option]bool) for _, o := range opt { @@ -63,14 +67,11 @@ func Layer(ctx context.Context, t testing.TB, c *http.Client, from, repo string, }) t.Logf("fetching layer into: %s", cachefile) - if c == nil { - c = http.DefaultClient - } client, ok := registry[from] if !ok { return nil, fmt.Errorf("unknown registry: %q", from) } - rc, err := client.Blob(ctx, c, repo, blob) + rc, err := client.Blob(ctx, fetchClient, repo, blob) if err != nil { return nil, err } From 075e23c0e53e33f680985bdcbb1a5ac19bf3a814 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Tue, 24 Oct 2023 16:35:29 -0500 Subject: [PATCH 3/5] periodic: update for `fetch` changes Test-Fail: OK Signed-off-by: Hank Donnay --- test/periodic/periodic_test.go | 2 +- test/periodic/rpm_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/periodic/periodic_test.go b/test/periodic/periodic_test.go index b5ac47a9c..1dd3963b1 100644 --- a/test/periodic/periodic_test.go +++ b/test/periodic/periodic_test.go @@ -10,7 +10,7 @@ import ( ) var ( - pkgClient = &http.Client{} + pkgClient = &http.Client{Transport: &http.Transport{}} fp driver.Fingerprint ) diff --git a/test/periodic/rpm_test.go b/test/periodic/rpm_test.go index 7388f70c0..80bf43f9b 100644 --- a/test/periodic/rpm_test.go +++ b/test/periodic/rpm_test.go @@ -226,7 +226,7 @@ func (doc hydraDoc) Run(dir string) func(*testing.T) { for _, ld := range image.Data[0].Parsed.Layers { // TODO(hank) Need a way to use the nicer API, but pass the // Integration bypass. 
- n, err := fetch.Layer(ctx, t, pkgClient, doc.Registry, doc.Repository, ld, fetch.IgnoreIntegration) + n, err := fetch.Layer(ctx, t, doc.Registry, doc.Repository, ld, fetch.IgnoreIntegration) if err != nil { t.Fatal(err) } From 356515920983ec8c0f3a32ffaec3ad125f17c920 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Tue, 24 Oct 2023 16:35:59 -0500 Subject: [PATCH 4/5] test: update for `fetch` changes Signed-off-by: Hank Donnay --- test/fetcher.go | 3 +-- test/layer.go | 2 +- test/packagescanner.go | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/test/fetcher.go b/test/fetcher.go index de79d4ecb..34ab9d51d 100644 --- a/test/fetcher.go +++ b/test/fetcher.go @@ -3,7 +3,6 @@ package test import ( "context" "fmt" - "net/http" "net/url" "os" "path/filepath" @@ -47,7 +46,7 @@ func (a *CachedArena) LoadLayerFromRegistry(ctx context.Context, t testing.TB, r if err != nil { t.Fatal(err) } - _, err = fetch.Layer(ctx, t, http.DefaultClient, ref.Registry, ref.Name, d) + _, err = fetch.Layer(ctx, t, ref.Registry, ref.Name, d) if err != nil { t.Fatal(err) } diff --git a/test/layer.go b/test/layer.go index e71f11691..9649d6440 100644 --- a/test/layer.go +++ b/test/layer.go @@ -125,7 +125,7 @@ func RealizeLayers(ctx context.Context, t *testing.T, refs ...LayerRef) []clairc t.Error(err) continue } - f, err := fetch.Layer(ctx, t, nil, refs[n].Registry, refs[n].Name, id) + f, err := fetch.Layer(ctx, t, refs[n].Registry, refs[n].Name, id) if err != nil { t.Error(err) continue diff --git a/test/packagescanner.go b/test/packagescanner.go index 7807912db..000caa664 100644 --- a/test/packagescanner.go +++ b/test/packagescanner.go @@ -2,7 +2,6 @@ package test import ( "context" - "net/http" "sort" "strings" "testing" @@ -113,7 +112,7 @@ func (tc ScannerTestcase) RunSubset(ctx context.Context, n int) func(*testing.T) func (tc *ScannerTestcase) getLayer(ctx context.Context, t *testing.T) *claircore.Layer { d := tc.Digest() - n, err := fetch.Layer(ctx, t, http.DefaultClient, tc.Domain, tc.Name, d) + n, err := fetch.Layer(ctx, t, tc.Domain, tc.Name, d) if err != nil { t.Fatal(err) } From 8f53edd1be7a21bcc9d389d4c6a7c39481ae6703 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Thu, 26 Oct 2023 13:50:29 -0500 Subject: [PATCH 5/5] tarfs: rewrite to support random access archives This change adds support for some schemes of encoding a table of contents and allowing individual files to be accessed independently. It breaks the API by adding a "size" parameter (like the archive/zip package) but attempts to autodetect when given a nonsense value. 
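For example, the layer.go call site changes roughly like so (see the diff
below; callers are now also responsible for closing the returned FS):

	// before
	sys, err := tarfs.New(r)

	// after: a negative size asks New to autodetect it, and a nil *os.File
	// spool is accepted until a backing file is actually needed.
	sys, err := tarfs.New(ctx, r, -1, nil)
	...
	l.cleanup = append(l.cleanup, sys)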
Signed-off-by: Hank Donnay --- go.mod | 2 +- layer.go | 3 +- pkg/tarfs/file.go | 173 ++++++++---- pkg/tarfs/fs.go | 284 +++++++++++++++++++ pkg/tarfs/metrics.go | 37 +++ pkg/tarfs/parse.go | 363 +++++++++++++----------- pkg/tarfs/pool.go | 60 ++++ pkg/tarfs/randomaccess.go | 203 ++++++++++++++ pkg/tarfs/seekable_test.go | 122 ++++++++ pkg/tarfs/srv.go | 553 +++++++++++++++++++++++++++++++++++++ pkg/tarfs/tarfs.go | 508 ++-------------------------------- pkg/tarfs/tarfs_test.go | 501 ++++++++++++++++++++++----------- 12 files changed, 1943 insertions(+), 866 deletions(-) create mode 100644 pkg/tarfs/fs.go create mode 100644 pkg/tarfs/metrics.go create mode 100644 pkg/tarfs/pool.go create mode 100644 pkg/tarfs/randomaccess.go create mode 100644 pkg/tarfs/seekable_test.go create mode 100644 pkg/tarfs/srv.go diff --git a/go.mod b/go.mod index 2970ea268..c5ef1a7d2 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,7 @@ require ( github.com/rs/zerolog v1.30.0 github.com/ulikunitz/xz v0.5.11 go.opentelemetry.io/otel v1.19.0 + go.opentelemetry.io/otel/metric v1.19.0 go.opentelemetry.io/otel/trace v1.19.0 golang.org/x/crypto v0.14.0 golang.org/x/sync v0.4.0 @@ -56,7 +57,6 @@ require ( github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.11.1 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect - go.opentelemetry.io/otel/metric v1.19.0 // indirect golang.org/x/mod v0.12.0 // indirect google.golang.org/protobuf v1.31.0 // indirect lukechampine.com/uint128 v1.2.0 // indirect diff --git a/layer.go b/layer.go index 7ad61e744..fa8db62a8 100644 --- a/layer.go +++ b/layer.go @@ -110,13 +110,14 @@ func (l *Layer) Init(ctx context.Context, desc *LayerDescription, r io.ReaderAt) `application/vnd.oci.image.layer.nondistributable.v1.tar`, `application/vnd.oci.image.layer.nondistributable.v1.tar+gzip`, `application/vnd.oci.image.layer.nondistributable.v1.tar+zstd`: - sys, err := tarfs.New(r) + sys, err := tarfs.New(ctx, r, -1, nil) switch { case errors.Is(err, nil): default: return fmt.Errorf("claircore: layer %v: unable to create fs.FS: %w", desc.Digest, err) } l.sys = sys + l.cleanup = append(l.cleanup, sys) default: return fmt.Errorf("claircore: layer %v: unknown MediaType %q", desc.Digest, desc.MediaType) } diff --git a/pkg/tarfs/file.go b/pkg/tarfs/file.go index 0aca24ee7..0a04b6a36 100644 --- a/pkg/tarfs/file.go +++ b/pkg/tarfs/file.go @@ -1,84 +1,141 @@ package tarfs import ( - "archive/tar" "io" "io/fs" - "path/filepath" + "path" "strings" + "time" ) -var _ fs.File = (*file)(nil) - -// File implements fs.File. -type file struct { - h *tar.Header - r *tar.Reader +// Entry is an entry describing a file or a file chunk. +// +// This is the concrete type backing [fs.FileInfo] interfaces returned by +// this package. +type Entry struct { + Xattrs map[string]string `json:"xattrs"` + Type string `json:"type"` + Name string `json:"name"` // NB This is actually the path. 
+ Linkname string `json:"linkName"` + Digest string `json:"digest"` + ChunkDigest string `json:"chunkDigest"` + UserName string `json:"userName"` // eStargz only + GroupName string `json:"groupName"` // eStargz only + ModTime time.Time `json:"modtime"` + AccessTime time.Time `json:"accesstime"` // Zstd chunked only + ChangeTime time.Time `json:"changetime"` // Zstd chunked only + Mode int64 `json:"mode"` + Size int64 `json:"size"` + Devmajor int64 `json:"devMajor"` + Devminor int64 `json:"devMinor"` + Offset int64 `json:"offset"` + EndOffset int64 `json:"endOffset"` // Zstd chunked only + ChunkSize int64 `json:"chunkSize"` + ChunkOffset int64 `json:"chunkOffset"` + UID int `json:"uid"` + GID int `json:"gid"` } -func (f *file) Close() error { - return nil -} +// Entry types. +const ( + typeDir = `dir` + typeReg = `reg` + typeSymlink = `symlink` + typeHardlink = `hardlink` + typeChar = `char` + typeBlock = `block` + typeFifo = `fifo` + typeChunk = `chunk` +) -func (f *file) Read(b []byte) (int, error) { - return f.r.Read(b) +// NewEntryDir returns a new Entry describing a directory at the path "n". +func newEntryDir(n string) Entry { + return Entry{ + Name: n, + Mode: int64(fs.ModeDir | 0o644), + Type: typeDir, + } } -func (f *file) Stat() (fs.FileInfo, error) { - return f.h.FileInfo(), nil +// SortDirent returns a function suitable to pass to [sort.Slice] as a "cmp" +// function. +// +// This is needed because the [io/fs] interfaces specify that [fs.DirEntry] +// slices returned by the ReadDir methods are sorted lexically. +func sortDirent(s []fs.DirEntry) func(i, j int) bool { + return func(i, j int) bool { + return strings.Compare(s[i].Name(), s[j].Name()) == -1 + } } -var _ fs.ReadDirFile = (*dir)(nil) +// Dirent implements [fs.DirEntry] using a backing [*Entry]. +type dirent struct{ *Entry } -// Dir implements fs.ReadDirFile. -type dir struct { - h *tar.Header - es []fs.DirEntry - pos int -} +// Interface assertion for dirent. +var _ fs.DirEntry = dirent{} -func (*dir) Close() error { return nil } -func (*dir) Read(_ []byte) (int, error) { return 0, io.EOF } -func (d *dir) Stat() (fs.FileInfo, error) { return d.h.FileInfo(), nil } -func (d *dir) ReadDir(n int) ([]fs.DirEntry, error) { - es := d.es[d.pos:] - if len(es) == 0 { - if n == -1 { - return nil, nil - } - return nil, io.EOF - } - end := min(len(es), n) - if n == -1 { - end = len(es) - } - d.pos += end - return es[:end], nil +// Name implements [fs.DirEntry]. +func (d dirent) Name() string { return path.Base(d.Entry.Name) } + +// IsDir implements [fs.DirEntry]. +func (d dirent) IsDir() bool { return d.Entry.Type == typeDir } + +// Type implements [fs.DirEntry]. +func (d dirent) Type() fs.FileMode { return fs.FileMode(d.Entry.Mode) & fs.ModeType } + +// Info implements [fs.DirEntry]. +func (d dirent) Info() (fs.FileInfo, error) { + return &inode{Entry: d.Entry}, nil } -func min(a, b int) int { - if a < b { - return a - } - return b +// File implements [fs.File] and [fs.ReadDirFile]. +// +// The ReadDir method errors if called on a non-dir file. +// The Read methods are implemented by a shared 0-size SectionReader for dir files. +type file struct { + inode + *io.SectionReader + dirent []fs.DirEntry + dirpos int } -type dirent struct{ *tar.Header } +// Interface assertions for file. +var ( + _ fs.ReadDirFile = (*file)(nil) + _ fs.File = (*file)(nil) -var _ fs.DirEntry = dirent{} + // Extra interfaces that we don't *need* to implement, but do for certain + // important use cases (namely reading sqlite databases). 
+ _ io.Seeker = (*file)(nil) + _ io.ReaderAt = (*file)(nil) +) -func (d dirent) Name() string { return filepath.Base(d.Header.Name) } -func (d dirent) IsDir() bool { return d.Header.FileInfo().IsDir() } -func (d dirent) Type() fs.FileMode { return d.Header.FileInfo().Mode() & fs.ModeType } -func (d dirent) Info() (fs.FileInfo, error) { return d.FileInfo(), nil } +// Close implements [fs.File]. +func (f *file) Close() error { return nil } -// SortDirent returns a function suitable to pass to sort.Slice as a "cmp" -// function. -// -// This is needed because the fs interfaces specify that DirEntry slices -// returned by the ReadDir methods are sorted lexically. -func sortDirent(s []fs.DirEntry) func(i, j int) bool { - return func(i, j int) bool { - return strings.Compare(s[i].Name(), s[j].Name()) == -1 +// Stat implements [fs.File]. +func (f *file) Stat() (fs.FileInfo, error) { return &f.inode, nil } + +// ReadDir implements [fs.ReadDirFile]. +func (f *file) ReadDir(n int) ([]fs.DirEntry, error) { + if f.Type != `dir` { + return nil, &fs.PathError{ + Op: `readdir`, + Path: f.Entry.Name, + Err: fs.ErrInvalid, + } } + es := f.dirent[f.dirpos:] + end := min(len(es), n) + switch { + case len(es) == 0 && n <= 0: + return nil, nil + case len(es) == 0 && n > 0: + return nil, io.EOF + case n <= 0: + end = len(es) + default: + } + f.dirpos += end + return es[:end], nil } diff --git a/pkg/tarfs/fs.go b/pkg/tarfs/fs.go new file mode 100644 index 000000000..aca2d8e1c --- /dev/null +++ b/pkg/tarfs/fs.go @@ -0,0 +1,284 @@ +package tarfs + +import ( + "bytes" + "context" + "encoding/binary" + "errors" + "fmt" + "io" + "io/fs" + "os" + "runtime" + + "github.com/quay/zlog" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/metric" +) + +// FS implements [fs.FS] over an optionally compressed tar file. +// +// FS buffers contents as needed. +// [FS.Close] must be called to release any held resources. +type FS struct { + cleanup io.Closer + srv +} + +// Decompressor is an interface that abstracts over the exact compression scheme used to compress chunks. +type decompressor interface { + io.Reader + io.WriterTo + Reset(io.Reader) error +} + +// A bunch of magic constants for zstd. +const ( + zstdFrame = 0xFD2FB528 + zstdSkippableMask = 0xFFFFFFF0 + zstdSkippableFrame = 0x184D2A50 + zstdChunkedFrameMagic = 0x78556E496C556E47 +) + +// Fixed bytes for the gzip member containing the eStargz's TOC. +var gzipHeader = []byte{0x1f, 0x8b, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF} + +// New implements a filesystem abstraction over an [io.ReaderAt] containing: +// +// - An eStargz-compatible tar +// - A zstd:chunked-compatible tar +// - An optionally compressed tar +// +// See the links in the package documentation for descriptions of the "eStargz" and "zstd:chunked" schemes. +// Prioritized files specified by eStargz are ignored by this implementation; all file contents are fetched lazily. +// Contents may be compressed via gzip or zstd. +// +// As an optimization, if "r" is an [os.File] containing an uncompressed tar, it will be used directly without internal buffering. +// If the passed "size" is less than zero, the size of the underlying data will attempt to be automatically determined. +// A nil "buf" can be used, but [ErrFileNeeded] will be returned if a backing file is needed. +func New(ctx context.Context, r io.ReaderAt, size int64, buf *os.File) (*FS, error) { + var ( + // Returned FS + sys FS + // Does this constructor need to bail out? 
Also used for metrics. + bail = true + // Some metrics: + seekable = false + compressionKind = `unknown` + ) + ctx = zlog.ContextWithValues(ctx, "component", `pkg/tarfs.New`) + ctx, span := tracer.Start(ctx, "New") + defer func() { + // This is gigantic, sorry. + attrs := []attribute.KeyValue{ + attribute.String("compression", compressionKind), + attribute.Bool("seekable", seekable), + attribute.Bool("success", !bail), + } + span.SetAttributes(attrs...) + + if bail { + span.SetStatus(codes.Error, "unsuccessful tarfs creation") + if sys.cleanup != nil { + if err := sys.cleanup.Close(); err != nil { + zlog.Warn(ctx). + AnErr("cleanup", err). + Msg("errors encountered during error return") + span.RecordError(err) + } + } + } else { + span.SetStatus(codes.Ok, "successful tarfs creation") + } + fsCounter.Add(ctx, 1, metric.WithAttributes(attrs...)) + span.End() + }() + + // If passed in a negative number, try to autodetect the size of the reader. + if size < 0 { + switch v := r.(type) { + case interface{ Size() int64 }: + size = v.Size() + case interface{ Stat() (fs.FileInfo, error) }: + fi, err := v.Stat() + if err != nil { + return nil, fmt.Errorf("tarfs: unable to stat: %w", err) + } + size = fi.Size() + case io.Seeker: + var err error + size, err = v.Seek(0, io.SeekEnd) + if err != nil { + return nil, fmt.Errorf("tarfs: unable to seek: %w", err) + } + if _, err := v.Seek(0, io.SeekStart); err != nil { + return nil, fmt.Errorf("tarfs: unable to seek: %w", err) + } + default: + return nil, errors.New("tarfs: unable to determine size of ReaderAt") + } + } + + footer := make([]byte, 64) // Current maximum size for our supported schemes is 51 bytes. + n, err := r.ReadAt(footer, size-int64(len(footer))) + switch { + case errors.Is(err, nil): + case n == len(footer) && errors.Is(err, io.EOF): + default: + return nil, fmt.Errorf("tarfs: unable to read footer: %w", err) + } + + var toc *toc + var dec decompressor + // Examine the footer: + zframe := footer[len(footer)-48:] + isZstd := zstdSkippableFrame == (binary.LittleEndian.Uint32(zframe)&zstdSkippableMask) && + zstdChunkedFrameMagic == binary.LittleEndian.Uint64(zframe[len(zframe)-8:]) + gframe := footer[len(footer)-51:] + switch { + case isZstd: + z := getZstd() + dec = z + compressionKind = `zstd` + seekable = true + toc, err = extractZstdTOC(ctx, r, z, zframe) + case bytes.Equal(gzipHeader, gframe[:len(gzipHeader)]): // isGzip + z := getGzip() + dec = z + compressionKind = `gzip` + seekable = true + toc, err = extractGzipTOC(ctx, r, z, gframe) + default: + // So this isn't a seekable variant we're aware of. + // + // To be extremely cool, try reading a block and see if we can make + // sense of what's there and handle it as a "normal" tar. + header := make([]byte, 512) + switch n, err := r.ReadAt(header, 0); { + case errors.Is(err, nil): // OK + case n == len(header) && errors.Is(err, io.EOF): // Single member? Odd, but not _not_ OK. + default: + return nil, fmt.Errorf("tarfs: unable to read header: %w", err) + } + var unz bool + Loop: + switch { + case bytes.Equal(header[magicOff:][:8], magicOldGNU) || + bytes.Equal(header[magicOff:][:6], magicGNU) || + bytes.Equal(header[magicOff:][:6], magicPAX): + if dec == nil { + compressionKind = `none` + } + case !unz && dec != nil: + // A previous time around this switch populated a decompressor, so + // load a new block. 
+ b := make([]byte, 512) + dec.Reset(io.NewSectionReader(r, 0, -1)) + if _, err = io.ReadFull(dec, b); err != nil { + // err is set + break + } + header = b + unz = true + goto Loop + case unz: + err = parseErr("unknown kind") + case zstdFrame == binary.LittleEndian.Uint32(header): + dec = getZstd() + compressionKind = `zstd` + goto Loop + case bytes.Equal(gzipHeader[:2], header[:2]): // See RFC1952 2.3.1 for why "2". + dec = getGzip() + compressionKind = `gzip` + goto Loop + } + if err != nil { + return nil, fmt.Errorf("tarfs: error examining standard tar: %w", err) + } + // Now that we're here, the following invariants hold: + // + // - The uncompressed data from the passed-in ReaderAt *is* a tar, to + // our satisfaction. + // - We need to construct the TOC. + // + // Since we need to do a linear read to construct the TOC (as there's no + // way to tell if the compression frames are amenable to our access + // pattern), we may as well buffer the whole thing. This is + // special-cased so that we can read through the stream as it's being + // decompressed. + toc, buf, err = buildTOC(ctx, r, dec, buf) + if err != nil { + return nil, fmt.Errorf("tarfs: unable to build TOC: %w", err) + } + err = sys.init(buf, toc.Entry, inodeIdent) + } + if err != nil { + return nil, fmt.Errorf("tarfs: error initializing FS: %w", err) + } + if toc.Version != 1 { + return nil, errors.New("unsupported version") + } + + if seekable { + if buf == nil { + return nil, ErrFileNeeded + } + d, err := newDiskBuf(r, dec, sys.inodeByIdx, buf) + if err != nil { + return nil, fmt.Errorf("tarfs: unable to create disk buffer: %w", err) + } + sys.cleanup = d + if err := sys.init(d.buf, toc.Entry, d.fetchFile); err != nil { + return nil, fmt.Errorf("tarfs: error initializing FS: %w", err) + } + } + + _, file, line, _ := runtime.Caller(1) + runtime.SetFinalizer(&sys, func(sys *FS) { + panic(fmt.Sprintf("%s:%d: FS not closed", file, line)) + }) + profile.Add(&sys, 1) + bail = false + return &sys, nil +} + +// Close releases any held resources and reports errors encountered while doing so. +// +// Failing to call Close or calling Close on an instance that was not returned by [New] may result in the program panicing. +func (s *FS) Close() error { + runtime.SetFinalizer(s, nil) + profile.Remove(s) + if s.cleanup != nil { + return s.cleanup.Close() + } + return nil +} + +// Interface assertions for FS. +var ( + _ fs.FS = (*FS)(nil) + _ fs.GlobFS = (*FS)(nil) + _ fs.ReadDirFS = (*FS)(nil) + _ fs.ReadFileFS = (*FS)(nil) + _ fs.StatFS = (*FS)(nil) + // Skipped implementing [fs.SubFS], as sharing the backing buffer + // would be complicated and probably end up needing a file lock on it. +) + +// Assert the two openFunc implementations fulfill the type. +var ( + _ openFunc = (*diskBuf)(nil).fetchFile + _ openFunc = inodeIdent +) + +// InodeIdent is an [openFunc] that uses the values in the passed [inode]. +func inodeIdent(r inode) (off, sz int64, err error) { + return r.Offset, r.Entry.Size, nil +} + +// Toc is a table of contents. +type toc struct { + Entry []Entry `json:"entries"` + Version int `json:"version"` +} diff --git a/pkg/tarfs/metrics.go b/pkg/tarfs/metrics.go new file mode 100644 index 000000000..62a59b94f --- /dev/null +++ b/pkg/tarfs/metrics.go @@ -0,0 +1,37 @@ +package tarfs + +import ( + "runtime/pprof" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/trace" +) + +// Metrics singletons. 
+var ( + tracer trace.Tracer + meter metric.Meter +) + +// FsCounter is the metrics for the [New] function. +var fsCounter metric.Int64Counter + +// Profile is a [pprof.Profile] for tracking FS objects. +var profile *pprof.Profile + +func init() { + const pkgname = `github.com/quay/claircore/pkg/tarfs` + tracer = otel.Tracer(pkgname) + meter = otel.Meter(pkgname) + profile = pprof.NewProfile(pkgname + ".FS") + + var err error + fsCounter, err = meter.Int64Counter("fs.creation.count", + metric.WithDescription("total number of tarfs.FS objects constructed"), + metric.WithUnit("{instance}"), + ) + if err != nil { + panic(err) + } +} diff --git a/pkg/tarfs/parse.go b/pkg/tarfs/parse.go index 30c71f498..93f095c1e 100644 --- a/pkg/tarfs/parse.go +++ b/pkg/tarfs/parse.go @@ -3,203 +3,246 @@ package tarfs import ( "archive/tar" "bytes" + "context" + "encoding/binary" + "encoding/json" "errors" "fmt" "io" + "io/fs" + "os" "strconv" -) -// The value we should find in the "magic" position of the tar header. -var ( - magicPAX = []byte("ustar\x00") - magicGNU = []byte("ustar ") - magicOldGNU = []byte("ustar \x00") + "github.com/klauspost/compress/gzip" + "github.com/klauspost/compress/zstd" + "go.opentelemetry.io/otel/attribute" ) -// ErrFormat can be compared via [errors.Is] against errors reported by [New] -// to determine if the tar fail is considered well-formed. +// ErrFormat can be compared via [errors.Is] against errors reported by +// [New] to determine if the tar file or relevant footer is considered +// well-formed. var ErrFormat = errors.New("tarfs: format error reading file") -// ParseErr returns an error that .Is reports true for ErrFormat. +// ParseErr returns an error that [errors.Is] reports true for [ErrFormat]. // // The `%w` verb does not work. func parseErr(f string, v ...interface{}) error { return parseError(fmt.Sprintf(f, v...)) } +// ParseError is the concrete type out of [parseErr]. type parseError string func (e parseError) Is(tgt error) bool { return tgt == ErrFormat } func (e parseError) Error() string { return string(e) } -// FindSegments looks at a tar blockwise to establish where individual files and -// their headers are stored. Each returned segment describes a region that is -// not a complete tar file, but can have exactly one file read from it. -func findSegments(r io.ReaderAt) ([]segment, error) { - // Constants and offsets from POSIX. - const ( - blockSz = 512 - magicOff = 257 - versionOff = 263 - typeflag = 156 - sizeOff = 124 - ) - b := make([]byte, blockSz) - var ret []segment - // Start block of the current segment. - var cur int64 - // Block number being read. - var blk int64 - // Has the parser seen a zeroes block. - var zeroes bool - -Scan: - for { - off := blk * blockSz - n, err := r.ReadAt(b, off) - switch { - case errors.Is(err, nil) && n != blockSz: - // Should be impossible with a well-formed archive, so raise an - // error. Should also be impossible with a conforming [io.ReaderAt]. - return nil, parseErr("short read at offset: %d (got: %d, want: %d)", off, n, blockSz) - case errors.Is(err, nil): // OK - case errors.Is(err, io.EOF): - switch { - case n == 0: - // Early EOF on a block boundary. Let it slide. - // - // Handle this case because some layers in the wild are a single - // directory block, with no trailer. - break Scan - case n == blockSz: - // Make sure to process the read, even if EOF was returned. 
- default: - return nil, parseErr("unexpected EOF at %d: %v", off, err) - } - default: - return nil, err - } +// ErrFileNeeded is reported when an [os.File] is needed for buffering tar contents, but has not been provided. +var ErrFileNeeded = errors.New("tarfs: *os.File needed but not provided") + +// BuildTOC builds a table of contents from the optionally-compressed bytes in the [io.ReaderAt] pointed to by r. +// Because we may have to decompress the bytes and have to do a linear read anyway, immediately buffer the contents. +// +// This passes the result back out to re-use the codepath for when the TOC is separable from the archive. +// The returned [os.File] is either "r" or "buf", depending on if "r" is backed by an [os.File]. +func buildTOC(ctx context.Context, r io.ReaderAt, dec decompressor, buf *os.File) (*toc, *os.File, error) { + _, span := tracer.Start(ctx, "buildTOC") + defer span.End() + // This copies the passed in bytes, optionally eliding the copy entirely if + // an *os.File was passed in at the start. + _, isFile := r.(*os.File) + if !isFile && buf == nil { + return nil, nil, ErrFileNeeded + } - magic := b[magicOff:][:6] - zeroBlock := true - for _, b := range b { - if b != 0x00 { - zeroBlock = false - break - } + var pos io.Seeker = buf + var tr *tar.Reader + switch { + case dec != nil: + if err := dec.Reset(io.NewSectionReader(r, 0, -1)); err != nil { + return nil, nil, err } - switch { - // Tar files end with two blocks of zeroes. These two arms track that. - case !zeroes && zeroBlock: - zeroes = true - continue - case zeroes && zeroBlock: - // Check for a valid second zeroes block. - break Scan - case zeroes && !zeroBlock: - // Found the first trailer block, but not the second. - return nil, parseErr("bad block at %d: expected second trailer block", off) - // These arms are belt-and-suspenders to make sure we're reading a - // header block and not a contents block, somehow. - case bytes.Equal(b[magicOff:][:8], magicOldGNU): - // OldGNU madness. This arm matching means the headers aren't - // actually POSIX conforming, but hopefully it's not an issue. Just - // roll with it. USTAR was standardized in 1988; frankly, it's the - // creator's fault if something doesn't work right because there's - // some incompatibility. - case !bytes.Equal(magic, magicPAX) && !bytes.Equal(magic, magicGNU): - return nil, parseErr("bad block at %d: got magic %+q", off, magic) - case !bytes.Equal(b[versionOff:][:2], []byte("00")): - return nil, parseErr("bad block at %d: got version %+q", off, b[versionOff:][:2]) + tr = tar.NewReader(io.TeeReader(dec, buf)) + case isFile: + span.AddEvent("using ReaderAt directly") + // Use the backing File directly. + buf = r.(*os.File) + pos = buf + if _, err := pos.Seek(0, io.SeekStart); err != nil { + return nil, nil, err } - encSz := b[sizeOff:][:12] - sz, err := parseNumber(encSz) + tr = tar.NewReader(buf) + default: + span.AddEvent("no decompression") + tr = tar.NewReader(io.TeeReader(io.NewSectionReader(r, 0, -1), buf)) + } + + var toc toc + toc.Version = 1 + toc.Entry = make([]Entry, 0, 4096) // Guess at initial capacity. 
+ h, err := tr.Next() + for ; err == nil; h, err = tr.Next() { + off, err := pos.Seek(0, io.SeekCurrent) if err != nil { - return nil, parseErr("invalid number: %024x: %v", encSz, err) + return nil, nil, parseErr("tarfs: error reading header: %v", err) } - nBlk := sz / blockSz - if sz%blockSz != 0 { - nBlk++ - } - blk++ // Current header block - blk += nBlk // File contents - switch b[typeflag] { - case tar.TypeXHeader, tar.TypeGNULongLink, tar.TypeGNULongName, tar.TypeGNUSparse: - // All these are prepended to a "real" entry. - case tar.TypeBlock, tar.TypeChar, tar.TypeCont, tar.TypeDir, tar.TypeFifo, tar.TypeLink, tar.TypeReg, tar.TypeRegA, tar.TypeSymlink: - // Found a data block, emit it: - ret = append(ret, segment{start: cur * blockSz, size: (blk - cur) * blockSz}) - fallthrough + // Turn the tar.Header into an Entry + i := len(toc.Entry) + toc.Entry = append(toc.Entry, Entry{}) + e := &toc.Entry[i] + e.Offset = off + // Copy everything over: + e.Size = h.Size + // Callee does path normalization. + e.Name = h.Name + e.Linkname = h.Linkname + e.ModTime = h.ModTime + e.AccessTime = h.AccessTime + e.ChangeTime = h.ChangeTime + e.UserName = h.Uname + e.GroupName = h.Gname + e.UID = h.Uid + e.GID = h.Gid + e.Mode = h.Mode + e.Devmajor = h.Devmajor + e.Devminor = h.Devminor + switch h.Typeflag { + case tar.TypeReg: + e.Type = typeReg + case tar.TypeLink: + e.Type = typeHardlink + case tar.TypeDir: + e.Type = typeDir + e.Mode |= int64(fs.ModeDir) + case tar.TypeSymlink: + e.Type = typeSymlink + e.Mode |= int64(fs.ModeSymlink) + case tar.TypeChar: + e.Type = typeChar + e.Mode |= int64(fs.ModeDevice) + e.Mode |= int64(fs.ModeCharDevice) + case tar.TypeBlock: + e.Type = typeBlock + e.Mode |= int64(fs.ModeDevice) + case tar.TypeFifo: + e.Type = typeFifo + e.Mode |= int64(fs.ModeNamedPipe) default: - // any blocks not enumerated are not handled. - cur = blk + return nil, nil, fmt.Errorf("tarfs: unknown kind: %v", h.Typeflag) } } - return ret, nil -} + if !errors.Is(err, io.EOF) { + return nil, nil, parseErr("tarfs: error reading header: %v", err) + } + span.SetAttributes(attribute.Int("entries", len(toc.Entry))) -// Segment describes one file in a tar, including relevant headers. -type segment struct { - start int64 - size int64 + return &toc, buf, nil } -// ParseNumber extracts a number from the encoded form in the tar header. -// -// This is based on the internal version in archive/tar. -func parseNumber(b []byte) (int64, error) { - // If in binary format, decode it. - if len(b) > 0 && b[0]&0x80 != 0 { - // See also: src/archive/tar/strconv.go - // Handling negative numbers relies on the following identity: - // -a-1 == ^a - // - // If the number is negative, we use an inversion mask to invert the - // data bytes and treat the value as an unsigned number. - var inv byte // 0x00 if positive or zero, 0xff if negative - if b[0]&0x40 != 0 { - inv = 0xff - } +/* +NOTE(hank) The eStargz format differs from the stargz format in where the tar headers are placed relative to the gzip headers. +In stargz, the tar headers are inside the gzip header, so readers can extract individual members knowing only the offset. +The eStargz format implemented here has gzip headers "inside" the tar members, and so the length needs to be known beforehand. +This format results in every member having at least one extra tar block describing the next member, except for the first (just a header) and last (just data). +The TOC is specially defined to be a whole tar by itself. 
+The zstd chunked scheme is largely the same, but places the TOC in a skippable frame (a concept nonexistent in gzip) instead of as a tar member. +*/ - var x uint64 - for i, c := range b { - c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing - if i == 0 { - c &= 0x7f // Ignore signal bit in first byte - } - if (x >> 56) > 0 { - return 0, errors.New("integer overflow") - } - x = x<<8 | uint64(c) - } - if (x >> 63) > 0 { - return 0, errors.New("integer overflow") - } - if inv == 0xff { - return ^int64(x), nil - } - return int64(x), nil +// This limit copied out of the containers/storage code. +// 50MiB seems way too big but it's better than not having it. +// 🤷 +const bufLimit = (1 << 20) * 50 + +// ExtractZstdTOC pulls the table of contents out of the skippable frame as indicated in the footer. +func extractZstdTOC(ctx context.Context, r io.ReaderAt, z *zstd.Decoder, footer []byte) (*toc, error) { + const crfsKind = 1 + ctx, span := tracer.Start(ctx, "extractZstdTOC") + defer span.End() + + var h zstd.Header + if err := h.Decode(footer); err != nil { + return nil, err + } + if !h.Skippable || h.SkippableID != 0 { + return nil, parseErr("martian zstd frame") } - // Otherwise, it's stringified. - b = bytes.Trim(b, " \x00") - if len(b) == 0 { - return 0, nil + if h.SkippableSize != 40 { + return nil, parseErr("unexpected frame size") } - n, err := strconv.ParseUint(cstring(b), 8, 64) + + b := footer[h.HeaderSize:] + offset := int64(binary.LittleEndian.Uint64(b[0:8])) + length := binary.LittleEndian.Uint64(b[8:16]) + if length > bufLimit { + return nil, errors.New("manifest too big") + } + lengthUncompressed := binary.LittleEndian.Uint64(b[16:24]) + if lengthUncompressed > bufLimit { + return nil, errors.New("manifest too big") + } + if kind := binary.LittleEndian.Uint64(b[24:32]); kind != crfsKind { + return nil, parseErr("invalid manifest kind") + } + + src, dst := make([]byte, length), make([]byte, 0, lengthUncompressed) + n, err := r.ReadAt(src, offset) + switch { + case errors.Is(err, nil): + case n == len(src) && errors.Is(err, io.EOF): + default: + return nil, fmt.Errorf("tarfs: unable to read manifest: %w", err) + } + + dst, err = z.DecodeAll(src, dst) if err != nil { - return 0, err + return nil, fmt.Errorf("tarfs: unable to decompress manifest: %w", err) } - return int64(n), nil + + var toc toc + if err := json.Unmarshal(dst, &toc); err != nil { + return nil, fmt.Errorf("tarfs: unable to decode manifest: %w", err) + } + span.SetAttributes(attribute.Int("entries", len(toc.Entry))) + return &toc, nil } -// Cstring interprets the byte slice as a C string. If there is no NULL, it -// returns the entire slice as a string. -// -// The entire-slice behavior handles the case where a fixed size header field is -// fully populated. -func cstring(b []byte) string { - if i := bytes.IndexByte(b, 0); i >= 0 { - return string(b[:i]) +// ExtractGzipTOC pulls the table of contents out of the tar archive as indicated in the footer. +func extractGzipTOC(ctx context.Context, r io.ReaderAt, z *gzip.Reader, footer []byte) (*toc, error) { + ctx, span := tracer.Start(ctx, "extractGzipTOC") + defer span.End() + if err := z.Reset(bytes.NewReader(footer)); err != nil { + return nil, err + } + + b := z.Extra + // The main deviation from the stargz footer is the addition of the following 4 bytes to make the extra data RFC1952 compliant. + // The go "gzip" package doesn't bother with this framing, so that's probably where the initial (mis)use came from. 
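+	// Rough sketch of the extra-field layout, inferred from the checks below:
+	//   b[0:2]   subfield ID "SG"
+	//   b[2:4]   subfield length (22, little-endian)
+	//   b[4:20]  TOC offset, 16 hex digits
+	//   b[20:26] the literal "STARGZ"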
+ if b[0] != 'S' || b[1] != 'G' || + binary.LittleEndian.Uint16(b[2:4]) != 22 || + !bytes.Equal(b[20:26], []byte("STARGZ")) { + return nil, parseErr("invalid extra field") + } + offset, err := strconv.ParseInt(string(b[4:20]), 16, 64) + if err != nil { + return nil, parseErr("unable to parse offset: %v", err) + } + + // Abuse a section reader to get a cursor over the ReaderAt. + if err := z.Reset(io.NewSectionReader(r, offset, bufLimit)); err != nil { + return nil, err + } + tr := tar.NewReader(z) + h, err := tr.Next() + switch { + case err != nil: + return nil, err + case h.Name != `stargz.index.json`: + return nil, parseErr("found unexpected file: %v", h.Name) + } + + var toc toc + if err := json.NewDecoder(tr).Decode(&toc); err != nil { + return nil, fmt.Errorf("tarfs: unable to decode manifest: %w", err) } - return string(b) + span.SetAttributes(attribute.Int("entries", len(toc.Entry))) + return &toc, nil } diff --git a/pkg/tarfs/pool.go b/pkg/tarfs/pool.go new file mode 100644 index 000000000..cd2aef005 --- /dev/null +++ b/pkg/tarfs/pool.go @@ -0,0 +1,60 @@ +package tarfs + +import ( + "fmt" + "sync" + + "github.com/klauspost/compress/gzip" + "github.com/klauspost/compress/zstd" +) + +// TODO(hank) Remove these wrapper functions when a generic sync.Pool lands. + +// GetCopyBuf pulls a buffer from the pool. +func getCopyBuf() []byte { + b := bufpool.Get() + if b == nil { + // Allocate 1 MiB buffers to start. + // This is much too big for small files and much too small for big files. + return make([]byte, 1024*1024) + } + return b.([]byte) +} + +// PutCopyBuf returns a buffer to the pool. +func putCopyBuf(b []byte) { bufpool.Put(b) } + +// Getzstd pulls an initialized decoder from the pool. +func getZstd() *zstd.Decoder { + d := zstdpool.Get() + if d == nil { + var err error + if d, err = zstd.NewReader(nil); err != nil { + // Should *never* happen -- a nil Reader causes only internal setup allocations. + panic(fmt.Sprintf("error creating zstd reader: %v", err)) + } + } + return d.(*zstd.Decoder) +} + +// PutZstd returns a decoder to the pool. +func putZstd(d *zstd.Decoder) { zstdpool.Put(d) } + +// GetGzip pulls an initialized reader from the pool. +func getGzip() *gzip.Reader { + r := gzippool.Get() + if r == nil { + return new(gzip.Reader) + } + return r.(*gzip.Reader) +} + +// PutGzip returns a reader to the pool. +func putGzip(r *gzip.Reader) { gzippool.Put(r) } + +// Package-level pools for the respective objects. +var ( + bufpool sync.Pool + zstdpool sync.Pool + gzippool sync.Pool +) diff --git a/pkg/tarfs/randomaccess.go b/pkg/tarfs/randomaccess.go new file mode 100644 index 000000000..60829805a --- /dev/null +++ b/pkg/tarfs/randomaccess.go @@ -0,0 +1,203 @@ +package tarfs + +import ( + "bytes" + "context" + "crypto/sha256" + "crypto/sha512" + "encoding/hex" + "fmt" + "hash" + "io" + "io/fs" + "os" + "runtime" + "strings" + "sync" + + "github.com/klauspost/compress/gzip" + "github.com/klauspost/compress/zstd" + "golang.org/x/sync/semaphore" +) + +// DiskBuf encapsulates all the logic for random access and decompression from +// an upstream [io.ReaderAt] into a locally-backed buffer. +type diskBuf struct { + // All "buf" reads/writes are done by *At methods, meaning the file only needs + // to be locked when resizing it. + resizeMu sync.Mutex + upstream io.ReaderAt + dec decompressor + chunk func(int) inode + buf *os.File + sem *semaphore.Weighted +} + +// NewDiskBuf allocates a disk-backed buffer. +// +// The passed-in [os.File] is not closed with the diskBuf. 
+func newDiskBuf(r io.ReaderAt, z decompressor, chunk func(int) inode, spool *os.File) (*diskBuf, error) { + d := diskBuf{ + upstream: r, + dec: z, + chunk: chunk, + buf: spool, + sem: semaphore.NewWeighted(int64(runtime.GOMAXPROCS(0))), + } + return &d, nil +} + +// FetchFile reads compressed data from the backing [io.ReaderAt] and decompresses +// it into a contiguous section of the backing buffer. The buffer has no maximum +// size and is never re-used; that is to say, the maximum size is about the same +// as if the data were downloaded and decompressed in its entirety. +// +// The buffering could be implemented a few different ways transparently: +// +// - Each opened file could get its own backing file buffer that's removed once all +// opened [fs.File] implementations for it are Closed. +// - The backing file could be used as a ring buffer once it hits some size, +// at the cost of inserting file headers and then needing to seek to the next open +// section. +// - The ring buffer idea could also shrink down, if a large file pushed it over +// the target size and was then closed. +// +// All of these would require some reference counting and in the case of the ring-buffer, +// a scavenger routine to collapse and potentially move segments. The execution time +// may be able to be mitigated with fs-specific optimizations, but the need for any +// of these strategies should be demonstrated first. +// +// Multi-chunk files are handled transparently, but not implemented in the zstd:chunked +// format as implemented by the github.com/containers modules. +func (d *diskBuf) fetchFile(r inode) (off, sz int64, err error) { + const op = `fetchfile` + d.sem.Acquire(context.Background(), 1) + defer d.sem.Release(1) + + // NB Weirdness for correct locking without extra scope. + d.resizeMu.Lock() + off, err = d.buf.Seek(0, io.SeekCurrent) + if err == nil { + err = d.buf.Truncate(off + int64(r.Entry.Size)) + } + if err == nil { + _, err = d.buf.Seek(0, io.SeekEnd) + } + d.resizeMu.Unlock() + if err != nil { + return -1, -1, &fs.PathError{ + Op: op, + Path: r.Name(), + Err: err, + } + } + sz = int64(r.Entry.Size) + + fhash := io.Discard + cksum, hasher, err := toCheck(r.Entry.Digest) + // Any parse error from this optional property should be ignored. + // Any use of "cksum" or "hasher" needs to be checked that they're + // not nil. + if err == nil { + fhash = hasher + } + // From here on out, reset the file cursor on error cases. + chunks := []inode{r} + for _, ei := range r.chunk { + chunks = append(chunks, d.chunk(ei)) + } + + // Using a bigger buffer here allows for the underlying ReaderAt to be + // dumber. This is expected to be backed by making HTTP requests, where the + // latency is much worse than disk. As such, it makes sense to do fewer, + // bigger reads rather than many tiny reads. A bigger buffer here results + // in bigger range requests to the underlying ReaderAt, and means it + // (hopefully) won't need prefetching logic in an HTTP-backed + // implementation. + cp := getCopyBuf() + defer putCopyBuf(cp) + + // This src, dst pair works by swapping out what compressed bytes are being + // fed into the decoder. Every chunk resets the reader to pull from a new + // section of compressed source, then opportunistically validates the + // decompressed data chunk-wise. 
+ src := io.TeeReader(io.LimitReader(d.dec, sz), fhash) + dst := io.NewOffsetWriter(d.buf, off) + for n, e := range chunks { + zsrc := io.NewSectionReader(d.upstream, e.Offset, -1) // abuse a section reader to get a cursor + if err := d.dec.Reset(zsrc); err != nil { + return -1, -1, &fs.PathError{ + Op: op, + Path: r.Name(), + Err: err, + } + } + hasher := io.Discard + cksum, hash, err := toCheck(e.ChunkDigest) + if err == nil { // Zstd:chunked doesn't treat this as a mandatory property. + hasher = hash + } + if _, err := io.CopyBuffer(dst, io.TeeReader(src, hasher), cp); err != nil { + return -1, -1, &fs.PathError{ + Op: op, + Path: r.Name(), + Err: err, + } + } + if cksum != nil && !bytes.Equal(cksum, hash.Sum(nil)) { + return -1, -1, &fs.PathError{ + Op: op, + Path: r.Name(), + Err: fmt.Errorf("failed to validate chunk %d: got: %q, want: %q", + n, hex.EncodeToString(hash.Sum(nil)), hex.EncodeToString(cksum)), + } + } + } + + if cksum != nil && !bytes.Equal(cksum, hasher.Sum(nil)) { + return -1, -1, &fs.PathError{ + Op: op, + Path: r.Name(), + Err: fmt.Errorf("failed to validate file: got: %q, want: %q", + hex.EncodeToString(hasher.Sum(nil)), hex.EncodeToString(cksum)), + } + } + + return off, sz, nil +} + +// Close puts decompressors back into their pools and closes the buffer file. +func (d *diskBuf) Close() error { + switch z := d.dec.(type) { + case *gzip.Reader: + putGzip(z) + case *zstd.Decoder: + putZstd(z) + default: + panic(fmt.Sprintf("programmer error: unknown decompressor type %T", z)) + } + return nil +} + +// ToCheck takes an OCI-like digest string and returns the binary checksum and a +// hasher using the same algorithm. +func toCheck(d string) ([]byte, hash.Hash, error) { + alg, enc, ok := strings.Cut(d, ":") + if !ok { + return nil, nil, fmt.Errorf("invalid digest %q", d) + } + var h hash.Hash + switch alg { + case `sha256`: + h = sha256.New() + case `sha512`: + h = sha512.New() + default: + return nil, nil, fmt.Errorf("unknown algorithm: %q", alg) + } + b, err := hex.DecodeString(enc) + if err != nil { + return nil, nil, err + } + return b, h, nil +} diff --git a/pkg/tarfs/seekable_test.go b/pkg/tarfs/seekable_test.go new file mode 100644 index 000000000..b8407e7d6 --- /dev/null +++ b/pkg/tarfs/seekable_test.go @@ -0,0 +1,122 @@ +package tarfs_test + +// These tests need to be in a separate package in order to prevent a cycle. + +import ( + "bytes" + "context" + "io/fs" + "os" + "path/filepath" + "testing" + + "github.com/quay/zlog" + + "github.com/quay/claircore" + "github.com/quay/claircore/pkg/tarfs" + "github.com/quay/claircore/test/fetch" +) + +type seekableTestcase struct { + Name string + Registry, Namespace string + Layer claircore.Digest + Check []checkFunc +} +type checkFunc func(*testing.T, fs.FS) + +func (tc seekableTestcase) Run(ctx context.Context) func(*testing.T) { + return func(t *testing.T) { + t.Helper() + t.Parallel() + ctx = zlog.Test(ctx, t) + f, err := fetch.Layer(ctx, t, + tc.Registry, tc.Namespace, tc.Layer, + fetch.NoDecompression) + if err != nil { + t.Fatal(err) + } + defer f.Close() + fi, err := f.Stat() + if err != nil { + t.Fatal(err) + } + buf, err := os.Create(filepath.Join(t.TempDir(), filepath.Base(t.Name()))) + if err != nil { + t.Error(err) + } + t.Cleanup(func() { + if err := buf.Close(); err != nil { + t.Error(err) + } + }) + + sys, err := tarfs.New(ctx, f, fi.Size(), buf) + if err != nil { + t.Fatal(err) + } + defer sys.Close() + // Do a walk, unconditionally. 
+ fs.WalkDir(sys, ".", func(p string, d fs.DirEntry, err error) error { + if err != nil { + t.Error(err) + } + // t.Log(p) + return nil + }) + for _, f := range tc.Check { + f(t, sys) + } + } +} + +func TestSeekable(t *testing.T) { + ctx := context.Background() + t.Parallel() + for _, tc := range []seekableTestcase{ + { + Name: "eStargz", + // layer from docker://ghcr.io/stargz-containers/fedora:30-esgz + Registry: "ghcr.io", + Namespace: "stargz-containers/fedora", + Layer: claircore.MustParseDigest(`sha256:a29c6008f8735dd289a374dabb8a277f6bbb8922d921a9c89861794196d6074c`), + Check: []checkFunc{ + func(t *testing.T, sys fs.FS) { + b, err := fs.ReadFile(sys, `etc/os-release`) + if err != nil { + t.Error(err) + return + } + if !bytes.Contains(b, []byte(`CPE_NAME="cpe:/o:fedoraproject:fedora:30"`)) { + t.Logf("seemingly garbled contents: %+q", string(b)) + t.Fail() + } + t.Log("etc/os-release: OK") + }, + }, + }, + { + Name: "zstd:chunked", + // layer from docker://docker.io/gscrivano/zstd-chunked:fedora + Registry: "docker.io", + Namespace: "gscrivano/zstd-chunked", + Layer: claircore.MustParseDigest(`sha256:9970d86e7cb7a3c7ee0a3c8fc2131880b387bc5fe8022a258b456ab2cda4303f`), + Check: []checkFunc{ + func(t *testing.T, sys fs.FS) { + b, err := fs.ReadFile(sys, `etc/os-release`) + if err != nil { + t.Error(err) + return + } + if !bytes.Contains(b, []byte(`CPE_NAME="cpe:/o:fedoraproject:fedora:35"`)) { + t.Logf("seemingly garbled contents: %+q", string(b)) + t.Fail() + } + t.Log("etc/os-release: OK") + }, + }, + }, + } { + t.Run(tc.Name, tc.Run(ctx)) + } +} diff --git a/pkg/tarfs/srv.go b/pkg/tarfs/srv.go new file mode 100644 index 000000000..bb37b48c9 --- /dev/null +++ b/pkg/tarfs/srv.go @@ -0,0 +1,553 @@ +package tarfs + +import ( + "fmt" + "io" + "io/fs" + "math/big" + "path" + "path/filepath" + "sort" + "strings" + "sync" + "time" +) + +// Srv is the struct handling all the mapping for file data. +// +// I couldn't think of a good name for this, so "server" because it serves file data. +// This is meant to be embedded, then filled in with hooks depending on the format that's being read out of the backing reader. +// +// This whole dance is complicated, because we have subtly different data flows that we want to abstract away: +// - block-based "dumb" data requests +// - whole-file requests. +// +// We also need to optionally do decompresison which means in the dumb case, there's no knowing where block boundaries actually lie. +type srv struct { + // Open is a hook function that's called once for every regular file in + // order for the embedding struct to make file data available in "r" and + // report the offset and size. + open openFunc + // Raw contains uncompressed, ready-to-read file data. + raw io.ReaderAt + // Lookup is a map of strings to indexes in the following slices. + lookup map[string]int + // Mu guards the following members. + // This is only used once the object is fully initialized. + mu sync.RWMutex + // Initd is a bitset tracking initialized meta elements. + initd big.Int + // The following slices are file metadata, split into externally-visible and + // internal-only components. + entry []Entry + meta []meta +} + +// Interface assertions for srv. +var ( + _ fs.FS = (*srv)(nil) + _ fs.GlobFS = (*srv)(nil) + _ fs.ReadDirFS = (*srv)(nil) + _ fs.ReadFileFS = (*srv)(nil) + _ fs.StatFS = (*srv)(nil) + // Skipped implementing [fs.SubFS], as sharing the backing buffer would be complicated. 
+) + +// OpenFunc is the hook for embedders of srv to make data ready in the ReaderAt and report the size and offset. +type openFunc func(inode) (offset, size int64, err error) + +// Inode is a fake inode(7)-like structure for keeping track of filesystem entries. +// +// Inode implements [fs.FileInfo]. +// +// Inode ties together the immutable [Entry] and the internal book-keeping [meta] structs. +// Any given inode is not unique, but the structs pointed to are unique and are shared. +type inode struct { + *Entry + *meta + // N is the index data is kept at in the parent srv's members. + N int +} + +// Interface assertion for inode. +var _ fs.FileInfo = (*inode)(nil) + +// Name implements [fs.FileInfo]. +func (i *inode) Name() string { return path.Base(i.Entry.Name) } + +// Size implements [fs.FileInfo]. +func (i *inode) Size() int64 { return int64(i.Entry.Size) } + +// Mode implements [fs.FileInfo]. +func (i *inode) Mode() fs.FileMode { return fs.FileMode(i.Entry.Mode) } + +// ModTime implements [fs.FileInfo]. +func (i *inode) ModTime() time.Time { return i.Entry.ModTime } + +// IsDir implements [fs.FileInfo]. +func (i *inode) IsDir() bool { return i.Type == typeDir } + +// Sys implements [fs.FileInfo]. +func (i *inode) Sys() interface{} { return i.Entry } + +// Meta is internal book-keeping for file entries. +// +// This is kept alongside the entries to cut-down on duplication between the external [Entry] type and an internal type that would have the same information along with book-keeping fields. +type meta struct { + children map[int]struct{} + chunk []int + off, sz int64 +} + +// Init initializes the srv. +func (s *srv) init(r io.ReaderAt, es []Entry, open openFunc) error { + sz := len(es) + 1 + s.lookup = make(map[string]int, sz) + s.entry = make([]Entry, 0, sz) + s.meta = make([]meta, 0, sz) + s.initd.SetUint64(0) + s.initd.SetBit(&s.initd, sz, 0) + s.raw = r + s.open = open + + links := make(map[string][]string) + // Mkdir the root: + const root = `.` + i := len(s.entry) + s.entry = append(s.entry, newEntryDir(root)) + s.meta = append(s.meta, meta{ + children: make(map[int]struct{}), + }) + s.lookup[root] = i + // This loop needs to copy the Entries, because we may need to create entries. + // That shouldn't happen in the seekable variants but *does* happen in normal tars. + for _, e := range es { + e.Name = normPath(e.Name) // Normpath for good measure. + switch e.Type { + case typeDir: + // Hack to avoid going into the whole add path. + if _, ok := s.lookup[e.Name]; ok { + continue + } + case typeSymlink: + // Unsure what's allowed in this field. + if !path.IsAbs(e.Linkname) { + e.Linkname = path.Join(path.Dir(e.Name), e.Linkname) + } + fallthrough + case typeHardlink: + e.Linkname = normPath(e.Linkname) + } + if err := s.add(e, links); err != nil { + return err + } + } + // Cleanup any dangling hardlinks. + for _, rms := range links { + for _, rm := range rms { + idx := s.lookup[rm] + delete(s.lookup, rm) + p := s.meta[s.lookup[path.Dir(rm)]] + delete(p.children, idx) + } + } + return nil +} + +// Add does what it says on the tin. +// +// In addition, it creates any needed leading directory elements. +// The caller should check for the existence of an "out of order" directory, as this function attempts to follow the POSIX spec on actions when "creating" a file that already exists: +// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap01.html#tagtcjh_14 +// +// The "hardlink" map is used for deferring hardlink creation. 
+func (s *srv) add(ne Entry, hardlink map[string][]string) error { + const op = `create` + name := ne.Name +Again: + if i, ok := s.lookup[name]; ok { + e := &s.entry[i] + et, nt := e.Type, ne.Type + switch { + case nt == typeChunk: + // Chunks only exist in the seekable variants. + ni := len(s.entry) + s.entry = append(s.entry, ne) + s.meta = append(s.meta, meta{}) + m := &s.meta[i] + m.chunk = append(m.chunk, ni) + case nt != typeReg: + // If the new type isn't a regular file, fail. + return &fs.PathError{ + Op: op, + Path: name, + Err: fmt.Errorf("new type (%s) cannot replace existing type (%s): %w", nt, et, fs.ErrExist), + } + case et == typeDir: + // If the existing type is a directory, fail. + return &fs.PathError{ + Op: op, + Path: name, + Err: fmt.Errorf("new file cannot replace directory: %w", fs.ErrExist), + } + case et == typeSymlink: // Follow the link target. + name = e.Linkname + goto Again + } + // Should be OK to replace now. + // Shadow the previous inode so we don't have to renumber everything. + s.entry[i] = ne + return nil + } + + // Hardlink handling: if the target doesn't exist yet, make a note in passed-in map. + if ne.Type == typeHardlink { + tgt := ne.Linkname + if _, ok := s.lookup[tgt]; !ok { + hardlink[tgt] = append(hardlink[tgt], name) + } + } + delete(hardlink, name) + i := len(s.entry) + s.entry = append(s.entry, ne) + s.meta = append(s.meta, meta{}) + if ne.Type == typeDir { + s.meta[i].children = make(map[int]struct{}) + } + s.lookup[name] = i + + cycle := make(map[int]struct{}) + dir := filepath.Dir(name) +AddEnt: + switch dir { + case name: + // Skip + case ".": + // Add was called with a root entry, like "a" -- make sure to link this to the root directory. + root := &s.meta[s.lookup["."]] + root.children[i] = struct{}{} + default: + parent, err := s.getInode(op, dir) + if err != nil { + parent, err = s.walkTo(dir, true) + } + if err != nil { + return err + } + if _, ok := cycle[parent.N]; ok { + return &fs.PathError{ + Op: op, + Path: dir, + Err: fmt.Errorf("found cycle when resolving member: %w", fs.ErrInvalid), + } + } + cycle[parent.N] = struct{}{} + switch parent.Type { + case typeDir: + // OK + case typeHardlink: + // This is annoying -- hard linking to directories is weird + fallthrough + case typeSymlink: + dir = parent.Linkname + goto AddEnt + default: + return &fs.PathError{ + Op: op, + Path: parent.Entry.Name, + Err: fmt.Errorf("error while connecting child %q: %w", name, fs.ErrExist), + } + } + parent.children[i] = struct{}{} + } + return nil +} + +// GetInode returns an inode backing "name". +// +// The "op" parameter is used in error reporting. +func (s *srv) getInode(op, name string) (inode, error) { + if !fs.ValidPath(name) { + return inode{}, &fs.PathError{ + Op: op, + Path: name, + Err: fs.ErrInvalid, + } + } + if i, ok := s.lookup[name]; ok { + return s.inodeByIdx(i), nil + } + i, err := s.walkTo(name, false) + if err != nil { + return inode{}, &fs.PathError{ + Op: op, + Path: name, + Err: fs.ErrNotExist, + } + } + return i, nil +} + +// InodeByIdx constructs an inode without checking the provided index. +func (s *srv) inodeByIdx(i int) inode { + return inode{ + Entry: &s.entry[i], + meta: &s.meta[i], + N: i, + } +} + +// WalkTo does a walk from the root as far along the provided path as possible, resolving symlinks as necessary. +// If any segments are missing (including the final segments), they are created as directories if the "create" bool is passed. 
+func (s *srv) walkTo(p string, create bool) (inode, error) { + w := strings.Split(p, "/") + var cur inode + var err error + var b strings.Builder + + cur, err = s.getInode(`walk`, ".") + if err != nil { + return cur, err + } + i := 0 + for lim := len(w); i < lim; i++ { + n := w[i] + if i != 0 { + b.WriteByte('/') + } + b.WriteString(n) + var child inode + var found bool + for ci := range cur.children { + child = s.inodeByIdx(ci) + cn := path.Base(child.Entry.Name) + if cn != n { + continue + } + cycle := make(map[int]struct{}) + Resolve: + for { + if _, ok := cycle[ci]; ok { + return inode{}, &fs.PathError{ + Op: `walk`, + Path: b.String(), + Err: fmt.Errorf("found cycle when resolving member: %w", fs.ErrInvalid), + } + } + cycle[ci] = struct{}{} + switch child.Type { + case typeDir: + break Resolve + case typeSymlink: + tgt := child.Linkname + var ok bool + ci, ok = s.lookup[tgt] + switch { + case ok && create, ok && !create: + child = s.inodeByIdx(ci) + break Resolve + case !ok && create: + s.add(newEntryDir(tgt), nil) + ci = s.lookup[tgt] + child = s.inodeByIdx(ci) + case !ok && !create: + return inode{}, fmt.Errorf("tarfs: walk to %q, but missing segment %q", p, n) + } + case typeReg: + if i == (lim - 1) { + break Resolve + } + return inode{}, &fs.PathError{ + Op: `walk`, + Path: p, + Err: fmt.Errorf("found symlink to regular file while connecting child %q: %w", b.String(), fs.ErrExist), + } + } + } + found = true + break + } + switch { + case found && create, found && !create: + // OK + case !found && create: + fp := b.String() // Make sure to use the full path and not just the member name. + s.add(newEntryDir(fp), nil) + ci := s.lookup[fp] + child = s.inodeByIdx(ci) + case !found && !create: + return inode{}, fmt.Errorf("tarfs: walk to %q, but missing segment %q", p, b.String()) + } + cur = child + } + return cur, nil +} + +// Initialized reports whether the numbered inode is initialized or not. +// +// OpenFunc providers should use this to ensure lock invariants. +func (s *srv) initialized(i int) (ok bool) { + s.mu.RLock() + ok = s.initd.Bit(i) != 0 + s.mu.RUnlock() + return ok +} + +// Realize calls [srv.open] and updates metadata as needed. +// +// This function is a no-op if a previous call reported a nil error for an equivalent inode. +func (s *srv) realize(i inode) (err error) { + if s.initialized(i.N) { + return nil + } + var off, sz int64 + switch { + case i.Type == typeReg && i.Size() != 0: + // Nonzero regular file + off, sz, err = s.open(i) + if err != nil { + return err + } + } + s.mu.Lock() + defer s.mu.Unlock() + // Check if another goroutine has updated the meta struct and the + // initialized bitset. + if s.initd.Bit(i.N) != 0 { + return nil + } + i.meta.off = off + i.meta.sz = sz + s.initd.SetBit(&s.initd, i.N, 1) + return nil +} + +// Open implements [fs.FS]. 
+func (s *srv) Open(name string) (fs.File, error) {
+	const op = `open`
+	i, err := s.getInode(op, name)
+	if err != nil {
+		return nil, err
+	}
+	if err := s.realize(i); err != nil {
+		return nil, &fs.PathError{
+			Op:   op,
+			Path: name,
+			Err:  err,
+		}
+	}
+
+	r := file{inode: i}
+	switch i.Type {
+	case typeReg:
+		r.SectionReader = io.NewSectionReader(s.raw, i.off, i.sz)
+	case typeHardlink:
+		i, err = s.getInode(op, i.Linkname)
+		if err != nil {
+			return nil, err
+		}
+		r.inode = i
+		r.SectionReader = io.NewSectionReader(s.raw, i.off, i.sz)
+	case typeDir:
+		r.dirent = make([]fs.DirEntry, len(r.children))
+		n := 0
+		for i := range r.children {
+			e := &s.entry[i]
+			r.dirent[n] = dirent{e}
+			n++
+		}
+		sort.Slice(r.dirent, sortDirent(r.dirent))
+	case typeSymlink:
+		return s.Open(i.Linkname)
+	default:
+		// Pretend all other kinds of files don't exist.
+		return nil, &fs.PathError{
+			Op:   op,
+			Path: name,
+			Err:  fs.ErrExist,
+		}
+	}
+
+	return &r, nil
+}
+
+// Glob implements [fs.GlobFS].
+//
+// See [path.Match] for the pattern syntax.
+func (s *srv) Glob(pat string) ([]string, error) {
+	// GlobFS is implemented because it can avoid allocating for the walk.
+	//
+	// Path.Match is documented as only returning an error when the pattern is
+	// invalid, so check it here and we can avoid the check in the loop.
+	if _, err := path.Match(pat, ""); err != nil {
+		return nil, err
+	}
+	var ret []string
+	for n := range s.lookup {
+		if ok, _ := path.Match(pat, n); ok {
+			ret = append(ret, n)
+		}
+	}
+	sort.Strings(ret)
+	return ret, nil
+}
+
+// ReadFile implements [fs.ReadFileFS].
+func (s *srv) ReadFile(name string) ([]byte, error) {
+	// ReadFileFS is implemented because it can avoid allocating an intermediate
+	// "file" struct and can immediately allocate a byte slice of the correct
+	// size.
+	const op = `readfile`
+	i, err := s.getInode(op, name)
+	if err != nil {
+		return nil, err
+	}
+	if i.Type == typeSymlink {
+		return s.ReadFile(i.Linkname)
+	}
+	if err := s.realize(i); err != nil {
+		return nil, &fs.PathError{
+			Op:   op,
+			Path: name,
+			Err:  err,
+		}
+	}
+	ret := make([]byte, i.sz)
+	if _, err := io.ReadFull(io.NewSectionReader(s.raw, i.off, i.sz), ret); err != nil {
+		return nil, &fs.PathError{
+			Op:   op,
+			Path: name,
+			Err:  err,
+		}
+	}
+	return ret, nil
+}
+
+// Stat implements [fs.StatFS].
+func (s *srv) Stat(name string) (fs.FileInfo, error) {
+	// StatFS is implemented because it can avoid allocating an intermediate
+	// "file" struct.
+	const op = `stat`
+	i, err := s.getInode(op, name)
+	if err != nil {
+		return nil, err
+	}
+	return &i, nil
+}
+
+// ReadDir implements [fs.ReadDirFS].
+func (s *srv) ReadDir(name string) ([]fs.DirEntry, error) {
+	// ReadDirFS is implemented because it can avoid allocating some
+	// intermediate structs.
+	const op = `readdir`
+	i, err := s.getInode(op, name)
+	if err != nil {
+		return nil, err
+	}
+	ret := make([]fs.DirEntry, 0, len(i.children))
+	for i := range i.children {
+		ret = append(ret, dirent{&s.entry[i]})
+	}
+	sort.Slice(ret, sortDirent(ret))
+	return ret, nil
+}
diff --git a/pkg/tarfs/tarfs.go b/pkg/tarfs/tarfs.go
index 0b72609c9..26eafc7c5 100644
--- a/pkg/tarfs/tarfs.go
+++ b/pkg/tarfs/tarfs.go
@@ -1,34 +1,19 @@
-// Package tarfs implements the fs.FS interface over a tar archive.
+// Package tarfs implements the [fs.FS] interface over a tar archive.
+//
+// It also implements the [fs.FS] interface over the tar-compatible [eStargz] and [zstd:chunked] formats.
+// This package does not support newer [eStargz] layers with the `innerOffset` field. +// +// [eStargz]: https://github.com/containerd/stargz-snapshotter/blob/main/docs/estargz.md +// [zstd:chunked]: https://github.com/containers/image/pull/1084 package tarfs import ( - "archive/tar" - "fmt" - "io" - "io/fs" - "path" "path/filepath" - "sort" "strconv" "strings" "unicode/utf8" ) -// FS implements a filesystem abstraction over an io.ReaderAt containing a tar. -type FS struct { - r io.ReaderAt - lookup map[string]int - inode []inode -} - -// Inode is a fake inode(7)-like structure for keeping track of filesystem -// entries. -type inode struct { - h *tar.Header - children map[int]struct{} - off, sz int64 -} - // NormPath removes relative elements and enforces that the resulting string is // utf8-clean. // @@ -38,7 +23,7 @@ func normPath(p string) string { if utf8.ValidString(s) { return s } - // Slow path -- need to decode the string an write out escapes. + // Slow path -- need to decode the string and write out escapes. // This is roughly modeled on [strings.ToValidUTF8], but without the run // coalescing and the replacement is based on the invalid byte sequence. The // [strings.ToValidUTF8] function only cares if the encoding is valid, not @@ -69,474 +54,19 @@ func normPath(p string) string { return b.String() } -func newDir(n string) inode { - return inode{ - h: &tar.Header{ - Typeflag: tar.TypeDir, - Name: n, - Mode: int64(fs.ModeDir | 0o644), - }, - children: make(map[int]struct{}), - } -} - -// New creates an FS from the tar contained in the ReaderAt. -// -// The ReaderAt must remain valid for the entire life of the returned FS and any -// FSes returned by Sub. -func New(r io.ReaderAt) (*FS, error) { - var err error - s := FS{ - r: r, - lookup: make(map[string]int), - } - hardlink := make(map[string][]string) - if err := s.add(".", newDir("."), hardlink); err != nil { - return nil, err - } - - segs, err := findSegments(r) - if err != nil { - return nil, fmt.Errorf("tarfs: error finding segments: %w", err) - } - for _, seg := range segs { - r := io.NewSectionReader(r, seg.start, seg.size) - rd := tar.NewReader(r) - i := inode{ - off: seg.start, - sz: seg.size, - } - i.h, err = rd.Next() - if err != nil { - return nil, fmt.Errorf("tarfs: error reading header @%d(%d): %w", seg.start, seg.size, err) - } - i.h.Name = normPath(i.h.Name) - n := i.h.Name - switch i.h.Typeflag { - case tar.TypeDir: - // Has this been created this already? - if _, ok := s.lookup[n]; ok { - continue - } - i.children = make(map[int]struct{}) - case tar.TypeSymlink, tar.TypeLink: - // If an absolute path, norm the path and it should be fine. - // A symlink could dangle, but that's really weird. - if path.IsAbs(i.h.Linkname) { - i.h.Linkname = normPath(i.h.Linkname) - break - } - if i.h.Typeflag == tar.TypeSymlink { - // Assume that symlinks are relative to the directory they're - // present in. - i.h.Linkname = path.Join(path.Dir(n), i.h.Linkname) - } - i.h.Linkname = normPath(i.h.Linkname) - // Linkname should now be a full path from the root of the tar. - case tar.TypeReg: - } - if err := s.add(n, i, hardlink); err != nil { - return nil, err - } +func min(a, b int) int { + if a < b { + return a } - // Cleanup any dangling hardlinks. - // This leaves them in the inode slice, but removes them from the observable - // tree. 
- for _, rms := range hardlink { - for _, rm := range rms { - idx := s.lookup[rm] - delete(s.lookup, rm) - p := s.inode[s.lookup[path.Dir(rm)]] - delete(p.children, idx) - } - } - return &s, nil + return b } -// Add does what it says on the tin. -// -// In addition, it creates any needed leading directory elements. The caller -// should check for the existence of an "out of order" directory, as this -// function attempts to follow the POSIX spec on actions when "creating" a file -// that already exists: -// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap01.html#tagtcjh_14 -// -// The "hardlink" map is used for deferring hardlink creation. -func (f *FS) add(name string, ino inode, hardlink map[string][]string) error { - const op = `create` -Again: - if i, ok := f.lookup[name]; ok { - n := &f.inode[i] - et, nt := n.h.FileInfo().Mode()&fs.ModeType, ino.h.FileInfo().Mode()&fs.ModeType - switch { - case nt != 0: - // If the new type isn't a regular file, fail. - return &fs.PathError{ - Op: op, - Path: name, - Err: fmt.Errorf("new type (%x) cannot replace existing type (%x): %w", nt, et, fs.ErrExist), - } - case et&fs.ModeDir != 0: - // If the existing type is a directory, fail. - return &fs.PathError{ - Op: op, - Path: name, - Err: fmt.Errorf("new file cannot replace directory: %w", fs.ErrExist), - } - case et&fs.ModeSymlink != 0: - // Follow the link target. - name = n.h.Linkname - goto Again - } - // Should be OK to replace now. Shadow the previous inode so we don't - // have to renumber everything. - f.inode[i] = ino - return nil - } - - // Hardlink handling: if the target doesn't exist yet, make a note in passed-in map. - if ino.h.Typeflag == tar.TypeLink { - tgt := ino.h.Linkname - if _, ok := f.lookup[tgt]; !ok { - hardlink[tgt] = append(hardlink[tgt], name) - } - } - delete(hardlink, name) - i := len(f.inode) - f.inode = append(f.inode, ino) - f.lookup[name] = i - - cycle := make(map[*inode]struct{}) - dir := filepath.Dir(name) -AddEnt: - switch dir { - case name: - // Skip - case ".": - // Add was called with a root entry, like "a" -- make sure to link this to the root directory. - root := &f.inode[f.lookup["."]] - root.children[i] = struct{}{} - default: - parent, err := f.getInode(op, dir) - if err != nil { - parent, err = f.walkTo(dir, true) - } - if err != nil { - return err - } - if _, ok := cycle[parent]; ok { - return &fs.PathError{ - Op: op, - Path: dir, - Err: fmt.Errorf("found cycle when resolving member: %w", fs.ErrInvalid), - } - } - cycle[parent] = struct{}{} - switch parent.h.Typeflag { - case tar.TypeDir: - // OK - case tar.TypeLink: - // This is annoying -- hard linking to directories is weird - fallthrough - case tar.TypeSymlink: - dir = parent.h.Linkname - goto AddEnt - default: - return &fs.PathError{ - Op: op, - Path: parent.h.Name, - Err: fmt.Errorf("error while connecting child %q: %w", name, fs.ErrExist), - } - } - parent.children[i] = struct{}{} - } - return nil -} - -// GetInode returns the inode backing "name". -// -// The "op" parameter is used in error reporting. 
-func (f *FS) getInode(op, name string) (*inode, error) { - if !fs.ValidPath(name) { - return nil, &fs.PathError{ - Op: op, - Path: name, - Err: fs.ErrInvalid, - } - } - name = path.Clean(name) - if i, ok := f.lookup[name]; ok { - return &f.inode[i], nil - } - - i, err := f.walkTo(name, false) - if err != nil { - return nil, &fs.PathError{ - Op: op, - Path: name, - Err: fs.ErrNotExist, - } - } - return i, nil -} - -// WalkTo does a walk from the root as far along the provided path as possible, -// resolving symlinks as necesarry. If any segments are missing (including the final -// segments), they are created as directories if the "create" bool is passed. -func (f *FS) walkTo(p string, create bool) (*inode, error) { - w := strings.Split(p, "/") - var cur *inode - var b strings.Builder - - cur = &f.inode[f.lookup["."]] - i := 0 - for lim := len(w); i < lim; i++ { - n := w[i] - if i != 0 { - b.WriteByte('/') - } - b.WriteString(n) - var child *inode - var found bool - for ci := range cur.children { - child = &f.inode[ci] - cn := path.Base(child.h.Name) - if cn != n { - continue - } - cycle := make(map[int]struct{}) - Resolve: - for { - if _, ok := cycle[ci]; ok { - return nil, &fs.PathError{ - Op: `walk`, - Path: b.String(), - Err: fmt.Errorf("found cycle when resolving member: %w", fs.ErrInvalid), - } - } - cycle[ci] = struct{}{} - switch child.h.Typeflag { - case tar.TypeDir: - break Resolve - case tar.TypeSymlink: - tgt := child.h.Linkname - var ok bool - ci, ok = f.lookup[tgt] - switch { - case ok && create, ok && !create: - child = &f.inode[ci] - break Resolve - case !ok && create: - f.add(tgt, newDir(tgt), nil) - ci = f.lookup[tgt] - child = &f.inode[ci] - case !ok && !create: - return nil, fmt.Errorf("tarfs: walk to %q, but missing segment %q", p, n) - } - case tar.TypeReg: - if i == (lim - 1) { - break Resolve - } - return nil, &fs.PathError{ - Op: `walk`, - Path: p, - Err: fmt.Errorf("found symlink to regular file while connecting child %q: %w", b.String(), fs.ErrExist), - } - } - } - found = true - break - } - switch { - case found && create, found && !create: - // OK - case !found && create: - fp := b.String() // Make sure to use the full path and not just the member name. - f.add(fp, newDir(n), nil) - ci := f.lookup[fp] - child = &f.inode[ci] - case !found && !create: - return nil, fmt.Errorf("tarfs: walk to %q, but missing segment %q", p, b.String()) - } - cur = child - } - return cur, nil -} - -// Open implements fs.FS. -func (f *FS) Open(name string) (fs.File, error) { - const op = `open` - i, err := f.getInode(op, name) - if err != nil { - return nil, err - } - typ := i.h.FileInfo().Mode().Type() - var r *tar.Reader - switch { - case typ.IsRegular() && i.h.Typeflag != tar.TypeLink: - r = tar.NewReader(io.NewSectionReader(f.r, i.off, i.sz)) - case typ.IsRegular() && i.h.Typeflag == tar.TypeLink: - tgt, err := f.getInode(op, i.h.Linkname) - if err != nil { - return nil, err - } - r = tar.NewReader(io.NewSectionReader(f.r, tgt.off, tgt.sz)) - case typ.IsDir(): - d := dir{ - h: i.h, - es: make([]fs.DirEntry, len(i.children)), - } - n := 0 - for i := range i.children { - ct := &f.inode[i] - d.es[n] = dirent{ct.h} - n++ - } - sort.Slice(d.es, sortDirent(d.es)) - return &d, nil - case typ&fs.ModeSymlink != 0: // typ.IsSymlink() - return f.Open(i.h.Linkname) - default: - // Pretend all other kinds of files don't exist. 
- return nil, &fs.PathError{ - Op: op, - Path: name, - Err: fs.ErrExist, - } - } - if _, err := r.Next(); err != nil { - return nil, &fs.PathError{ - Op: op, - Path: name, - Err: err, - } - } - return &file{ - h: i.h, - r: r, - }, nil -} - -// Stat implements fs.StatFS. -func (f *FS) Stat(name string) (fs.FileInfo, error) { - // StatFS is implemented because it can avoid allocating an intermediate - // "file" struct. - const op = `stat` - i, err := f.getInode(op, name) - if err != nil { - return nil, err - } - return i.h.FileInfo(), nil -} - -// ReadDir implements fs.ReadDirFS. -func (f *FS) ReadDir(name string) ([]fs.DirEntry, error) { - // ReadDirFS is implemented because it can avoid allocating an intermediate - // "dir" struct. - const op = `readdir` - i, err := f.getInode(op, name) - if err != nil { - return nil, err - } - ret := make([]fs.DirEntry, 0, len(i.children)) - for ti := range i.children { - t := &f.inode[ti] - ret = append(ret, dirent{t.h}) - } - sort.Slice(ret, sortDirent(ret)) - return ret, nil -} - -// ReadFile implements fs.ReadFileFS. -func (f *FS) ReadFile(name string) ([]byte, error) { - // ReadFileFS is implemented because it can avoid allocating an intermediate - // "file" struct and can immediately allocate a byte slice of the correct - // size. - const op = `readfile` - i, err := f.getInode(op, name) - if err != nil { - return nil, err - } - if i.h.FileInfo().Mode().Type()&fs.ModeSymlink != 0 { - return f.ReadFile(i.h.Linkname) - } - r := tar.NewReader(io.NewSectionReader(f.r, i.off, i.sz)) - if _, err := r.Next(); err != nil { - return nil, &fs.PathError{ - Op: op, - Path: name, - Err: err, - } - } - ret := make([]byte, i.h.Size) - if _, err := io.ReadFull(r, ret); err != nil { - return nil, &fs.PathError{ - Op: op, - Path: name, - Err: err, - } - } - return ret, nil -} - -// Glob implements fs.GlobFS. -// -// See path.Match for the patten syntax. -func (f *FS) Glob(pat string) ([]string, error) { - // GlobFS is implemented because it can avoid allocating for the walk. - // - // Path.Match is documented as only returning an error when the pattern is - // invalid, so check it here and we can avoid the check in the loop. - if _, err := path.Match(pat, ""); err != nil { - return nil, err - } - var ret []string - for n := range f.lookup { - if ok, _ := path.Match(pat, n); ok { - ret = append(ret, n) - } - } - sort.Strings(ret) - return ret, nil -} - -// Sub implements fs.SubFS. -func (f *FS) Sub(dir string) (fs.FS, error) { - // SubFS is implemented because it only requires a single walk and - // conditional copy of the lookup table -- the underlying reader and inode - // slice can be shared. - const op = `sub` - n, err := f.getInode(op, dir) - if err != nil { - return nil, err - } - bp := n.h.Name - ret := FS{ - r: f.r, - inode: f.inode, - lookup: make(map[string]int), - } - for n, i := range f.lookup { - rel, err := filepath.Rel(bp, n) - if err != nil { - // Can't be made relative. - continue - } - if strings.HasPrefix(rel, "..") { - // Not in this subtree. - continue - } - ret.lookup[rel] = i - } - return &ret, nil -} - -// A bunch of static assertions for the fs interfaces. +// Possible values for the "magic" in a tar header. var ( - _ fs.FS = (*FS)(nil) - _ fs.GlobFS = (*FS)(nil) - _ fs.ReadDirFS = (*FS)(nil) - _ fs.ReadFileFS = (*FS)(nil) - _ fs.StatFS = (*FS)(nil) - _ fs.SubFS = (*FS)(nil) + magicPAX = []byte("ustar\x00") + magicGNU = []byte("ustar ") + magicOldGNU = []byte("ustar \x00") ) + +// Where the "magic" value lives. 
+const magicOff = 257 diff --git a/pkg/tarfs/tarfs_test.go b/pkg/tarfs/tarfs_test.go index 52a453b4d..2804080ab 100644 --- a/pkg/tarfs/tarfs_test.go +++ b/pkg/tarfs/tarfs_test.go @@ -1,8 +1,10 @@ -package tarfs +package tarfs_test import ( "archive/tar" + "bufio" "bytes" + "context" "crypto/sha256" "errors" "io" @@ -14,29 +16,70 @@ import ( "sync" "testing" "testing/fstest" + "time" - "github.com/quay/claircore/test/integration" + "github.com/klauspost/compress/gzip" + "github.com/klauspost/compress/zstd" + "github.com/quay/zlog" + + "github.com/quay/claircore/pkg/tarfs" + "github.com/quay/claircore/test" +) + +var ( + // ModTime reports the newest modtime for files in the current directory. + // + // TODO(hank) Replace this with [sync.OnceValue] in go1.21. + modTime time.Time ) +func init() { + dent, err := os.ReadDir(".") + if err != nil { + panic(err) + } + var r time.Time + for _, d := range dent { + fi, err := d.Info() + if err != nil { + panic(err) + } + m := fi.ModTime() + if m.After(r) { + r = m + } + } + modTime = r +} + // TestFS runs some sanity checks on a tar generated from this package's // directory. // // The tar is generated on demand and removed if tests fail, so modifying any -// file in this package *will* cause tests to fail once. Make sure to run tests -// twice if the Checksum tests fail. +// file in this package will cause tests slow down on the next run. func TestFS(t *testing.T) { - var name = filepath.Join(integration.PackageCacheDir(t), `fstest.tar`) - checktar(t, name) + ctx := zlog.Test(context.Background(), t) + t.Parallel() + name := test.GenerateFixture(t, `fstest.tar`, modTime, makeFixture("")) + test.GenerateFixture(t, `fstest.tar.gz`, modTime, makeFixture("gz")) + test.GenerateFixture(t, `fstest.tar.zstd`, modTime, makeFixture("zstd")) fileset := []string{ "file.go", + "fs.go", + "metrics.go", "parse.go", + "pool.go", + "randomaccess.go", + "seekable_test.go", + "srv.go", "tarfs.go", "tarfs_test.go", "testdata/atroot", } - t.Run("Single", func(t *testing.T) { - f, err := os.Open(name) + mkBuf := func(t *testing.T) *os.File { + t.Helper() + f, err := os.Create(filepath.Join(t.TempDir(), "buffer")) if err != nil { t.Error(err) } @@ -45,162 +88,250 @@ func TestFS(t *testing.T) { t.Error(err) } }) - sys, err := New(f) - if err != nil { - t.Error(err) - } - - if err := fstest.TestFS(sys, fileset...); err != nil { - t.Error(err) - } - }) - - t.Run("Concurrent", func(t *testing.T) { - f, err := os.Open(name) - if err != nil { - t.Error(err) - } - t.Cleanup(func() { - if err := f.Close(); err != nil { - t.Error(err) + return f + } + openFile := func(name string) func(*testing.T) *os.File { + return func(t *testing.T) *os.File { + t.Helper() + f, err := os.Open(name) + if err != nil { + t.Fatal(err) } - }) - sys, err := New(f) - if err != nil { - t.Error(err) - } - - const lim = 8 - var wg sync.WaitGroup - t.Logf("running %d goroutines", lim) - wg.Add(lim) - for i := 0; i < lim; i++ { - go func() { - defer wg.Done() - if err := fstest.TestFS(sys, fileset...); err != nil { + t.Cleanup(func() { + if err := f.Close(); err != nil { t.Error(err) } - }() - } - wg.Wait() - }) - - t.Run("Sub", func(t *testing.T) { - f, err := os.Open(name) - if err != nil { - t.Error(err) - } - t.Cleanup(func() { - if err := f.Close(); err != nil { - t.Error(err) - } - }) - sys, err := New(f) - if err != nil { - t.Error(err) + }) + return f } + } - sub, err := fs.Sub(sys, "testdata") - if err != nil { - t.Error(err) - } - if err := fstest.TestFS(sub, "atroot"); err != nil { - 
t.Error(err) - } - }) + tt := []fsTestcase{ + { + Name: "Single", + Check: func(ctx context.Context, open, mkBuf func(*testing.T) *os.File) func(*testing.T) { + return func(t *testing.T) { + ctx := zlog.Test(ctx, t) + f := open(t) + sys, err := tarfs.New(ctx, f, -1, mkBuf(t)) + if err != nil { + t.Fatal(err) + } + defer sys.Close() - t.Run("Checksum", func(t *testing.T) { - f, err := os.Open(name) - if err != nil { - t.Error(err) - } - t.Cleanup(func() { - if err := f.Close(); err != nil { - t.Error(err) - } - }) - sys, err := New(f) - if err != nil { - t.Error(err) - } - for _, n := range fileset { - name := n - t.Run(name, func(t *testing.T) { - h := sha256.New() - f, err := os.Open(name) - if err != nil { - t.Fatal(err) + if err := fstest.TestFS(sys, fileset...); err != nil { + t.Error(err) + } } - defer f.Close() - if _, err := io.Copy(h, f); err != nil { - t.Error(err) + }, + }, + { + Name: "NotAFile", + Check: func(ctx context.Context, open, mkBuf func(*testing.T) *os.File) func(*testing.T) { + return func(t *testing.T) { + ctx := zlog.Test(ctx, t) + f := open(t) + b, err := io.ReadAll(f) + if err != nil { + t.Error(err) + } + rd := bytes.NewReader(b) + sys, err := tarfs.New(ctx, rd, -1, mkBuf(t)) + if err != nil { + t.Fatal(err) + } + defer sys.Close() + + if err := fstest.TestFS(sys, fileset...); err != nil { + t.Error(err) + } } - want := h.Sum(nil) + }, + }, + { + Name: "Concurrent", + Check: func(ctx context.Context, open, mkBuf func(*testing.T) *os.File) func(*testing.T) { + return func(t *testing.T) { + ctx := zlog.Test(ctx, t) + f := open(t) + fi, err := f.Stat() + if err != nil { + t.Fatal(err) + } + sys, err := tarfs.New(ctx, f, fi.Size(), mkBuf(t)) + if err != nil { + t.Fatal(err) + } + defer sys.Close() - h.Reset() - b, err := fs.ReadFile(sys, name) - if err != nil { - t.Error(err) + const lim = 8 + var wg sync.WaitGroup + t.Logf("running %d goroutines", lim) + wg.Add(lim) + for i := 0; i < lim; i++ { + go func() { + defer wg.Done() + if err := fstest.TestFS(sys, fileset...); err != nil { + t.Error(err) + } + }() + } + wg.Wait() } - if _, err := h.Write(b); err != nil { - t.Error(err) + }, + }, + { + Name: "Sub", + Check: func(ctx context.Context, open, mkBuf func(*testing.T) *os.File) func(*testing.T) { + return func(t *testing.T) { + ctx := zlog.Test(ctx, t) + f := open(t) + sys, err := tarfs.New(ctx, f, -1, mkBuf(t)) + if err != nil { + t.Fatal(err) + } + defer sys.Close() + + sub, err := fs.Sub(sys, "testdata") + if err != nil { + t.Error(err) + } + if err := fstest.TestFS(sub, "atroot"); err != nil { + t.Error(err) + } } - got := h.Sum(nil) + }, + }, + { + Name: "Checksum", + Check: func(ctx context.Context, open, mkBuf func(*testing.T) *os.File) func(*testing.T) { + return func(t *testing.T) { + ctx := zlog.Test(ctx, t) + f := open(t) + sys, err := tarfs.New(ctx, f, -1, mkBuf(t)) + if err != nil { + t.Fatal(err) + } + defer sys.Close() + for _, n := range fileset { + name := n + t.Run(name, func(t *testing.T) { + h := sha256.New() + f, err := os.Open(name) + if err != nil { + t.Fatal(err) + } + defer f.Close() + if _, err := io.Copy(h, f); err != nil { + t.Error(err) + } + want := h.Sum(nil) - if !bytes.Equal(got, want) { - t.Errorf("got: %x, want: %x", got, want) + h.Reset() + b, err := fs.ReadFile(sys, name) + if err != nil { + t.Error(err) + } + if _, err := h.Write(b); err != nil { + t.Error(err) + } + got := h.Sum(nil) + + if !bytes.Equal(got, want) { + t.Errorf("got: %x, want: %x", got, want) + } + }) + } } - }) + }, + }, + } + + t.Run("Uncompressed", func(t 
*testing.T) { + t.Parallel() + bufHack := func(t *testing.T) *os.File { + t.Helper() + if path.Base(t.Name()) != "NotAFile" { + return nil + } + return mkBuf(t) + } + for _, tc := range tt { + t.Run(tc.Name, tc.Check(ctx, openFile(name), bufHack)) + } + }) + t.Run("Gzip", func(t *testing.T) { + t.Parallel() + for _, tc := range tt { + t.Run(tc.Name, tc.Check(ctx, openFile(name+".gz"), mkBuf)) + } + }) + t.Run("Zstd", func(t *testing.T) { + t.Parallel() + for _, tc := range tt { + t.Run(tc.Name, tc.Check(ctx, openFile(name+".zstd"), mkBuf)) } }) } -// TestEmpty tests that a wholly empty tar still creates an empty root. -func TestEmpty(t *testing.T) { - // Two zero blocks is the tar footer, so just make one up. - rd := bytes.NewReader(make([]byte, 2*512)) - sys, err := New(rd) - if err != nil { - t.Error(err) - } - if _, err := fs.Stat(sys, "."); err != nil { - t.Error(err) - } - ent, err := fs.ReadDir(sys, ".") - if err != nil { - t.Error(err) - } - for _, e := range ent { - t.Log(e) - } - if len(ent) != 0 { - t.Errorf("got: %d, want: 0", len(ent)) - } +type fsTestcase struct { + Check func(ctx context.Context, open, mkBuf func(*testing.T) *os.File) func(*testing.T) + Name string } -func checktar(t *testing.T, name string) { - t.Helper() - out, err := os.Create(name) - if err != nil { - t.Fatal(err) - } - defer out.Close() - tw := tar.NewWriter(out) - defer tw.Close() +// MakeFixture makes the expected tar for TestFS, with the compression "cmp". +// +// "Cmp" must be one of: +// - "" +// - gz +// - zstd +func makeFixture(cmp string) func(testing.TB, *os.File) { + return func(t testing.TB, f *os.File) { + var w io.Writer + buf := bufio.NewWriter(f) + defer func() { + if err := buf.Flush(); err != nil { + t.Error(err) + } + }() + switch cmp { + case "": + w = buf + case "gz": + z := gzip.NewWriter(buf) + defer z.Close() + w = z + case "zstd": + z, err := zstd.NewWriter(buf) + if err != nil { + t.Fatal(err) + } + defer z.Close() + w = z + default: + t.Fatalf("unknown compression scheme: %q", cmp) + } - in := os.DirFS(".") - if err := fs.WalkDir(in, ".", mktar(t, in, tw)); err != nil { - t.Fatal(err) + tw := tar.NewWriter(w) + defer tw.Close() + in := os.DirFS(".") + if err := fs.WalkDir(in, ".", mktar(t, filepath.Base(f.Name()), in, tw)); err != nil { + t.Fatal(err) + } } } -func mktar(t *testing.T, in fs.FS, tw *tar.Writer) fs.WalkDirFunc { +// Mktar is a [fs.WalkDirFunc] to copy files from "in" to "tw". +// +// "Name" is supplied for logging only. +func mktar(t testing.TB, name string, in fs.FS, tw *tar.Writer) fs.WalkDirFunc { return func(p string, d fs.DirEntry, err error) error { if err != nil { return err } - switch { - case filepath.Ext(d.Name()) == ".tar": + switch ext := path.Ext(d.Name()); { + case ext == ".tar" || ext == ".gz" || ext == ".zstd": + // Skip all these. return nil case d.Name() == "." && d.IsDir(): return nil @@ -208,7 +339,7 @@ func mktar(t *testing.T, in fs.FS, tw *tar.Writer) fs.WalkDirFunc { return fs.SkipDir default: } - t.Logf("adding %q", p) + t.Logf("%s: adding %q", name, p) i, err := d.Info() if err != nil { return err @@ -236,33 +367,80 @@ func mktar(t *testing.T, in fs.FS, tw *tar.Writer) fs.WalkDirFunc { } } +// TestEmpty tests that a wholly empty tar still creates an empty root. 
+func TestEmpty(t *testing.T) { + t.Parallel() + ctx := zlog.Test(context.Background(), t) + f, err := os.Create(filepath.Join(t.TempDir(), filepath.Base(t.Name()))) + if err != nil { + t.Error(err) + } + t.Cleanup(func() { + if err := f.Close(); err != nil { + t.Error(err) + } + }) + // Two zero blocks is the tar footer, so just make one up. + if err := f.Truncate(2 * 512); err != nil { + t.Error(err) + } + sys, err := tarfs.New(ctx, f, -1, nil) + if err != nil { + t.Fatal(err) + } + defer sys.Close() + if _, err := fs.Stat(sys, "."); err != nil { + t.Error(err) + } + ent, err := fs.ReadDir(sys, ".") + if err != nil { + t.Error(err) + } + for _, e := range ent { + t.Log(e) + } + if len(ent) != 0 { + t.Errorf("got: %d, want: 0", len(ent)) + } +} + func TestSymlinks(t *testing.T) { - tmp := t.TempDir() + t.Parallel() run := func(openErr bool, hs []tar.Header, chk func(*testing.T, fs.FS)) func(*testing.T) { return func(t *testing.T) { + ctx := zlog.Test(context.Background(), t) t.Helper() - // This is a perfect candidate for using test.GenerateFixture, but - // creates an import cycle. - f, err := os.Create(filepath.Join(tmp, path.Base(t.Name()))) - if err != nil { - t.Fatal(err) - } - defer f.Close() - - tw := tar.NewWriter(f) - for i := range hs { - if err := tw.WriteHeader(&hs[i]); err != nil { + name := test.GenerateFixture(t, path.Base(t.Name())+".tar", modTime, func(t testing.TB, f *os.File) { + tw := tar.NewWriter(f) + for i := range hs { + if err := tw.WriteHeader(&hs[i]); err != nil { + t.Error(err) + } + } + if err := tw.Close(); err != nil { t.Error(err) } + }) + f, err := os.Open(name) + if err != nil { + t.Fatal(err) } - if err := tw.Close(); err != nil { - t.Error(err) + fi, err := f.Stat() + if err != nil { + t.Fatal(err) } - sys, err := New(f) + sys, err := tarfs.New(ctx, f, fi.Size(), nil) t.Log(err) + t.Cleanup(func() { + if sys != nil { + if err := sys.Close(); err != nil { + t.Error(err) + } + } + }) if (err != nil) != openErr { - t.Fail() + t.FailNow() } if chk != nil { @@ -461,6 +639,8 @@ func TestSymlinks(t *testing.T) { } func TestKnownLayers(t *testing.T) { + t.Parallel() + ctx := zlog.Test(context.Background(), t) ents, err := os.ReadDir(`testdata/known`) if err != nil { t.Fatal(err) @@ -471,15 +651,17 @@ func TestKnownLayers(t *testing.T) { continue } t.Run(n, func(t *testing.T) { + ctx := zlog.Test(ctx, t) f, err := os.Open(filepath.Join(`testdata/known`, n)) if err != nil { t.Fatal(err) } defer f.Close() - sys, err := New(f) + sys, err := tarfs.New(ctx, f, -1, nil) if err != nil { t.Fatal(err) } + defer sys.Close() if err := fs.WalkDir(sys, ".", func(p string, d fs.DirEntry, err error) error { if err != nil { t.Error(err) @@ -534,6 +716,8 @@ func TestKnownLayers(t *testing.T) { // `var/run/console/algo.txt` and `log.txt` can be accessed via `run/logs/log.txt` // or `var/run/logs/log.txt`. 
func TestTarConcatenate(t *testing.T) { + t.Parallel() + ctx := zlog.Test(context.Background(), t) tests := []struct { expectedFS map[string]bool testFile string @@ -562,10 +746,11 @@ func TestTarConcatenate(t *testing.T) { if err != nil { t.Fatalf("failed to open test tar: %v", err) } - sys, err := New(f) + sys, err := tarfs.New(ctx, f, -1, nil) if err != nil { t.Fatalf("failed to create tarfs: %v", err) } + defer sys.Close() if err := fs.WalkDir(sys, ".", func(path string, d fs.DirEntry, err error) error { if err != nil { return err @@ -593,12 +778,14 @@ func TestTarConcatenate(t *testing.T) { } func TestInvalidName(t *testing.T) { + t.Parallel() + ctx := zlog.Test(context.Background(), t) f, err := os.Open(`testdata/bad_name.tar`) if err != nil { t.Fatalf("failed to open test tar: %v", err) } defer f.Close() - sys, err := New(f) + sys, err := tarfs.New(ctx, f, -1, nil) if err != nil { t.Fatalf("failed to create tarfs: %v", err) }