Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion pkg/coretag/coretag.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ func Enable() error {
}

// GetAllCoreTags returns the core tag of all the threads in the thread group.
// PID 0 means the current pid.
func GetAllCoreTags(pid int) ([]uint64, error) {
// prctl(PR_SCHED_CORE_GET, PR_SCHED_CORE_SCOPE_THREAD_GROUP, ...) is not supported
// in linux. So instead we get all threads from /proc/<pid>/task and get all the
Expand Down Expand Up @@ -75,9 +76,14 @@ func GetAllCoreTags(pid int) ([]uint64, error) {
}

// getTids returns set of tids as reported by /proc/<pid>/task.
// PID 0 means the current PID.
func getTids(pid int) (map[int]struct{}, error) {
tids := make(map[int]struct{})
files, err := os.ReadDir("/proc/" + strconv.Itoa(pid) + "/task")
path := "/proc/self/task"
if pid != 0 {
path = fmt.Sprintf("/proc/%d/task", pid)
}
files, err := os.ReadDir(path)
if err != nil {
return nil, err
}
Expand Down
11 changes: 10 additions & 1 deletion pkg/coretag/coretag_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package coretag

import (
"os"
"reflect"
"testing"

"gvisor.dev/gvisor/pkg/hostos"
Expand All @@ -36,11 +37,19 @@ func TestEnable(t *testing.T) {
t.Fatalf("Enable() got error %v, wanted nil", err)
}

coreTags, err := GetAllCoreTags(os.Getpid())
pid := os.Getpid()
coreTags, err := GetAllCoreTags(pid)
if err != nil {
t.Fatalf("GetAllCoreTags() got error %v, wanted nil", err)
}
if len(coreTags) != 1 {
t.Fatalf("Got coreTags %v, wanted len(coreTags)=1", coreTags)
}
coreTagsSelf, err := GetAllCoreTags(0)
if err != nil {
t.Fatalf("GetAllCoreTags(0) got error %v, wanted nil", err)
}
if !reflect.DeepEqual(coreTags, coreTagsSelf) {
t.Fatalf("Got different coreTags for PID %d vs self: %v vs %v", pid, coreTags, coreTagsSelf)
}
}
2 changes: 1 addition & 1 deletion runsc/boot/loader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ func TestStartSignal(t *testing.T) {
func TestHostnetWithRawSockets(t *testing.T) {
// Drop CAP_NET_RAW from effective capabilities, if we have it.
pid := os.Getpid()
caps, err := capability.NewPid2(os.Getpid())
caps, err := capability.NewPid2(0)
if err != nil {
t.Fatalf("error getting capabilities for pid %d: %v", pid, err)
}
Expand Down
2 changes: 1 addition & 1 deletion runsc/cmd/boot.go
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...any) subcomma

// Verify that all sentry threads are properly core tagged, and log
// current core tag.
coreTags, err := coretag.GetAllCoreTags(os.Getpid())
coreTags, err := coretag.GetAllCoreTags(0)
if err != nil {
util.Fatalf("Failed read current core tags: %v", err)
}
Expand Down
72 changes: 69 additions & 3 deletions runsc/cmd/chroot.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,73 @@ func copyFile(dst, src string) error {
return err
}

// setupMinimalProcfs creates a minimal procfs-like tree at `${chroot}/proc`.
//
// The resulting tree is a tmpfs that contains:
//   - a read-only submount that is either a new procfs instance (at
//     "sandbox-proc") or, if that cannot be mounted, a recursive bind mount
//     of the existing /proc (at "host-proc");
//   - copies of /proc/sys/vm/mmap_min_addr and /proc/sys/kernel/cap_last_cap;
//   - a "self" symlink pointing at the "self" entry of the submount.
//
// It returns a wrapped error describing the first step that failed.
func setupMinimalProcfs(chroot string) error {
	// We can't always directly mount procfs because it may be obstructed
	// by submounts within it. See https://gvisor.dev/issue/10944.
	// All we really need from procfs is /proc/self and a few kernel
	// parameter files, which are typically not obstructed.
	// So we create a tmpfs at /proc and manually copy the kernel parameter
	// files into it. Then, to get /proc/self, we mount either a new
	// instance of procfs (if possible), or a recursive bind mount of the
	// procfs we do have access to (which still contains the obstructed
	// submounts but /proc/self is not obstructed), and we symlink
	// our /proc/self to the one in that mount.
	//
	// Why not try to mount the new procfs instance at /proc directly?
	// Because that would cause the set of files at /proc to differ
	// between the "new procfs instance" case and the "recursive bind
	// mount" case. Thus, this could introduce a bug whereby gVisor starts
	// to depend on a /proc file that is present in one case but not the
	// other, without decent test coverage to catch it.
	procRoot := filepath.Join(chroot, "/proc")
	if err := os.Mkdir(procRoot, 0755); err != nil {
		return fmt.Errorf("error creating /proc in chroot: %w", err)
	}
	// The tmpfs itself is mounted without MS_RDONLY because the directories,
	// files and symlink below still have to be created inside it.
	if err := specutils.SafeMount("runsc-proc", procRoot, "tmpfs",
		unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC, "", "/proc"); err != nil {
		return fmt.Errorf("error mounting tmpfs in /proc: %w", err)
	}
	flags := uint32(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RDONLY)
	// procSubmountDir names the directory under /proc that backs /proc/self.
	// It is switched to "host-proc" when falling back to the bind mount.
	procSubmountDir := "sandbox-proc"
	if newProcfsErr := mountInChroot(chroot, "proc", "/proc/"+procSubmountDir, "proc", flags); newProcfsErr != nil {
		log.Debugf("Unable to mount a new instance of the procfs file system at %q (%v); trying a recursive bind mount instead.", filepath.Join(procRoot, procSubmountDir), newProcfsErr)
		procSubmountDir = "host-proc"
		if bindErr := mountInChroot(chroot, "/proc", "/proc/"+procSubmountDir, "bind",
			unix.MS_BIND|unix.MS_REC|flags); bindErr != nil {
			return fmt.Errorf("error recursively bind-mounting proc at %q (%w) after also failing to mount a new procfs instance there (%v)", filepath.Join(procRoot, procSubmountDir), bindErr, newProcfsErr)
		}
		log.Debugf("Successfully mounted a recursive bind mount of procfs at %q; continuing.", filepath.Join(procRoot, procSubmountDir))
	}
	// Create needed directories. Parents are listed before children because
	// os.Mkdir does not create intermediate directories.
	for _, d := range []string{
		"/proc/sys",
		"/proc/sys/kernel",
		"/proc/sys/vm",
	} {
		if err := os.Mkdir(filepath.Join(chroot, d), 0755); err != nil {
			return fmt.Errorf("error creating directory %q: %w", filepath.Join(chroot, d), err)
		}
	}
	// Copy needed files from the current /proc into the same relative
	// location inside the chroot.
	for _, f := range []string{
		"/proc/sys/vm/mmap_min_addr",
		"/proc/sys/kernel/cap_last_cap",
	} {
		if err := copyFile(filepath.Join(chroot, f), f); err != nil {
			return fmt.Errorf("failed to copy %q -> %q: %w", f, filepath.Join(chroot, f), err)
		}
	}
	// Create symlink for /proc/self. The target is relative, so it resolves
	// to the submount's "self" entry within the same directory.
	if err := os.Symlink(procSubmountDir+"/self", filepath.Join(procRoot, "self")); err != nil {
		return fmt.Errorf("error creating symlink %q -> %q: %w", filepath.Join(procRoot, "self"), procSubmountDir+"/self", err)
	}
	// Leave only the execute (search) bits on the tmpfs root now that it is
	// fully populated.
	// NOTE(review): presumably this is to prevent listing or modifying the
	// directory afterwards — confirm intent.
	if err := os.Chmod(procRoot, 0o111); err != nil {
		return fmt.Errorf("error chmodding %q: %w", procRoot, err)
	}
	return nil
}

// setUpChroot creates an empty directory with runsc mounted at /runsc and a
// minimal procfs-like tree set up at /proc.
func setUpChroot(spec *specs.Spec, conf *config.Config) error {
Expand Down Expand Up @@ -109,9 +176,8 @@ func setUpChroot(spec *specs.Spec, conf *config.Config) error {
log.Warningf("Failed to copy /etc/localtime: %v. UTC timezone will be used.", err)
}

flags := uint32(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RDONLY)
if err := mountInChroot(chroot, "proc", "/proc", "proc", flags); err != nil {
return fmt.Errorf("error mounting proc in chroot: %v", err)
if err := setupMinimalProcfs(chroot); err != nil {
return fmt.Errorf("error setting up minimal procfs in chroot %q: %v", chroot, err)
}

if err := tpuProxyUpdateChroot("/", chroot, spec, conf); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion runsc/specutils/namespace.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ func SetUIDGIDMappings(cmd *exec.Cmd, s *specs.Spec) {

// HasCapabilities returns true if the user has all capabilities in 'cs'.
func HasCapabilities(cs ...capability.Cap) bool {
caps, err := capability.NewPid2(os.Getpid())
caps, err := capability.NewPid2(0)
if err != nil {
return false
}
Expand Down
12 changes: 0 additions & 12 deletions test/e2e/runtime_in_docker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,18 +74,6 @@ func (test testVariant) run(ctx context.Context, logger testutil.Logger, runscPa
ReadOnly: false,
})
}
// Mount an unobstructed view of procfs at /proc2 so that the runtime
// can mount a fresh procfs.
// TODO(gvisor.dev/issue/10944): Remove this once issue is fixed.
opts.Mounts = append(opts.Mounts, mount.Mount{
Type: mount.TypeBind,
Source: "/proc",
Target: "/proc2",
ReadOnly: false,
BindOptions: &mount.BindOptions{
NonRecursive: true,
},
})
const wantMessage = "It became a jumble of words, a litany, almost a kind of glossolalia."
args := []string{
"/runtime",
Expand Down