From b5526ec54766a25a357107cbea69020e21fd0c3b Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Sat, 26 Oct 2019 21:29:23 +0900 Subject: [PATCH] cgroup2: the first cut Currently, only PID controller is implemented as an example. Other controllers will be implemented in other PRs. Interface changes: * `Cgroup` interface now excludes V1-specific and V2-specific methods. `type CgroupV1 interface{Cgroup, ...}` extends `Cgroup` for V1-specific methods. Same applies to `type CgroupV2 interface{Cgroup, ...}`. * Now `Hierarchy()` returns whether the hierarchy is in unified-mode or not. `cmd/cgroupsplayground/main.go` is included as an example of the new API. Signed-off-by: Akihiro Suda --- Protobuild.toml | 7 + cgroup.go | 159 ++++++++-- cgroup_test.go | 31 +- cmd/cgroupsplayground/main.go | 58 ++++ control.go | 34 +- errors.go | 2 + go.mod | 1 + go.sum | 8 + hierarchy.go | 2 +- mock_test.go | 7 +- paths.go | 15 + paths_test.go | 9 + pids_v2.go | 87 ++++++ stats/v2/doc.go | 17 + stats/v2/metrics.pb.go | 572 ++++++++++++++++++++++++++++++++++ stats/v2/metrics.pb.txt | 33 ++ stats/v2/metrics.proto | 21 ++ subsystem.go | 18 +- subsystem_v2.go | 26 ++ systemd.go | 6 +- utils.go | 82 ++++- v1.go | 16 +- v2.go | 31 ++ 23 files changed, 1178 insertions(+), 64 deletions(-) create mode 100644 cmd/cgroupsplayground/main.go create mode 100644 pids_v2.go create mode 100644 stats/v2/doc.go create mode 100644 stats/v2/metrics.pb.go create mode 100755 stats/v2/metrics.pb.txt create mode 100644 stats/v2/metrics.proto create mode 100644 subsystem_v2.go create mode 100644 v2.go diff --git a/Protobuild.toml b/Protobuild.toml index 7c324b3f..5f6fc009 100644 --- a/Protobuild.toml +++ b/Protobuild.toml @@ -37,3 +37,10 @@ ignore_files = [ "google/protobuf/descriptor.proto", "gogoproto/gogo.proto" ] +[[descriptors]] +prefix = "github.com/containerd/cgroups/stats/v2" +target = "stats/v2/metrics.pb.txt" +ignore_files = [ + "google/protobuf/descriptor.proto", + "gogoproto/gogo.proto" +] diff --git a/cgroup.go b/cgroup.go index ba465659..4adb7224 100644 --- a/cgroup.go +++ b/cgroup.go @@ -26,11 +26,13 @@ import ( "sync" v1 "github.com/containerd/cgroups/stats/v1" + v2 "github.com/containerd/cgroups/stats/v2" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" ) -// New returns a new control via the cgroup cgroups interface +// New returns a new control via the cgroup cgroups interface. +// Supports both v1 and v2. func New(hierarchy Hierarchy, path Path, resources *specs.LinuxResources, opts ...InitOpts) (Cgroup, error) { config := newInitConfig() for _, o := range opts { @@ -38,7 +40,7 @@ func New(hierarchy Hierarchy, path Path, resources *specs.LinuxResources, opts . return nil, err } } - subsystems, err := hierarchy() + subsystems, unifiedMode, err := hierarchy() if err != nil { return nil, err } @@ -60,13 +62,24 @@ func New(hierarchy Hierarchy, path Path, resources *specs.LinuxResources, opts . } active = append(active, s) } - return &cgroup{ - path: path, - subsystems: active, + if unifiedMode { + return &cgroupV2{ + cgroup: cgroup{ + path: path, + subsystems: active, + }, + }, nil + } + return &cgroupV1{ + cgroup: cgroup{ + path: path, + subsystems: active, + }, }, nil } // Load will load an existing cgroup and allow it to be controlled +// Supports both v1 and v2. func Load(hierarchy Hierarchy, path Path, opts ...InitOpts) (Cgroup, error) { config := newInitConfig() for _, o := range opts { @@ -75,7 +88,7 @@ func Load(hierarchy Hierarchy, path Path, opts ...InitOpts) (Cgroup, error) { } } var activeSubsystems []Subsystem - subsystems, err := hierarchy() + subsystems, unifiedMode, err := hierarchy() if err != nil { return nil, err } @@ -98,21 +111,33 @@ func Load(hierarchy Hierarchy, path Path, opts ...InitOpts) (Cgroup, error) { } return nil, err } - if _, err := os.Lstat(s.Path(p)); err != nil { - if os.IsNotExist(err) { - continue + if !unifiedMode { + if _, err := os.Lstat(s.Path(p)); err != nil { + if os.IsNotExist(err) { + continue + } + return nil, err } - return nil, err } activeSubsystems = append(activeSubsystems, s) } + if unifiedMode { + return &cgroupV2{ + cgroup: cgroup{ + path: path, + subsystems: activeSubsystems, + }, + }, nil + } // if we do not have any active systems then the cgroup is deleted if len(activeSubsystems) == 0 { return nil, ErrCgroupDeleted } - return &cgroup{ - path: path, - subsystems: activeSubsystems, + return &cgroupV1{ + cgroup: cgroup{ + path: path, + subsystems: activeSubsystems, + }, }, nil } @@ -124,8 +149,16 @@ type cgroup struct { err error } +type cgroupV1 struct { + cgroup +} + +type cgroupV2 struct { + cgroup +} + // New returns a new sub cgroup -func (c *cgroup) New(name string, resources *specs.LinuxResources) (Cgroup, error) { +func (c *cgroupV1) New(name string, resources *specs.LinuxResources) (Cgroup, error) { c.mu.Lock() defer c.mu.Unlock() if c.err != nil { @@ -137,9 +170,32 @@ func (c *cgroup) New(name string, resources *specs.LinuxResources) (Cgroup, erro return nil, err } } - return &cgroup{ - path: path, - subsystems: c.subsystems, + return &cgroupV1{ + cgroup: cgroup{ + path: path, + subsystems: c.subsystems, + }, + }, nil +} + +// New returns a new sub cgroup +func (c *cgroupV2) New(name string, resources *specs.LinuxResources) (Cgroup, error) { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return nil, c.err + } + path := subPath(c.path, name) + for _, s := range c.subsystems { + if err := initializeSubsystem(s, path, resources); err != nil { + return nil, err + } + } + return &cgroupV2{ + cgroup: cgroup{ + path: path, + subsystems: c.subsystems, + }, }, nil } @@ -180,7 +236,7 @@ func (c *cgroup) add(process Process) error { } // AddTask moves the provided tasks (threads) into the new cgroup -func (c *cgroup) AddTask(process Process) error { +func (c *cgroupV1) AddTask(process Process) error { if process.Pid <= 0 { return ErrInvalidPid } @@ -247,7 +303,7 @@ func (c *cgroup) Delete() error { } // Stat returns the current metrics for the cgroup -func (c *cgroup) Stat(handlers ...ErrorHandler) (*v1.Metrics, error) { +func (c *cgroupV1) Stat(handlers ...ErrorHandler) (*v1.Metrics, error) { c.mu.Lock() defer c.mu.Unlock() if c.err != nil { @@ -267,7 +323,49 @@ func (c *cgroup) Stat(handlers ...ErrorHandler) (*v1.Metrics, error) { errs = make(chan error, len(c.subsystems)) ) for _, s := range c.subsystems { - if ss, ok := s.(stater); ok { + if ss, ok := s.(staterV1); ok { + sp, err := c.path(s.Name()) + if err != nil { + return nil, err + } + wg.Add(1) + go func() { + defer wg.Done() + if err := ss.Stat(sp, stats); err != nil { + for _, eh := range handlers { + if herr := eh(err); herr != nil { + errs <- herr + } + } + } + }() + } + } + wg.Wait() + close(errs) + for err := range errs { + return nil, err + } + return stats, nil +} + +// Stat returns the current metrics for the cgroup +func (c *cgroupV2) Stat(handlers ...ErrorHandler) (*v2.Metrics, error) { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return nil, c.err + } + if len(handlers) == 0 { + handlers = append(handlers, errPassthrough) + } + var ( + stats = &v2.Metrics{} + wg = &sync.WaitGroup{} + errs = make(chan error, len(c.subsystems)) + ) + for _, s := range c.subsystems { + if ss, ok := s.(staterV2); ok { sp, err := c.path(s.Name()) if err != nil { return nil, err @@ -320,7 +418,7 @@ func (c *cgroup) Update(resources *specs.LinuxResources) error { // Processes returns the processes running inside the cgroup along // with the subsystem used, pid, and path -func (c *cgroup) Processes(subsystem Name, recursive bool) ([]Process, error) { +func (c *cgroupV1) Processes(subsystem Name, recursive bool) ([]Process, error) { c.mu.Lock() defer c.mu.Unlock() if c.err != nil { @@ -329,7 +427,7 @@ func (c *cgroup) Processes(subsystem Name, recursive bool) ([]Process, error) { return c.processes(subsystem, recursive) } -func (c *cgroup) processes(subsystem Name, recursive bool) ([]Process, error) { +func (c *cgroupV1) processes(subsystem Name, recursive bool) ([]Process, error) { s := c.getSubsystem(subsystem) sp, err := c.path(subsystem) if err != nil { @@ -361,9 +459,14 @@ func (c *cgroup) processes(subsystem Name, recursive bool) ([]Process, error) { return processes, err } +// Processes returns the processes running inside the cgroup +func (c *cgroupV2) Processes(recursive bool) ([]Process, error) { + return nil, fmt.Errorf("not implemented") +} + // Tasks returns the tasks running inside the cgroup along // with the subsystem used, pid, and path -func (c *cgroup) Tasks(subsystem Name, recursive bool) ([]Task, error) { +func (c *cgroupV1) Tasks(subsystem Name, recursive bool) ([]Task, error) { c.mu.Lock() defer c.mu.Unlock() if c.err != nil { @@ -443,7 +546,7 @@ func (c *cgroup) Thaw() error { // OOMEventFD returns the memory cgroup's out of memory event fd that triggers // when processes inside the cgroup receive an oom event. Returns // ErrMemoryNotSupported if memory cgroups is not supported. -func (c *cgroup) OOMEventFD() (uintptr, error) { +func (c *cgroupV1) OOMEventFD() (uintptr, error) { c.mu.Lock() defer c.mu.Unlock() if c.err != nil { @@ -485,7 +588,7 @@ func (c *cgroup) State() State { // MoveTo does a recursive move subsystem by subsystem of all the processes // inside the group -func (c *cgroup) MoveTo(destination Cgroup) error { +func (c *cgroupV1) MoveTo(destination Cgroup) error { c.mu.Lock() defer c.mu.Unlock() if c.err != nil { @@ -508,6 +611,12 @@ func (c *cgroup) MoveTo(destination Cgroup) error { return nil } +// MoveTo does a recursive move subsystem by subsystem of all the processes +// inside the group +func (c *cgroupV2) MoveTo(destination Cgroup) error { + return fmt.Errorf("not implemented yet") +} + func (c *cgroup) getSubsystem(n Name) Subsystem { for _, s := range c.subsystems { if s.Name() == n { diff --git a/cgroup_test.go b/cgroup_test.go index 68df81d8..55547b4d 100644 --- a/cgroup_test.go +++ b/cgroup_test.go @@ -67,7 +67,8 @@ func TestStat(t *testing.T) { t.Error(err) return } - s, err := control.Stat(IgnoreNotExist) + controlV1 := control.(CgroupV1) + s, err := controlV1.Stat(IgnoreNotExist) if err != nil { t.Error(err) return @@ -112,7 +113,8 @@ func TestAddTask(t *testing.T) { t.Error(err) return } - if err := control.AddTask(Process{Pid: 1234}); err != nil { + controlV1 := control.(CgroupV1) + if err := controlV1.AddTask(Process{Pid: 1234}); err != nil { t.Error(err) return } @@ -145,7 +147,8 @@ func TestListPids(t *testing.T) { return } } - procs, err := control.Processes(Freezer, false) + controlV1 := control.(CgroupV1) + procs, err := controlV1.Processes(Freezer, false) if err != nil { t.Error(err) return @@ -160,6 +163,10 @@ func TestListPids(t *testing.T) { } func TestListTasksPids(t *testing.T) { + if isUnifiedMode { + // FIXME: mock test should pass regardless to the system cgroup version + t.Skip(ErrV1NotSupported) + } mock, err := newMock() if err != nil { t.Fatal(err) @@ -170,7 +177,8 @@ func TestListTasksPids(t *testing.T) { t.Error(err) return } - if err := control.AddTask(Process{Pid: 1234}); err != nil { + controlV1 := control.(CgroupV1) + if err := controlV1.AddTask(Process{Pid: 1234}); err != nil { t.Error(err) return } @@ -180,7 +188,7 @@ func TestListTasksPids(t *testing.T) { return } } - tasks, err := control.Tasks(Freezer, false) + tasks, err := controlV1.Tasks(Freezer, false) if err != nil { t.Error(err) return @@ -240,9 +248,11 @@ func mockNewNotInRdma(subsystems []Subsystem, path Path, resources *specs.LinuxR } } } - return &cgroup{ - path: path, - subsystems: subsystems, + return &cgroupV1{ + cgroup: cgroup{ + path: path, + subsystems: subsystems, + }, }, nil } @@ -273,7 +283,7 @@ func TestLoadWithMissingSubsystems(t *testing.T) { t.Fatal(err) } defer mock.delete() - subsystems, err := mock.hierarchy() + subsystems, _, err := mock.hierarchy() if err != nil { t.Error(err) return @@ -333,6 +343,7 @@ func TestCreateSubCgroup(t *testing.T) { t.Error(err) return } + subV1 := sub.(CgroupV1) if err := sub.Add(Process{Pid: 1234}); err != nil { t.Error(err) return @@ -343,7 +354,7 @@ func TestCreateSubCgroup(t *testing.T) { return } } - if err := sub.AddTask(Process{Pid: 5678}); err != nil { + if err := subV1.AddTask(Process{Pid: 5678}); err != nil { t.Error(err) return } diff --git a/cmd/cgroupsplayground/main.go b/cmd/cgroupsplayground/main.go new file mode 100644 index 00000000..f31bb06b --- /dev/null +++ b/cmd/cgroupsplayground/main.go @@ -0,0 +1,58 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package main + +import ( + "os" + + "github.com/containerd/cgroups" + "github.com/sirupsen/logrus" +) + +func main() { + if err := xmain(); err != nil { + logrus.Fatalf("%+v", err) + } +} + +func xmain() error { + var hier cgroups.Hierarchy + hier = cgroups.V1 + if cgroups.RunningWithUnifiedMode() { + hier = cgroups.V2 + } + pid := os.Getpid() + logrus.Infof("Loading hier (v2=%v) for PID %d", cgroups.RunningWithUnifiedMode(), pid) + cg, err := cgroups.Load(hier, cgroups.PidPath(pid)) + if err != nil { + return err + } + subsystems := cg.Subsystems() + logrus.Infof("Has %d subsystems", len(subsystems)) + for i, s := range subsystems { + logrus.Infof("Subsystem %d: %q", i, s.Name()) + if ss, ok := s.(pather); ok { + logrus.Infof("Path(\"foo\")=%q", ss.Path("foo")) + } + } + return nil +} + +type pather interface { + cgroups.Subsystem + Path(path string) string +} diff --git a/control.go b/control.go index a024fd65..eb277087 100644 --- a/control.go +++ b/control.go @@ -20,6 +20,7 @@ import ( "os" v1 "github.com/containerd/cgroups/stats/v1" + v2 "github.com/containerd/cgroups/stats/v2" specs "github.com/opencontainers/runtime-spec/specs-go" ) @@ -61,29 +62,42 @@ type Cgroup interface { New(string, *specs.LinuxResources) (Cgroup, error) // Add adds a process to the cgroup (cgroup.procs) Add(Process) error - // AddTask adds a process to the cgroup (tasks) - AddTask(Process) error // Delete removes the cgroup as a whole Delete() error // MoveTo moves all the processes under the calling cgroup to the provided one // subsystems are moved one at a time MoveTo(Cgroup) error - // Stat returns the stats for all subsystems in the cgroup - Stat(...ErrorHandler) (*v1.Metrics, error) // Update updates all the subsystems with the provided resource changes Update(resources *specs.LinuxResources) error - // Processes returns all the processes in a select subsystem for the cgroup - Processes(Name, bool) ([]Process, error) - // Tasks returns all the tasks in a select subsystem for the cgroup - Tasks(Name, bool) ([]Task, error) // Freeze freezes or pauses all processes inside the cgroup Freeze() error // Thaw thaw or resumes all processes inside the cgroup Thaw() error - // OOMEventFD returns the memory subsystem's event fd for OOM events - OOMEventFD() (uintptr, error) // State returns the cgroups current state State() State // Subsystems returns all the subsystems in the cgroup Subsystems() []Subsystem } + +type CgroupV1 interface { + Cgroup + // AddTask adds a process to the cgroup (tasks) + AddTask(Process) error + // Stat returns the stats for all subsystems in the cgroup + Stat(...ErrorHandler) (*v1.Metrics, error) + // Processes returns all the processes in a select subsystem for the cgroup + Processes(Name, bool) ([]Process, error) + // Tasks returns all the tasks in a select subsystem for the cgroup + Tasks(Name, bool) ([]Task, error) + // OOMEventFD returns the memory subsystem's event fd for OOM events + OOMEventFD() (uintptr, error) +} + +type CgroupV2 interface { + Cgroup + // Stat returns the stats for all subsystems in the cgroup + Stat(...ErrorHandler) (*v2.Metrics, error) + // Processes returns all the processes in the cgroup + Processes(bool) ([]Process, error) + // TODO: support memory.events +} diff --git a/errors.go b/errors.go index f1ad8315..b09b14af 100644 --- a/errors.go +++ b/errors.go @@ -27,6 +27,8 @@ var ( ErrInvalidFormat = errors.New("cgroups: parsing file with invalid format failed") ErrFreezerNotSupported = errors.New("cgroups: freezer cgroup not supported on this system") ErrMemoryNotSupported = errors.New("cgroups: memory cgroup not supported on this system") + ErrV1NotSupported = errors.New("cgroups: v1 cgroup not supported on this system") + ErrV2NotSupported = errors.New("cgroups: v2 cgroup not supported on this system") ErrCgroupDeleted = errors.New("cgroups: cgroup deleted") ErrNoCgroupMountDestination = errors.New("cgroups: cannot find cgroup mount destination") ) diff --git a/go.mod b/go.mod index 06d139e9..3c32a296 100644 --- a/go.mod +++ b/go.mod @@ -9,5 +9,6 @@ require ( github.com/gogo/protobuf v1.2.1 github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700 github.com/pkg/errors v0.8.1 + github.com/sirupsen/logrus v1.4.2 golang.org/x/sys v0.0.0-20190514135907-3a4b5fb9f71f ) diff --git a/go.sum b/go.sum index 932ef459..c655b398 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,7 @@ github.com/coreos/go-systemd v0.0.0-20181030182848-ad9ff7f9a9ff h1:bI9r9ZUi2/EmS github.com/coreos/go-systemd v0.0.0-20181030182848-ad9ff7f9a9ff/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e h1:Wf6HqHfScWJN9/ZjdUKyjop4mf3Qdd+1TvvltAvM3m8= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e h1:BWhy2j3IXJhjCbC68FptL43tDKIq8FladmaTs3Xs7Z8= @@ -12,10 +13,17 @@ github.com/gogo/protobuf v1.2.1 h1:/s5zKNz0uPFCZ5hddgPdo2TK2TVrUNMn0OOX8/aZMTE= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700 h1:eNUVfm/RFLIi1G7flU5/ZRTHvd4kcVuzfRnL6OFlzCI= github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190514135907-3a4b5fb9f71f h1:Xab8gg26GrI/x3RNdVhVkHHM1XLyGeRBEvz4Q5x4YW8= golang.org/x/sys v0.0.0-20190514135907-3a4b5fb9f71f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/hierarchy.go b/hierarchy.go index 9221bf3f..ac088c58 100644 --- a/hierarchy.go +++ b/hierarchy.go @@ -17,4 +17,4 @@ package cgroups // Hierarchy enableds both unified and split hierarchy for cgroups -type Hierarchy func() ([]Subsystem, error) +type Hierarchy func() (subsystems []Subsystem, unifiedMode bool, err error) diff --git a/mock_test.go b/mock_test.go index 6cc1bfb7..9d370c49 100644 --- a/mock_test.go +++ b/mock_test.go @@ -31,7 +31,8 @@ func newMock() (*mockCgroup, error) { if err != nil { return nil, err } - subsystems, err := defaults(root) + unifiedMode := false + subsystems, err := defaults(root, unifiedMode) if err != nil { return nil, err } @@ -73,6 +74,6 @@ func (m *mockCgroup) delete() error { return os.RemoveAll(m.root) } -func (m *mockCgroup) hierarchy() ([]Subsystem, error) { - return m.subsystems, nil +func (m *mockCgroup) hierarchy() ([]Subsystem, bool, error) { + return m.subsystems, false, nil } diff --git a/paths.go b/paths.go index f45fd425..8d493327 100644 --- a/paths.go +++ b/paths.go @@ -39,6 +39,13 @@ func StaticPath(path string) Path { // NestedPath will nest the cgroups based on the calling processes cgroup // placing its child processes inside its own path func NestedPath(suffix string) Path { + if isUnifiedMode { + up, err := parseCgroupFileV2("/proc/self/cgroup") + if err != nil { + return errorPath(err) + } + return StaticPath(up) + } paths, err := parseCgroupFile("/proc/self/cgroup") if err != nil { return errorPath(err) @@ -50,6 +57,13 @@ func NestedPath(suffix string) Path { // This is commonly used for the Load function to restore an existing container func PidPath(pid int) Path { p := fmt.Sprintf("/proc/%d/cgroup", pid) + if isUnifiedMode { + up, err := parseCgroupFileV2(p) + if err != nil { + return errorPath(err) + } + return StaticPath(up) + } paths, err := parseCgroupFile(p) if err != nil { return errorPath(errors.Wrapf(err, "parse cgroup file %s", p)) @@ -60,6 +74,7 @@ func PidPath(pid int) Path { // ErrControllerNotActive is returned when a controller is not supported or enabled var ErrControllerNotActive = errors.New("controller is not supported") +// existingPath is only for v1 func existingPath(paths map[string]string, suffix string) Path { // localize the paths based on the root mount dest for nested cgroups for n, p := range paths { diff --git a/paths_test.go b/paths_test.go index affca0e6..e6e183e2 100644 --- a/paths_test.go +++ b/paths_test.go @@ -35,6 +35,9 @@ func TestStaticPath(t *testing.T) { } func TestSelfPath(t *testing.T) { + if isUnifiedMode { + t.Skip(ErrV1NotSupported) + } _, err := v1MountPoint() if err == ErrMountPointNotExist { t.Skip("skipping test that requires cgroup hierarchy") @@ -57,6 +60,9 @@ func TestSelfPath(t *testing.T) { } func TestPidPath(t *testing.T) { + if isUnifiedMode { + t.Skip(ErrV1NotSupported) + } _, err := v1MountPoint() if err == ErrMountPointNotExist { t.Skip("skipping test that requires cgroup hierarchy") @@ -114,6 +120,9 @@ func TestEmptySubsystem(t *testing.T) { } func TestSystemd240(t *testing.T) { + if isUnifiedMode { + t.Skip(ErrV1NotSupported) + } const data = `8:net_cls:/ 7:memory:/system.slice/docker.service 6:freezer:/ diff --git a/pids_v2.go b/pids_v2.go new file mode 100644 index 00000000..84d2a3b8 --- /dev/null +++ b/pids_v2.go @@ -0,0 +1,87 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +import ( + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + v2 "github.com/containerd/cgroups/stats/v2" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewPidsV2(root string) *pidsControllerV2 { + return &pidsControllerV2{ + root: root, + } +} + +type pidsControllerV2 struct { + root string +} + +func (p *pidsControllerV2) Name() Name { + // TODO: consider returning PidsV2 rather than Pids + return Pids +} + +func (p *pidsControllerV2) Path(path string) string { + return filepath.Join(p.root, path) +} + +func (p *pidsControllerV2) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(p.Path(path), defaultDirPerm); err != nil { + return err + } + if resources.Pids != nil && resources.Pids.Limit > 0 { + return ioutil.WriteFile( + filepath.Join(p.Path(path), "pids.max"), + []byte(strconv.FormatInt(resources.Pids.Limit, 10)), + defaultFilePerm, + ) + } + return nil +} + +func (p *pidsControllerV2) Update(path string, resources *specs.LinuxResources) error { + return p.Create(path, resources) +} + +func (p *pidsControllerV2) Stat(path string, stats *v2.Metrics) error { + current, err := readUint(filepath.Join(p.Path(path), "pids.current")) + if err != nil { + return err + } + var max uint64 + maxData, err := ioutil.ReadFile(filepath.Join(p.Path(path), "pids.max")) + if err != nil { + return err + } + if maxS := strings.TrimSpace(string(maxData)); maxS != "max" { + if max, err = parseUint(maxS, 10, 64); err != nil { + return err + } + } + stats.Pids = &v2.PidsStat{ + Current: current, + Limit: max, + } + return nil +} diff --git a/stats/v2/doc.go b/stats/v2/doc.go new file mode 100644 index 00000000..95a671f9 --- /dev/null +++ b/stats/v2/doc.go @@ -0,0 +1,17 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 diff --git a/stats/v2/metrics.pb.go b/stats/v2/metrics.pb.go new file mode 100644 index 00000000..88a28eac --- /dev/null +++ b/stats/v2/metrics.pb.go @@ -0,0 +1,572 @@ +// Code generated by protoc-gen-gogo. DO NOT EDIT. +// source: github.com/containerd/cgroups/stats/v2/metrics.proto + +package v2 + +import ( + fmt "fmt" + proto "github.com/gogo/protobuf/proto" + io "io" + math "math" + reflect "reflect" + strings "strings" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package + +type Metrics struct { + // reserved: repeated HugetlbStat hugetlb = 1; + Pids *PidsStat `protobuf:"bytes,2,opt,name=pids,proto3" json:"pids,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *Metrics) Reset() { *m = Metrics{} } +func (*Metrics) ProtoMessage() {} +func (*Metrics) Descriptor() ([]byte, []int) { + return fileDescriptor_f20f222ca428bd84, []int{0} +} +func (m *Metrics) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *Metrics) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_Metrics.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalTo(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *Metrics) XXX_Merge(src proto.Message) { + xxx_messageInfo_Metrics.Merge(m, src) +} +func (m *Metrics) XXX_Size() int { + return m.Size() +} +func (m *Metrics) XXX_DiscardUnknown() { + xxx_messageInfo_Metrics.DiscardUnknown(m) +} + +var xxx_messageInfo_Metrics proto.InternalMessageInfo + +type PidsStat struct { + Current uint64 `protobuf:"varint,1,opt,name=current,proto3" json:"current,omitempty"` + Limit uint64 `protobuf:"varint,2,opt,name=limit,proto3" json:"limit,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PidsStat) Reset() { *m = PidsStat{} } +func (*PidsStat) ProtoMessage() {} +func (*PidsStat) Descriptor() ([]byte, []int) { + return fileDescriptor_f20f222ca428bd84, []int{1} +} +func (m *PidsStat) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *PidsStat) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_PidsStat.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalTo(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *PidsStat) XXX_Merge(src proto.Message) { + xxx_messageInfo_PidsStat.Merge(m, src) +} +func (m *PidsStat) XXX_Size() int { + return m.Size() +} +func (m *PidsStat) XXX_DiscardUnknown() { + xxx_messageInfo_PidsStat.DiscardUnknown(m) +} + +var xxx_messageInfo_PidsStat proto.InternalMessageInfo + +func init() { + proto.RegisterType((*Metrics)(nil), "io.containerd.cgroups.v2.Metrics") + proto.RegisterType((*PidsStat)(nil), "io.containerd.cgroups.v2.PidsStat") +} + +func init() { + proto.RegisterFile("github.com/containerd/cgroups/stats/v2/metrics.proto", fileDescriptor_f20f222ca428bd84) +} + +var fileDescriptor_f20f222ca428bd84 = []byte{ + // 202 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0x32, 0x49, 0xcf, 0x2c, 0xc9, + 0x28, 0x4d, 0xd2, 0x4b, 0xce, 0xcf, 0xd5, 0x4f, 0xce, 0xcf, 0x2b, 0x49, 0xcc, 0xcc, 0x4b, 0x2d, + 0x4a, 0xd1, 0x4f, 0x4e, 0x2f, 0xca, 0x2f, 0x2d, 0x28, 0xd6, 0x2f, 0x2e, 0x49, 0x2c, 0x29, 0xd6, + 0x2f, 0x33, 0xd2, 0xcf, 0x4d, 0x2d, 0x29, 0xca, 0x4c, 0x2e, 0xd6, 0x2b, 0x28, 0xca, 0x2f, 0xc9, + 0x17, 0x92, 0xc8, 0xcc, 0xd7, 0x43, 0xa8, 0xd6, 0x83, 0xaa, 0xd6, 0x2b, 0x33, 0x52, 0x72, 0xe4, + 0x62, 0xf7, 0x85, 0x28, 0x15, 0x32, 0xe3, 0x62, 0x29, 0xc8, 0x4c, 0x29, 0x96, 0x60, 0x52, 0x60, + 0xd4, 0xe0, 0x36, 0x52, 0xd2, 0xc3, 0xa5, 0x47, 0x2f, 0x20, 0x33, 0xa5, 0x38, 0xb8, 0x24, 0xb1, + 0x24, 0x08, 0xac, 0x5e, 0xc9, 0x8a, 0x8b, 0x03, 0x26, 0x22, 0x24, 0xc1, 0xc5, 0x9e, 0x5c, 0x5a, + 0x54, 0x94, 0x9a, 0x57, 0x22, 0xc1, 0xa8, 0xc0, 0xa8, 0xc1, 0x12, 0x04, 0xe3, 0x0a, 0x89, 0x70, + 0xb1, 0xe6, 0x64, 0xe6, 0x66, 0x96, 0x80, 0x8d, 0x67, 0x09, 0x82, 0x70, 0x9c, 0x24, 0x4e, 0x3c, + 0x94, 0x63, 0xb8, 0xf1, 0x50, 0x8e, 0xa1, 0xe1, 0x91, 0x1c, 0xe3, 0x89, 0x47, 0x72, 0x8c, 0x17, + 0x1e, 0xc9, 0x31, 0x3e, 0x78, 0x24, 0xc7, 0x98, 0xc4, 0x06, 0x76, 0xb9, 0x31, 0x20, 0x00, 0x00, + 0xff, 0xff, 0x38, 0x37, 0xd7, 0x43, 0xf1, 0x00, 0x00, 0x00, +} + +func (m *Metrics) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Metrics) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.Pids != nil { + dAtA[i] = 0x12 + i++ + i = encodeVarintMetrics(dAtA, i, uint64(m.Pids.Size())) + n1, err := m.Pids.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n1 + } + if m.XXX_unrecognized != nil { + i += copy(dAtA[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *PidsStat) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *PidsStat) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.Current != 0 { + dAtA[i] = 0x8 + i++ + i = encodeVarintMetrics(dAtA, i, uint64(m.Current)) + } + if m.Limit != 0 { + dAtA[i] = 0x10 + i++ + i = encodeVarintMetrics(dAtA, i, uint64(m.Limit)) + } + if m.XXX_unrecognized != nil { + i += copy(dAtA[i:], m.XXX_unrecognized) + } + return i, nil +} + +func encodeVarintMetrics(dAtA []byte, offset int, v uint64) int { + for v >= 1<<7 { + dAtA[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + dAtA[offset] = uint8(v) + return offset + 1 +} +func (m *Metrics) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Pids != nil { + l = m.Pids.Size() + n += 1 + l + sovMetrics(uint64(l)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *PidsStat) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Current != 0 { + n += 1 + sovMetrics(uint64(m.Current)) + } + if m.Limit != 0 { + n += 1 + sovMetrics(uint64(m.Limit)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func sovMetrics(x uint64) (n int) { + for { + n++ + x >>= 7 + if x == 0 { + break + } + } + return n +} +func sozMetrics(x uint64) (n int) { + return sovMetrics(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (this *Metrics) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&Metrics{`, + `Pids:` + strings.Replace(fmt.Sprintf("%v", this.Pids), "PidsStat", "PidsStat", 1) + `,`, + `XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func (this *PidsStat) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&PidsStat{`, + `Current:` + fmt.Sprintf("%v", this.Current) + `,`, + `Limit:` + fmt.Sprintf("%v", this.Limit) + `,`, + `XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func valueToStringMetrics(v interface{}) string { + rv := reflect.ValueOf(v) + if rv.IsNil() { + return "nil" + } + pv := reflect.Indirect(rv).Interface() + return fmt.Sprintf("*%v", pv) +} +func (m *Metrics) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Metrics: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Metrics: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Pids", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Pids == nil { + m.Pids = &PidsStat{} + } + if err := m.Pids.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *PidsStat) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: PidsStat: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: PidsStat: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Current", wireType) + } + m.Current = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Current |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Limit", wireType) + } + m.Limit = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Limit |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func skipMetrics(dAtA []byte) (n int, err error) { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowMetrics + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowMetrics + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if dAtA[iNdEx-1] < 0x80 { + break + } + } + return iNdEx, nil + case 1: + iNdEx += 8 + return iNdEx, nil + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowMetrics + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if length < 0 { + return 0, ErrInvalidLengthMetrics + } + iNdEx += length + if iNdEx < 0 { + return 0, ErrInvalidLengthMetrics + } + return iNdEx, nil + case 3: + for { + var innerWire uint64 + var start int = iNdEx + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowMetrics + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + innerWire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + innerWireType := int(innerWire & 0x7) + if innerWireType == 4 { + break + } + next, err := skipMetrics(dAtA[start:]) + if err != nil { + return 0, err + } + iNdEx = start + next + if iNdEx < 0 { + return 0, ErrInvalidLengthMetrics + } + } + return iNdEx, nil + case 4: + return iNdEx, nil + case 5: + iNdEx += 4 + return iNdEx, nil + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + } + panic("unreachable") +} + +var ( + ErrInvalidLengthMetrics = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowMetrics = fmt.Errorf("proto: integer overflow") +) diff --git a/stats/v2/metrics.pb.txt b/stats/v2/metrics.pb.txt new file mode 100755 index 00000000..e0c8d09e --- /dev/null +++ b/stats/v2/metrics.pb.txt @@ -0,0 +1,33 @@ +file { + name: "github.com/containerd/cgroups/stats/v2/metrics.proto" + package: "io.containerd.cgroups.v2" + message_type { + name: "Metrics" + field { + name: "pids" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".io.containerd.cgroups.v2.PidsStat" + json_name: "pids" + } + } + message_type { + name: "PidsStat" + field { + name: "current" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "current" + } + field { + name: "limit" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "limit" + } + } + syntax: "proto3" +} diff --git a/stats/v2/metrics.proto b/stats/v2/metrics.proto new file mode 100644 index 00000000..ebcd0231 --- /dev/null +++ b/stats/v2/metrics.proto @@ -0,0 +1,21 @@ +syntax = "proto3"; + +package io.containerd.cgroups.v2; + +// import "gogoproto/gogo.proto"; + +message Metrics { + // reserved: repeated HugetlbStat hugetlb = 1; + PidsStat pids = 2; + // reserved: CPUStat cpu = 3 [(gogoproto.customname) = "CPU"]; + // reserved: MemoryStat memory = 4; + // reserved: BlkIOStat blkio = 5; + // reserved: RdmaStat rdma = 6; + // reserved: repeated NetworkStat network = 7; + // reserved: CgroupStats cgroup_stats = 8; +} + +message PidsStat { + uint64 current = 1; + uint64 limit = 2; +} diff --git a/subsystem.go b/subsystem.go index 1349fc66..f6bed218 100644 --- a/subsystem.go +++ b/subsystem.go @@ -20,6 +20,7 @@ import ( "fmt" v1 "github.com/containerd/cgroups/stats/v1" + v2 "github.com/containerd/cgroups/stats/v2" specs "github.com/opencontainers/runtime-spec/specs-go" ) @@ -84,11 +85,16 @@ type deleter interface { Delete(path string) error } -type stater interface { +type staterV1 interface { Subsystem Stat(path string, stats *v1.Metrics) error } +type staterV2 interface { + Subsystem + Stat(path string, stats *v2.Metrics) error +} + type updater interface { Subsystem Update(path string, resources *specs.LinuxResources) error @@ -96,18 +102,18 @@ type updater interface { // SingleSubsystem returns a single cgroup subsystem within the base Hierarchy func SingleSubsystem(baseHierarchy Hierarchy, subsystem Name) Hierarchy { - return func() ([]Subsystem, error) { - subsystems, err := baseHierarchy() + return func() ([]Subsystem, bool, error) { + subsystems, unifiedMode, err := baseHierarchy() if err != nil { - return nil, err + return nil, false, err } for _, s := range subsystems { if s.Name() == subsystem { return []Subsystem{ s, - }, nil + }, unifiedMode, nil } } - return nil, fmt.Errorf("unable to find subsystem %s", subsystem) + return nil, unifiedMode, fmt.Errorf("unable to find subsystem %s", subsystem) } } diff --git a/subsystem_v2.go b/subsystem_v2.go new file mode 100644 index 00000000..c6045ae9 --- /dev/null +++ b/subsystem_v2.go @@ -0,0 +1,26 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +// SubsystemsV2 shall return a complete list of the default cgroups +// available on most linux systems configured with unified mode (WIP). +func SubsystemsV2() []Name { + n := []Name{ + Pids, + } + return n +} diff --git a/systemd.go b/systemd.go index c5d4e308..ccfcd40c 100644 --- a/systemd.go +++ b/systemd.go @@ -38,11 +38,15 @@ var ( ) func Systemd() ([]Subsystem, error) { + if isUnifiedMode { + return nil, ErrV1NotSupported + } root, err := v1MountPoint() if err != nil { return nil, err } - defaultSubsystems, err := defaults(root) + unifiedMode := false + defaultSubsystems, err := defaults(root, unifiedMode) if err != nil { return nil, err } diff --git a/utils.go b/utils.go index 8a97d04d..44245763 100644 --- a/utils.go +++ b/utils.go @@ -25,13 +25,23 @@ import ( "path/filepath" "strconv" "strings" + "syscall" "time" units "github.com/docker/go-units" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" ) -var isUserNS = runningInUserNS() +const ( + unifiedMountpoint = "/sys/fs/cgroup" +) + +// for ease of mocking, functions should avoid using these variables whenever possible. +var ( + isUserNS = runningInUserNS() + isUnifiedMode = RunningWithUnifiedMode() +) // runningInUserNS detects whether we are currently running in a user namespace. // Copied from github.com/lxc/lxd/shared/util.go @@ -62,8 +72,24 @@ func runningInUserNS() bool { return true } +// RunningWithUnifiedMode detects whether we are currently running with unified mode. +// Copied from https://github.com/opencontainers/runc/blob/c4d8e1688c816a8cef632a3b44a38611511b7140/libcontainer/cgroups/utils.go#L41 +func RunningWithUnifiedMode() bool { + var st syscall.Statfs_t + if err := syscall.Statfs(unifiedMountpoint, &st); err != nil { + panic("cannot statfs cgroup root") + } + return st.Type == unix.CGROUP2_SUPER_MAGIC +} + // defaults returns all known groups -func defaults(root string) ([]Subsystem, error) { +func defaults(root string, unifiedMode bool) ([]Subsystem, error) { + if unifiedMode { + s := []Subsystem{ + NewPidsV2(root), + } + return s, nil + } h, err := NewHugetlb(root) if err != nil && !os.IsNotExist(err) { return nil, err @@ -112,6 +138,7 @@ func remove(path string) error { } // readPids will read all the pids of processes in a cgroup by the provided path +// Supports both v1 and v2. func readPids(path string, subsystem Name) ([]Process, error) { f, err := os.Open(filepath.Join(path, cgroupProcs)) if err != nil { @@ -138,8 +165,12 @@ func readPids(path string, subsystem Name) ([]Process, error) { return out, nil } -// readTasksPids will read all the pids of tasks in a cgroup by the provided path +// readTasksPids will read all the pids of tasks in a cgroup by the provided path. +// Only for v1. func readTasksPids(path string, subsystem Name) ([]Task, error) { + if isUnifiedMode { + return nil, ErrV1NotSupported + } f, err := os.Open(filepath.Join(path, cgroupTasks)) if err != nil { return nil, err @@ -225,6 +256,8 @@ func parseKV(raw string) (string, uint64, error) { } } +// parseCgroupFile parses cgroup file. +// Only for v1. func parseCgroupFile(path string) (map[string]string, error) { f, err := os.Open(path) if err != nil { @@ -234,6 +267,8 @@ func parseCgroupFile(path string) (map[string]string, error) { return parseCgroupFromReader(f) } +// parseCgroupFileFromReader parses cgroup file. +// Only for v1. func parseCgroupFromReader(r io.Reader) (map[string]string, error) { var ( cgroups = make(map[string]string) @@ -259,7 +294,48 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) { return cgroups, nil } +// parseCgroupFileV2 parses cgroup file. +// Only for v2. +func parseCgroupFileV2(path string) (string, error) { + f, err := os.Open(path) + if err != nil { + return "", err + } + defer f.Close() + return parseCgroupFromReaderV2(f) +} + +// parseCgroupFileFromReaderV2 parses cgroup file. +// Only for v2. +func parseCgroupFromReaderV2(r io.Reader) (string, error) { + var ( + s = bufio.NewScanner(r) + ) + for s.Scan() { + if err := s.Err(); err != nil { + return "", err + } + var ( + text = s.Text() + parts = strings.SplitN(text, ":", 3) + ) + if len(parts) < 3 { + return "", fmt.Errorf("invalid cgroup entry: %q", text) + } + for _, subs := range strings.Split(parts[1], ",") { + if subs == "" { + return parts[2], nil + } + } + } + return "", fmt.Errorf("cgroup path not found") +} + +// getCgroupDestination is only for v1 func getCgroupDestination(subsystem string) (string, error) { + if isUnifiedMode { + return "", ErrV1NotSupported + } f, err := os.Open("/proc/self/mountinfo") if err != nil { return "", err diff --git a/v1.go b/v1.go index a076d469..32f2fda6 100644 --- a/v1.go +++ b/v1.go @@ -25,14 +25,17 @@ import ( ) // V1 returns all the groups in the default cgroups mountpoint in a single hierarchy -func V1() ([]Subsystem, error) { +func V1() ([]Subsystem, bool, error) { + if isUnifiedMode { + return nil, false, ErrV1NotSupported + } root, err := v1MountPoint() if err != nil { - return nil, err + return nil, isUnifiedMode, err } - subsystems, err := defaults(root) + subsystems, err := defaults(root, false) if err != nil { - return nil, err + return nil, isUnifiedMode, err } var enabled []Subsystem for _, s := range pathers(subsystems) { @@ -41,12 +44,15 @@ func V1() ([]Subsystem, error) { enabled = append(enabled, s) } } - return enabled, nil + return enabled, isUnifiedMode, nil } // v1MountPoint returns the mount point where the cgroup // mountpoints are mounted in a single hiearchy func v1MountPoint() (string, error) { + if isUnifiedMode { + return "", ErrV1NotSupported + } f, err := os.Open("/proc/self/mountinfo") if err != nil { return "", err diff --git a/v2.go b/v2.go new file mode 100644 index 00000000..c5a279f1 --- /dev/null +++ b/v2.go @@ -0,0 +1,31 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +// V2 returns all the groups in the default cgroups mountpoint in a single hierarchy +func V2() ([]Subsystem, bool, error) { + if !isUnifiedMode { + return nil, false, ErrV2NotSupported + } + subsystems, err := defaults(unifiedMountpoint, isUnifiedMode) + if err != nil { + return nil, isUnifiedMode, err + } + enabled := subsystems + // TODO: check and remove the default groups that do not exist + return enabled, isUnifiedMode, nil +}