diff --git a/Protobuild.toml b/Protobuild.toml index 7c324b3f..516dcc15 100644 --- a/Protobuild.toml +++ b/Protobuild.toml @@ -37,3 +37,10 @@ ignore_files = [ "google/protobuf/descriptor.proto", "gogoproto/gogo.proto" ] +[[descriptors]] +prefix = "github.com/containerd/cgroups/v2/stats" +target = "v2/stats/metrics.pb.txt" +ignore_files = [ + "google/protobuf/descriptor.proto", + "gogoproto/gogo.proto" +] diff --git a/cmd/cgroups-playground/main.go b/cmd/cgroups-playground/main.go new file mode 100644 index 00000000..68bc98c0 --- /dev/null +++ b/cmd/cgroups-playground/main.go @@ -0,0 +1,58 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package main + +import ( + "os" + + "github.com/containerd/cgroups/v2" + "github.com/sirupsen/logrus" +) + +func main() { + if err := xmain(); err != nil { + logrus.Fatalf("%+v", err) + } +} + +func xmain() error { + pid := os.Getpid() + g, err := v2.PidGroupPath(pid) + if err != nil { + return err + } + unifiedMountpoint := "/sys/fs/cgroup" + logrus.Infof("Loading V2 for %q (PID %d), mountpoint=%q", g, pid, unifiedMountpoint) + cg, err := v2.Load(unifiedMountpoint, g) + if err != nil { + return err + } + processes, err := cg.Processes(true) + if err != nil { + return err + } + logrus.Infof("Has %d processes (recursively)", len(processes)) + for i, s := range processes { + logrus.Infof("Process %d: %d", i, s.Pid) + } + subsystems := cg.Subsystems() + logrus.Infof("Has %d subsystems", len(subsystems)) + for i, s := range subsystems { + logrus.Infof("Subsystem %d: %q", i, s.Name()) + } + return nil +} diff --git a/go.mod b/go.mod index 06d139e9..3c32a296 100644 --- a/go.mod +++ b/go.mod @@ -9,5 +9,6 @@ require ( github.com/gogo/protobuf v1.2.1 github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700 github.com/pkg/errors v0.8.1 + github.com/sirupsen/logrus v1.4.2 golang.org/x/sys v0.0.0-20190514135907-3a4b5fb9f71f ) diff --git a/go.sum b/go.sum index 932ef459..c655b398 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,7 @@ github.com/coreos/go-systemd v0.0.0-20181030182848-ad9ff7f9a9ff h1:bI9r9ZUi2/EmS github.com/coreos/go-systemd v0.0.0-20181030182848-ad9ff7f9a9ff/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e h1:Wf6HqHfScWJN9/ZjdUKyjop4mf3Qdd+1TvvltAvM3m8= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e h1:BWhy2j3IXJhjCbC68FptL43tDKIq8FladmaTs3Xs7Z8= @@ -12,10 +13,17 @@ github.com/gogo/protobuf v1.2.1 h1:/s5zKNz0uPFCZ5hddgPdo2TK2TVrUNMn0OOX8/aZMTE= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700 h1:eNUVfm/RFLIi1G7flU5/ZRTHvd4kcVuzfRnL6OFlzCI= github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190514135907-3a4b5fb9f71f h1:Xab8gg26GrI/x3RNdVhVkHHM1XLyGeRBEvz4Q5x4YW8= golang.org/x/sys v0.0.0-20190514135907-3a4b5fb9f71f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/v2/cgroup.go b/v2/cgroup.go new file mode 100644 index 00000000..75bab4d9 --- /dev/null +++ b/v2/cgroup.go @@ -0,0 +1,362 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + + statsv2 "github.com/containerd/cgroups/v2/stats" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" +) + +// New returns a new control via the cgroup cgroups interface. +// +// unifiedMountpoint should be either "/sys/fs/cgroup" (pure v2) or +// "/sys/fs/cgroup/unified" (hybrid). +func New(unifiedMountpoint string, g GroupPath, resources *specs.LinuxResources, opts ...InitOpts) (Cgroup, error) { + if err := VerifyGroupPath(g); err != nil { + return nil, err + } + config := &initConfig{} + for _, o := range opts { + if err := o(config); err != nil { + return nil, err + } + } + subsystems, errs := defaults(unifiedMountpoint) + if len(subsystems) == 0 { + return nil, errors.Errorf("cannot detect any subsystem under %q: %+v", unifiedMountpoint, errs) + } + for _, s := range subsystems { + if c, ok := s.(Creator); ok { + if err := c.Create(g, resources); err != nil { + return nil, err + } + } + } + return &cgroup{ + g: g, + unifiedMountpoint: unifiedMountpoint, + subsystems: subsystems, + }, nil +} + +// Load will load an existing cgroup and allow it to be controlled +func Load(unifiedMountpoint string, g GroupPath, opts ...InitOpts) (Cgroup, error) { + if err := VerifyGroupPath(g); err != nil { + return nil, err + } + config := &initConfig{} + for _, o := range opts { + if err := o(config); err != nil { + return nil, err + } + } + subsystems, _ := defaults(unifiedMountpoint) + if len(subsystems) == 0 { + return nil, ErrCgroupDeleted + } + + return &cgroup{ + g: g, + unifiedMountpoint: unifiedMountpoint, + subsystems: subsystems, + }, nil +} + +type cgroup struct { + g GroupPath + unifiedMountpoint string + + subsystems []Subsystem + mu sync.Mutex + err error +} + +// New returns a new sub cgroup +func (c *cgroup) GroupPath() GroupPath { + return c.g +} + +// New returns a new sub cgroup +func (c *cgroup) New(name string, resources *specs.LinuxResources) (Cgroup, error) { + if strings.HasPrefix(name, "/") { + return nil, errors.New("name must be relative") + } + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return nil, c.err + } + g := GroupPath(filepath.Join(string(c.g), name)) + var subsystems []Subsystem + for _, s := range c.subsystems { + if ok, _ := s.Available(g); ok { + subsystems = append(subsystems, s) + if c, ok := s.(Creator); ok { + if err := c.Create(g, resources); err != nil { + return nil, err + } + } + } + } + + return &cgroup{ + g: g, + unifiedMountpoint: c.unifiedMountpoint, + subsystems: subsystems, + }, nil +} + +// Subsystems returns all the subsystems that are currently being +// consumed by the group +func (c *cgroup) Subsystems() []Subsystem { + return c.subsystems +} + +// Add moves the provided process into the new cgroup +func (c *cgroup) Add(process Process) error { + if process.Pid <= 0 { + return ErrInvalidPid + } + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + return c.add(process) +} + +func (c *cgroup) add(process Process) error { + if err := ioutil.WriteFile( + filepath.Join(c.unifiedMountpoint, string(c.g), cgroupProcs), + []byte(strconv.Itoa(process.Pid)), + defaultFilePerm, + ); err != nil { + return err + } + return nil +} + +// Delete will remove the control group from each of the subsystems registered +func (c *cgroup) Delete() error { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + var errors []string + for _, s := range c.subsystems { + if d, ok := s.(Deleter); ok { + if err := d.Delete(c.g); err != nil { + errors = append(errors, err.Error()) + } + continue + } + } + path := filepath.Join(c.unifiedMountpoint, string(c.g)) + if err := remove(path); err != nil { + errors = append(errors, err.Error()) + } + if len(errors) > 0 { + return fmt.Errorf("cgroups: unable to remove %q: %s", path, strings.Join(errors, ", ")) + } + c.err = ErrCgroupDeleted + return nil +} + +// Stat returns the current metrics for the cgroup +func (c *cgroup) Stat(handlers ...ErrorHandler) (*statsv2.Metrics, error) { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return nil, c.err + } + if len(handlers) == 0 { + handlers = append(handlers, errPassthrough) + } + var ( + stats = &statsv2.Metrics{} + wg = &sync.WaitGroup{} + errs = make(chan error, len(c.subsystems)) + ) + for _, s := range c.subsystems { + if ss, ok := s.(Stater); ok { + wg.Add(1) + go func() { + defer wg.Done() + if err := ss.Stat(c.g, stats); err != nil { + for _, eh := range handlers { + if herr := eh(err); herr != nil { + errs <- herr + } + } + } + }() + } + } + wg.Wait() + close(errs) + for err := range errs { + return nil, err + } + return stats, nil +} + +// Update updates the cgroup with the new resource values provided +// +// Be prepared to handle EBUSY when trying to update a cgroup with +// live processes and other operations like Stats being performed at the +// same time +func (c *cgroup) Update(resources *specs.LinuxResources) error { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + for _, s := range c.subsystems { + if u, ok := s.(Updater); ok { + if err := u.Update(c.g, resources); err != nil { + return err + } + } + } + return nil +} + +// Processes returns the processes running inside the cgroup along +// with the pid +func (c *cgroup) Processes(recursive bool) ([]Process, error) { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return nil, c.err + } + return c.processes(recursive) +} + +func (c *cgroup) processes(recursive bool) ([]Process, error) { + path := filepath.Join(c.unifiedMountpoint, string(c.g)) + var processes []Process + err := filepath.Walk(path, func(p string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !recursive && info.IsDir() { + if p == path { + return nil + } + return filepath.SkipDir + } + _, name := filepath.Split(p) + if name != cgroupProcs { + return nil + } + procs, err := parseCgroupProcsFile(p) + if err != nil { + return err + } + processes = append(processes, procs...) + return nil + }) + return processes, err +} + +// Freeze freezes the entire cgroup and all the processes inside it +func (c *cgroup) Freeze() error { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + s := c.getSubsystem(Freezer) + if s == nil { + return ErrFreezerNotSupported + } + return s.(*freezerController).Freeze(c.g) +} + +// Thaw thaws out the cgroup and all the processes inside it +func (c *cgroup) Thaw() error { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + s := c.getSubsystem(Freezer) + if s == nil { + return ErrFreezerNotSupported + } + return s.(*freezerController).Thaw(c.g) +} + +// State returns the state of the cgroup and its processes +func (c *cgroup) State() State { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil && c.err == ErrCgroupDeleted { + return Deleted + } + s := c.getSubsystem(Freezer) + if s == nil { + return Thawed + } + state, err := s.(*freezerController).state(c.g) + if err != nil { + return Unknown + } + return state +} + +// MoveTo does a recursive move subsystem by subsystem of all the processes +// inside the group +func (c *cgroup) MoveTo(destination Cgroup) error { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + processes, err := c.processes(true) + if err != nil { + return err + } + for _, p := range processes { + if err := destination.Add(p); err != nil { + if strings.Contains(err.Error(), "no such process") { + continue + } + return err + } + } + return nil +} + +func (c *cgroup) getSubsystem(n Name) Subsystem { + for _, s := range c.subsystems { + if s.Name() == n { + return s + } + } + return nil +} diff --git a/v2/control.go b/v2/control.go new file mode 100644 index 00000000..88e863d1 --- /dev/null +++ b/v2/control.go @@ -0,0 +1,70 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import ( + "os" + + statsv2 "github.com/containerd/cgroups/v2/stats" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +const ( + cgroupProcs = "cgroup.procs" + defaultDirPerm = 0755 +) + +// defaultFilePerm is a var so that the test framework can change the filemode +// of all files created when the tests are running. The difference between the +// tests and real world use is that files like "cgroup.procs" will exist when writing +// to a read cgroup filesystem and do not exist prior when running in the tests. +// this is set to a non 0 value in the test code +var defaultFilePerm = os.FileMode(0) + +type Process struct { + // Pid is the process id of the process + Pid int +} + +// Cgroup handles interactions with the individual groups to perform +// actions on them as them main interface to this cgroup package +type Cgroup interface { + GroupPath() GroupPath + // New creates a new cgroup under the calling cgroup + New(string, *specs.LinuxResources) (Cgroup, error) + // Add adds a process to the cgroup (cgroup.procs) + Add(Process) error + // Delete removes the cgroup as a whole + Delete() error + // MoveTo moves all the processes under the calling cgroup to the provided one + // subsystems are moved one at a time + MoveTo(Cgroup) error + // Stat returns the stats for all subsystems in the cgroup + Stat(...ErrorHandler) (*statsv2.Metrics, error) + // Update updates all the subsystems with the provided resource changes + Update(resources *specs.LinuxResources) error + // Processes returns all the processes in a select subsystem for the cgroup + Processes(bool) ([]Process, error) + // Freeze freezes or pauses all processes inside the cgroup + Freeze() error + // Thaw thaw or resumes all processes inside the cgroup + Thaw() error + // State returns the cgroups current state + State() State + // Subsystems returns all the subsystems in the cgroup + Subsystems() []Subsystem +} diff --git a/v2/errors.go b/v2/errors.go new file mode 100644 index 00000000..f56aafdd --- /dev/null +++ b/v2/errors.go @@ -0,0 +1,50 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import ( + "errors" + "os" +) + +var ( + ErrInvalidPid = errors.New("cgroups: pid must be greater than 0") + ErrMountPointNotExist = errors.New("cgroups: cgroup mountpoint does not exist") + ErrInvalidFormat = errors.New("cgroups: parsing file with invalid format failed") + ErrFreezerNotSupported = errors.New("cgroups: freezer cgroup (v2) not supported on this system") + ErrMemoryNotSupported = errors.New("cgroups: memory cgroup (v2) not supported on this system") + ErrPidsNotSupported = errors.New("cgroups: pids cgroup (v2) not supported on this system") + ErrCgroupDeleted = errors.New("cgroups: cgroup deleted") + ErrNoCgroupMountDestination = errors.New("cgroups: cannot find cgroup mount destination") + + ErrInvalidGroupPath = errors.New("cgroups: group path format must be compatible with /proc/PID/cgroup") +) + +// ErrorHandler is a function that handles and acts on errors +type ErrorHandler func(err error) error + +// IgnoreNotExist ignores any errors that are for not existing files +func IgnoreNotExist(err error) error { + if os.IsNotExist(err) { + return nil + } + return err +} + +func errPassthrough(err error) error { + return err +} diff --git a/v2/freezer.go b/v2/freezer.go new file mode 100644 index 00000000..13c81dc2 --- /dev/null +++ b/v2/freezer.go @@ -0,0 +1,112 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import ( + "io/ioutil" + "path/filepath" + "strings" + "time" + + "github.com/pkg/errors" +) + +func NewFreezer(unifiedMountpoint string) (*freezerController, error) { + f := &freezerController{ + unifiedMountpoint: unifiedMountpoint, + } + ok, err := f.Available("/") + if err != nil { + return nil, err + } + if !ok { + return nil, ErrFreezerNotSupported + } + return f, nil +} + +type freezerController struct { + unifiedMountpoint string +} + +func (f *freezerController) Name() Name { + return Freezer +} + +func (f *freezerController) path(g GroupPath) string { + return filepath.Join(f.unifiedMountpoint, string(g)) +} + +func (f *freezerController) Available(g GroupPath) (bool, error) { + return available(f.unifiedMountpoint, g, Freezer) +} + +func (f *freezerController) Freeze(g GroupPath) error { + return f.waitState(g, Frozen) +} + +func (f *freezerController) Thaw(g GroupPath) error { + return f.waitState(g, Thawed) +} + +func (f *freezerController) changeState(g GroupPath, state State) error { + desiredState := "" + switch state { + case Frozen: + desiredState = "1" + case Thawed: + desiredState = "0" + default: + return errors.Errorf("unknown state %q", state) + } + return ioutil.WriteFile( + filepath.Join(f.path(g), "cgroup.freeze"), + []byte(strings.ToUpper(string(desiredState))), + defaultFilePerm, + ) +} + +func (f *freezerController) state(g GroupPath) (State, error) { + current, err := ioutil.ReadFile(filepath.Join(f.path(g), "cgroup.freeze")) + if err != nil { + return "", err + } + switch strings.TrimSpace(string(current)) { + case "1": + return Frozen, nil + case "0": + return Thawed, nil + default: + return "", nil + } +} + +func (f *freezerController) waitState(g GroupPath, state State) error { + for { + if err := f.changeState(g, state); err != nil { + return err + } + current, err := f.state(g) + if err != nil { + return err + } + if current == state { + return nil + } + time.Sleep(1 * time.Millisecond) + } +} diff --git a/v2/opts.go b/v2/opts.go new file mode 100644 index 00000000..e00c4e84 --- /dev/null +++ b/v2/opts.go @@ -0,0 +1,25 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +// InitOpts allows configuration for the creation or loading of a cgroup +type InitOpts func(*initConfig) error + +// initConfig provides configuration options for the creation +// or loading of a cgroup and its subsystems +type initConfig struct { +} diff --git a/v2/paths.go b/v2/paths.go new file mode 100644 index 00000000..054f73e0 --- /dev/null +++ b/v2/paths.go @@ -0,0 +1,60 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import ( + "fmt" + "path/filepath" + "strings" +) + +// GroupPath is a string that appears as the third field in /proc/PID/cgroup. +// e.g. "/user.slice/user-1001.slice/session-1.scope" +// +// GroupPath must not contain "/sys/fs/cgroup" prefix. +// GroupPath must be a absolute path starts with "/". +type GroupPath string + +// NestedGroupPath will nest the cgroups based on the calling processes cgroup +// placing its child processes inside its own path +func NestedGroupPath(suffix string) (GroupPath, error) { + path, err := parseCgroupFile("/proc/self/cgroup") + if err != nil { + return "", err + } + return GroupPath(filepath.Join(string(path), suffix)), nil +} + +// PidGroupPath will return the correct cgroup paths for an existing process running inside a cgroup +// This is commonly used for the Load function to restore an existing container +func PidGroupPath(pid int) (GroupPath, error) { + p := fmt.Sprintf("/proc/%d/cgroup", pid) + return parseCgroupFile(p) +} + +// VerifyGroupPath verifies the format of g. +// VerifyGroupPath doesn't verify whether g actually exists on the system. +func VerifyGroupPath(g GroupPath) error { + s := string(g) + if !strings.HasPrefix(s, "/") { + return ErrInvalidGroupPath + } + if strings.HasPrefix(s, "/sys/fs/cgroup") { + return ErrInvalidGroupPath + } + return nil +} diff --git a/v2/paths_test.go b/v2/paths_test.go new file mode 100644 index 00000000..fbf2e59e --- /dev/null +++ b/v2/paths_test.go @@ -0,0 +1,44 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import ( + "testing" +) + +func TestVerifyGroupPath(t *testing.T) { + valids := map[string]bool{ + "/": true, + "": false, + "/foo": true, + "/foo/bar": true, + "/sys/fs/cgroup/foo": false, + "/sys/fs/cgroup/unified/foo": false, + } + for s, valid := range valids { + err := VerifyGroupPath(GroupPath(s)) + if valid { + if err != nil { + t.Error(err) + } + } else { + if err == nil { + t.Error("error is expected") + } + } + } +} diff --git a/v2/pids.go b/v2/pids.go new file mode 100644 index 00000000..0f748402 --- /dev/null +++ b/v2/pids.go @@ -0,0 +1,98 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import ( + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + statsv2 "github.com/containerd/cgroups/v2/stats" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewPids(unifiedMountpoint string) (*pidsController, error) { + p := &pidsController{ + unifiedMountpoint: unifiedMountpoint, + } + ok, err := p.Available("/") + if err != nil { + return nil, err + } + if !ok { + return nil, ErrPidsNotSupported + } + return p, nil +} + +type pidsController struct { + unifiedMountpoint string +} + +func (p *pidsController) Name() Name { + return Pids +} + +func (p *pidsController) path(g GroupPath) string { + return filepath.Join(p.unifiedMountpoint, string(g)) +} + +func (p *pidsController) Available(g GroupPath) (bool, error) { + return available(p.unifiedMountpoint, g, Pids) +} + +func (p *pidsController) Create(g GroupPath, resources *specs.LinuxResources) error { + if err := os.MkdirAll(p.path(g), defaultDirPerm); err != nil { + return err + } + if resources.Pids != nil && resources.Pids.Limit > 0 { + return ioutil.WriteFile( + filepath.Join(p.path(g), "pids.max"), + []byte(strconv.FormatInt(resources.Pids.Limit, 10)), + defaultFilePerm, + ) + } + return nil +} + +func (p *pidsController) Update(g GroupPath, resources *specs.LinuxResources) error { + return p.Create(g, resources) +} + +func (p *pidsController) Stat(g GroupPath, stats *statsv2.Metrics) error { + current, err := readUint(filepath.Join(p.path(g), "pids.current")) + if err != nil { + return err + } + var max uint64 + maxData, err := ioutil.ReadFile(filepath.Join(p.path(g), "pids.max")) + if err != nil { + return err + } + if maxS := strings.TrimSpace(string(maxData)); maxS != "max" { + if max, err = parseUint(maxS, 10, 64); err != nil { + return err + } + } + stats.Pids = &statsv2.PidsStat{ + Current: current, + Limit: max, + } + return nil +} diff --git a/v2/state.go b/v2/state.go new file mode 100644 index 00000000..b7af3395 --- /dev/null +++ b/v2/state.go @@ -0,0 +1,27 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +// State is a type that represents the state of the current cgroup +type State string + +const ( + Unknown State = "" + Thawed State = "thawed" + Frozen State = "frozen" + Deleted State = "deleted" +) diff --git a/v2/stats/doc.go b/v2/stats/doc.go new file mode 100644 index 00000000..e51e12f8 --- /dev/null +++ b/v2/stats/doc.go @@ -0,0 +1,17 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package stats diff --git a/v2/stats/metrics.pb.go b/v2/stats/metrics.pb.go new file mode 100644 index 00000000..a4a6858d --- /dev/null +++ b/v2/stats/metrics.pb.go @@ -0,0 +1,571 @@ +// Code generated by protoc-gen-gogo. DO NOT EDIT. +// source: github.com/containerd/cgroups/v2/stats/metrics.proto + +package stats + +import ( + fmt "fmt" + proto "github.com/gogo/protobuf/proto" + io "io" + math "math" + reflect "reflect" + strings "strings" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package + +type Metrics struct { + Pids *PidsStat `protobuf:"bytes,1,opt,name=pids,proto3" json:"pids,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *Metrics) Reset() { *m = Metrics{} } +func (*Metrics) ProtoMessage() {} +func (*Metrics) Descriptor() ([]byte, []int) { + return fileDescriptor_2fc6005842049e6b, []int{0} +} +func (m *Metrics) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *Metrics) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_Metrics.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalTo(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *Metrics) XXX_Merge(src proto.Message) { + xxx_messageInfo_Metrics.Merge(m, src) +} +func (m *Metrics) XXX_Size() int { + return m.Size() +} +func (m *Metrics) XXX_DiscardUnknown() { + xxx_messageInfo_Metrics.DiscardUnknown(m) +} + +var xxx_messageInfo_Metrics proto.InternalMessageInfo + +type PidsStat struct { + Current uint64 `protobuf:"varint,1,opt,name=current,proto3" json:"current,omitempty"` + Limit uint64 `protobuf:"varint,2,opt,name=limit,proto3" json:"limit,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PidsStat) Reset() { *m = PidsStat{} } +func (*PidsStat) ProtoMessage() {} +func (*PidsStat) Descriptor() ([]byte, []int) { + return fileDescriptor_2fc6005842049e6b, []int{1} +} +func (m *PidsStat) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *PidsStat) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_PidsStat.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalTo(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *PidsStat) XXX_Merge(src proto.Message) { + xxx_messageInfo_PidsStat.Merge(m, src) +} +func (m *PidsStat) XXX_Size() int { + return m.Size() +} +func (m *PidsStat) XXX_DiscardUnknown() { + xxx_messageInfo_PidsStat.DiscardUnknown(m) +} + +var xxx_messageInfo_PidsStat proto.InternalMessageInfo + +func init() { + proto.RegisterType((*Metrics)(nil), "io.containerd.cgroups.v2.Metrics") + proto.RegisterType((*PidsStat)(nil), "io.containerd.cgroups.v2.PidsStat") +} + +func init() { + proto.RegisterFile("github.com/containerd/cgroups/v2/stats/metrics.proto", fileDescriptor_2fc6005842049e6b) +} + +var fileDescriptor_2fc6005842049e6b = []byte{ + // 199 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0x32, 0x49, 0xcf, 0x2c, 0xc9, + 0x28, 0x4d, 0xd2, 0x4b, 0xce, 0xcf, 0xd5, 0x4f, 0xce, 0xcf, 0x2b, 0x49, 0xcc, 0xcc, 0x4b, 0x2d, + 0x4a, 0xd1, 0x4f, 0x4e, 0x2f, 0xca, 0x2f, 0x2d, 0x28, 0xd6, 0x2f, 0x33, 0xd2, 0x2f, 0x2e, 0x49, + 0x2c, 0x29, 0xd6, 0xcf, 0x4d, 0x2d, 0x29, 0xca, 0x4c, 0x2e, 0xd6, 0x2b, 0x28, 0xca, 0x2f, 0xc9, + 0x17, 0x92, 0xc8, 0xcc, 0xd7, 0x43, 0xa8, 0xd6, 0x83, 0xaa, 0xd6, 0x2b, 0x33, 0x52, 0x72, 0xe4, + 0x62, 0xf7, 0x85, 0x28, 0x15, 0x32, 0xe3, 0x62, 0x29, 0xc8, 0x4c, 0x29, 0x96, 0x60, 0x54, 0x60, + 0xd4, 0xe0, 0x36, 0x52, 0xd2, 0xc3, 0xa5, 0x47, 0x2f, 0x20, 0x33, 0xa5, 0x38, 0xb8, 0x24, 0xb1, + 0x24, 0x08, 0xac, 0x5e, 0xc9, 0x8a, 0x8b, 0x03, 0x26, 0x22, 0x24, 0xc1, 0xc5, 0x9e, 0x5c, 0x5a, + 0x54, 0x94, 0x9a, 0x57, 0x02, 0x36, 0x86, 0x25, 0x08, 0xc6, 0x15, 0x12, 0xe1, 0x62, 0xcd, 0xc9, + 0xcc, 0xcd, 0x2c, 0x91, 0x60, 0x02, 0x8b, 0x43, 0x38, 0x4e, 0x12, 0x27, 0x1e, 0xca, 0x31, 0xdc, + 0x78, 0x28, 0xc7, 0xd0, 0xf0, 0x48, 0x8e, 0xf1, 0xc4, 0x23, 0x39, 0xc6, 0x0b, 0x8f, 0xe4, 0x18, + 0x1f, 0x3c, 0x92, 0x63, 0x4c, 0x62, 0x03, 0xbb, 0xdc, 0x18, 0x10, 0x00, 0x00, 0xff, 0xff, 0xe2, + 0xc2, 0x63, 0xc7, 0xf1, 0x00, 0x00, 0x00, +} + +func (m *Metrics) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Metrics) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.Pids != nil { + dAtA[i] = 0xa + i++ + i = encodeVarintMetrics(dAtA, i, uint64(m.Pids.Size())) + n1, err := m.Pids.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n1 + } + if m.XXX_unrecognized != nil { + i += copy(dAtA[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *PidsStat) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *PidsStat) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.Current != 0 { + dAtA[i] = 0x8 + i++ + i = encodeVarintMetrics(dAtA, i, uint64(m.Current)) + } + if m.Limit != 0 { + dAtA[i] = 0x10 + i++ + i = encodeVarintMetrics(dAtA, i, uint64(m.Limit)) + } + if m.XXX_unrecognized != nil { + i += copy(dAtA[i:], m.XXX_unrecognized) + } + return i, nil +} + +func encodeVarintMetrics(dAtA []byte, offset int, v uint64) int { + for v >= 1<<7 { + dAtA[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + dAtA[offset] = uint8(v) + return offset + 1 +} +func (m *Metrics) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Pids != nil { + l = m.Pids.Size() + n += 1 + l + sovMetrics(uint64(l)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *PidsStat) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Current != 0 { + n += 1 + sovMetrics(uint64(m.Current)) + } + if m.Limit != 0 { + n += 1 + sovMetrics(uint64(m.Limit)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func sovMetrics(x uint64) (n int) { + for { + n++ + x >>= 7 + if x == 0 { + break + } + } + return n +} +func sozMetrics(x uint64) (n int) { + return sovMetrics(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (this *Metrics) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&Metrics{`, + `Pids:` + strings.Replace(fmt.Sprintf("%v", this.Pids), "PidsStat", "PidsStat", 1) + `,`, + `XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func (this *PidsStat) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&PidsStat{`, + `Current:` + fmt.Sprintf("%v", this.Current) + `,`, + `Limit:` + fmt.Sprintf("%v", this.Limit) + `,`, + `XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func valueToStringMetrics(v interface{}) string { + rv := reflect.ValueOf(v) + if rv.IsNil() { + return "nil" + } + pv := reflect.Indirect(rv).Interface() + return fmt.Sprintf("*%v", pv) +} +func (m *Metrics) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Metrics: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Metrics: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Pids", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Pids == nil { + m.Pids = &PidsStat{} + } + if err := m.Pids.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *PidsStat) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: PidsStat: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: PidsStat: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Current", wireType) + } + m.Current = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Current |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Limit", wireType) + } + m.Limit = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Limit |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func skipMetrics(dAtA []byte) (n int, err error) { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowMetrics + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowMetrics + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if dAtA[iNdEx-1] < 0x80 { + break + } + } + return iNdEx, nil + case 1: + iNdEx += 8 + return iNdEx, nil + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowMetrics + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if length < 0 { + return 0, ErrInvalidLengthMetrics + } + iNdEx += length + if iNdEx < 0 { + return 0, ErrInvalidLengthMetrics + } + return iNdEx, nil + case 3: + for { + var innerWire uint64 + var start int = iNdEx + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowMetrics + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + innerWire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + innerWireType := int(innerWire & 0x7) + if innerWireType == 4 { + break + } + next, err := skipMetrics(dAtA[start:]) + if err != nil { + return 0, err + } + iNdEx = start + next + if iNdEx < 0 { + return 0, ErrInvalidLengthMetrics + } + } + return iNdEx, nil + case 4: + return iNdEx, nil + case 5: + iNdEx += 4 + return iNdEx, nil + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + } + panic("unreachable") +} + +var ( + ErrInvalidLengthMetrics = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowMetrics = fmt.Errorf("proto: integer overflow") +) diff --git a/v2/stats/metrics.pb.txt b/v2/stats/metrics.pb.txt new file mode 100755 index 00000000..026b1808 --- /dev/null +++ b/v2/stats/metrics.pb.txt @@ -0,0 +1,33 @@ +file { + name: "github.com/containerd/cgroups/v2/stats/metrics.proto" + package: "io.containerd.cgroups.v2" + message_type { + name: "Metrics" + field { + name: "pids" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".io.containerd.cgroups.v2.PidsStat" + json_name: "pids" + } + } + message_type { + name: "PidsStat" + field { + name: "current" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "current" + } + field { + name: "limit" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "limit" + } + } + syntax: "proto3" +} diff --git a/v2/stats/metrics.proto b/v2/stats/metrics.proto new file mode 100644 index 00000000..3e1cede6 --- /dev/null +++ b/v2/stats/metrics.proto @@ -0,0 +1,14 @@ +syntax = "proto3"; + +package io.containerd.cgroups.v2; + +// import "gogoproto/gogo.proto"; + +message Metrics { + PidsStat pids = 1; +} + +message PidsStat { + uint64 current = 1; + uint64 limit = 2; +} diff --git a/v2/subsystem.go b/v2/subsystem.go new file mode 100644 index 00000000..ed3c21ab --- /dev/null +++ b/v2/subsystem.go @@ -0,0 +1,111 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import ( + "io/ioutil" + "path/filepath" + "strings" + + statsv2 "github.com/containerd/cgroups/v2/stats" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +// Name is a typed name for a cgroup subsystem. +// Corresponds to cgroup.controllers format. +type Name string + +const ( + // Devices is a pseudo-controller, implemented since kernel 4.15 + Devices Name = "devices" + // Hugetlb is not implemented in upstream kernel (patch available) + // Hugetlb Name = "hugetlb" + // Freezer is a pseudo-controller, implemented since kernel 5.2 + Freezer Name = "freezer" + // Pids is implemented since kernel 4.5 + Pids Name = "pids" + // PerfEvent is implemented since kernel 4.11 + PerfEvent Name = "perf_event" + // Cpuset is implemented since kernel 5.0 + Cpuset Name = "cpuset" + // Cpu is implemented since kernel 4.15 + Cpu Name = "cpu" + // Memory is implemented since kernel 4.5 + Memory Name = "memory" + // Io is implemented since kernel 4.5 + Io Name = "io" + // Rdma is implemented since kernel 4.11 + Rdma Name = "rdma" +) + +// Subsystems returns available subsystems +func Subsystems(unifiedMountpoint string, g GroupPath) ([]Name, error) { + if err := VerifyGroupPath(g); err != nil { + return nil, err + } + path := filepath.Join(unifiedMountpoint, string(g), "cgroup.controllers") + b, err := ioutil.ReadFile(path) + if err != nil { + return nil, err + } + subsystems := []Name{ + Devices, + Freezer, + } + for _, s := range strings.Fields(string(b)) { + subsystems = append(subsystems, Name(s)) + } + return subsystems, nil +} + +func available(unifiedMountpoint string, g GroupPath, name Name) (bool, error) { + names, err := Subsystems(unifiedMountpoint, g) + if err != nil { + return false, err + } + for _, n := range names { + if n == name { + return true, nil + } + } + return false, nil +} + +type Subsystem interface { + Name() Name + Available(g GroupPath) (bool, error) +} + +type Creator interface { + Subsystem + Create(g GroupPath, resources *specs.LinuxResources) error +} + +type Deleter interface { + Subsystem + Delete(g GroupPath) error +} + +type Stater interface { + Subsystem + Stat(g GroupPath, stats *statsv2.Metrics) error +} + +type Updater interface { + Subsystem + Update(g GroupPath, resources *specs.LinuxResources) error +} diff --git a/v2/utils.go b/v2/utils.go new file mode 100644 index 00000000..152b3eac --- /dev/null +++ b/v2/utils.go @@ -0,0 +1,148 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import ( + "bufio" + "fmt" + "io" + "io/ioutil" + "os" + "strconv" + "strings" + "time" + + "github.com/pkg/errors" +) + +func defaults(unifiedMountpoint string) ([]Subsystem, map[Name]error) { + var subsystems []Subsystem + unavailables := make(map[Name]error, 0) + if x, err := NewPids(unifiedMountpoint); err != nil { + unavailables[Pids] = err + } else { + subsystems = append(subsystems, x) + } + + if x, err := NewFreezer(unifiedMountpoint); err != nil { + unavailables[Freezer] = err + } else { + subsystems = append(subsystems, x) + } + return subsystems, unavailables +} + +// remove will remove a cgroup path handling EAGAIN and EBUSY errors and +// retrying the remove after a exp timeout +func remove(path string) error { + var err error + delay := 10 * time.Millisecond + for i := 0; i < 5; i++ { + if i != 0 { + time.Sleep(delay) + delay *= 2 + } + if err = os.RemoveAll(path); err == nil { + return nil + } + } + return errors.Wrapf(err, "cgroups: unable to remove path %q", path) +} + +// parseCgroupProcsFile parses /sys/fs/cgroup/$GROUPPATH/cgroup.procs +func parseCgroupProcsFile(path string) ([]Process, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + var ( + out []Process + s = bufio.NewScanner(f) + ) + for s.Scan() { + if t := s.Text(); t != "" { + pid, err := strconv.Atoi(t) + if err != nil { + return nil, err + } + out = append(out, Process{ + Pid: pid, + }) + } + } + return out, nil +} + +func readUint(path string) (uint64, error) { + v, err := ioutil.ReadFile(path) + if err != nil { + return 0, err + } + return parseUint(strings.TrimSpace(string(v)), 10, 64) +} + +func parseUint(s string, base, bitSize int) (uint64, error) { + v, err := strconv.ParseUint(s, base, bitSize) + if err != nil { + intValue, intErr := strconv.ParseInt(s, base, bitSize) + // 1. Handle negative values greater than MinInt64 (and) + // 2. Handle negative values lesser than MinInt64 + if intErr == nil && intValue < 0 { + return 0, nil + } else if intErr != nil && + intErr.(*strconv.NumError).Err == strconv.ErrRange && + intValue < 0 { + return 0, nil + } + return 0, err + } + return v, nil +} + +// parseCgroupFile parses /proc/PID/cgroup file and return GroupPath +func parseCgroupFile(path string) (GroupPath, error) { + f, err := os.Open(path) + if err != nil { + return "", err + } + defer f.Close() + return parseCgroupFromReader(f) +} + +func parseCgroupFromReader(r io.Reader) (GroupPath, error) { + var ( + s = bufio.NewScanner(r) + ) + for s.Scan() { + if err := s.Err(); err != nil { + return "", err + } + var ( + text = s.Text() + parts = strings.SplitN(text, ":", 3) + ) + if len(parts) < 3 { + return "", fmt.Errorf("invalid cgroup entry: %q", text) + } + // text is like "0::/user.slice/user-1001.slice/session-1.scope" + if parts[0] == "0" && parts[1] == "" { + return GroupPath(parts[2]), nil + } + } + return "", fmt.Errorf("cgroup path not found") +} diff --git a/v2/utils_test.go b/v2/utils_test.go new file mode 100644 index 00000000..2a12ee74 --- /dev/null +++ b/v2/utils_test.go @@ -0,0 +1,45 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import ( + "strings" + "testing" +) + +func TestParseCgroupFromReader(t *testing.T) { + cases := map[string]string{ + "0::/user.slice/user-1001.slice/session-1.scope\n": "/user.slice/user-1001.slice/session-1.scope", + "2:cpuset:/foo\n1:name=systemd:/\n": "", + "2:cpuset:/foo\n1:name=systemd:/\n0::/user.slice/user-1001.slice/session-1.scope\n": "/user.slice/user-1001.slice/session-1.scope", + } + for s, expected := range cases { + g, err := parseCgroupFromReader(strings.NewReader(s)) + if expected != "" { + if string(g) != expected { + t.Errorf("expected %q, got %q", expected, string(g)) + } + if err != nil { + t.Error(err) + } + } else { + if err == nil { + t.Error("error is expected") + } + } + } +}