diff --git a/libcontainer/cgroups/systemd/common.go b/libcontainer/cgroups/systemd/common.go index 6d5def712..268b713c0 100644 --- a/libcontainer/cgroups/systemd/common.go +++ b/libcontainer/cgroups/systemd/common.go @@ -28,10 +28,6 @@ const ( ) var ( - connOnce sync.Once - connDbus *systemdDbus.Conn - connErr error - versionOnce sync.Once version int @@ -291,19 +287,6 @@ func generateDeviceProperties(rules []*devices.Rule) ([]systemdDbus.Property, er return properties, nil } -// getDbusConnection lazy initializes systemd dbus connection -// and returns it -func getDbusConnection(rootless bool) (*systemdDbus.Conn, error) { - connOnce.Do(func() { - if rootless { - connDbus, connErr = NewUserSystemdDbus() - } else { - connDbus, connErr = systemdDbus.New() - } - }) - return connDbus, connErr -} - func newProp(name string, units interface{}) systemdDbus.Property { return systemdDbus.Property{ Name: name, @@ -319,19 +302,29 @@ func getUnitName(c *configs.Cgroup) string { return c.Name } -// isUnitExists returns true if the error is that a systemd unit already exists. -func isUnitExists(err error) bool { +// isDbusError returns true if the error is a specific dbus error. +func isDbusError(err error, name string) bool { if err != nil { - if dbusError, ok := err.(dbus.Error); ok { - return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists") + var derr *dbus.Error + if errors.As(err, &derr) { + return strings.Contains(derr.Name, name) } } return false } -func startUnit(dbusConnection *systemdDbus.Conn, unitName string, properties []systemdDbus.Property) error { +// isUnitExists returns true if the error is that a systemd unit already exists. +func isUnitExists(err error) bool { + return isDbusError(err, "org.freedesktop.systemd1.UnitExists") +} + +func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Property) error { statusChan := make(chan string, 1) - if _, err := dbusConnection.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil { + err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { + _, err := c.StartTransientUnit(unitName, "replace", properties, statusChan) + return err + }) + if err == nil { timeout := time.NewTimer(30 * time.Second) defer timeout.Stop() @@ -340,11 +333,11 @@ func startUnit(dbusConnection *systemdDbus.Conn, unitName string, properties []s close(statusChan) // Please refer to https://godoc.org/github.com/coreos/go-systemd/dbus#Conn.StartUnit if s != "done" { - dbusConnection.ResetFailedUnit(unitName) + resetFailedUnit(cm, unitName) return errors.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s) } case <-timeout.C: - dbusConnection.ResetFailedUnit(unitName) + resetFailedUnit(cm, unitName) return errors.New("Timeout waiting for systemd to create " + unitName) } } else if !isUnitExists(err) { @@ -354,9 +347,13 @@ func startUnit(dbusConnection *systemdDbus.Conn, unitName string, properties []s return nil } -func stopUnit(dbusConnection *systemdDbus.Conn, unitName string) error { +func stopUnit(cm *dbusConnManager, unitName string) error { statusChan := make(chan string, 1) - if _, err := dbusConnection.StopUnit(unitName, "replace", statusChan); err == nil { + err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { + _, err := c.StopUnit(unitName, "replace", statusChan) + return err + }) + if err == nil { select { case s := <-statusChan: close(statusChan) @@ -371,10 +368,30 @@ func stopUnit(dbusConnection *systemdDbus.Conn, unitName string) error { return nil } -func systemdVersion(conn *systemdDbus.Conn) int { +func resetFailedUnit(cm *dbusConnManager, name string) { + err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { + return c.ResetFailedUnit(name) + }) + if err != nil { + logrus.Warnf("unable to reset failed unit: %v", err) + } +} + +func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDbus.Property) error { + return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { + return c.SetUnitProperties(name, true, properties...) + }) +} + +func systemdVersion(cm *dbusConnManager) int { versionOnce.Do(func() { version = -1 - verStr, err := conn.GetManagerProperty("Version") + var verStr string + err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { + var err error + verStr, err = c.GetManagerProperty("Version") + return err + }) if err == nil { version, err = systemdVersionAtoi(verStr) } @@ -402,10 +419,10 @@ func systemdVersionAtoi(verStr string) (int, error) { return ver, errors.Wrapf(err, "can't parse version %s", verStr) } -func addCpuQuota(conn *systemdDbus.Conn, properties *[]systemdDbus.Property, quota int64, period uint64) { +func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota int64, period uint64) { if period != 0 { // systemd only supports CPUQuotaPeriodUSec since v242 - sdVer := systemdVersion(conn) + sdVer := systemdVersion(cm) if sdVer >= 242 { *properties = append(*properties, newProp("CPUQuotaPeriodUSec", period)) @@ -436,13 +453,13 @@ func addCpuQuota(conn *systemdDbus.Conn, properties *[]systemdDbus.Property, quo } } -func addCpuset(conn *systemdDbus.Conn, props *[]systemdDbus.Property, cpus, mems string) error { +func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems string) error { if cpus == "" && mems == "" { return nil } // systemd only supports AllowedCPUs/AllowedMemoryNodes since v244 - sdVer := systemdVersion(conn) + sdVer := systemdVersion(cm) if sdVer < 244 { logrus.Debugf("systemd v%d is too old to support AllowedCPUs/AllowedMemoryNodes"+ " (settings will still be applied to cgroupfs)", sdVer) diff --git a/libcontainer/cgroups/systemd/dbus.go b/libcontainer/cgroups/systemd/dbus.go new file mode 100644 index 000000000..0297a3fc2 --- /dev/null +++ b/libcontainer/cgroups/systemd/dbus.go @@ -0,0 +1,89 @@ +// +build linux + +package systemd + +import ( + "sync" + + systemdDbus "github.com/coreos/go-systemd/v22/dbus" + dbus "github.com/godbus/dbus/v5" +) + +type dbusConnManager struct { + conn *systemdDbus.Conn + rootless bool + sync.RWMutex +} + +// newDbusConnManager initializes systemd dbus connection manager. +func newDbusConnManager(rootless bool) *dbusConnManager { + return &dbusConnManager{ + rootless: rootless, + } +} + +// getConnection lazily initializes and returns systemd dbus connection. +func (d *dbusConnManager) getConnection() (*systemdDbus.Conn, error) { + // In the case where d.conn != nil + // Use the read lock the first time to ensure + // that Conn can be acquired at the same time. + d.RLock() + if conn := d.conn; conn != nil { + d.RUnlock() + return conn, nil + } + d.RUnlock() + + // In the case where d.conn == nil + // Use write lock to ensure that only one + // will be created + d.Lock() + defer d.Unlock() + if conn := d.conn; conn != nil { + return conn, nil + } + + conn, err := d.newConnection() + if err != nil { + return nil, err + } + d.conn = conn + return conn, nil +} + +func (d *dbusConnManager) newConnection() (*systemdDbus.Conn, error) { + if d.rootless { + return newUserSystemdDbus() + } + return systemdDbus.New() +} + +// resetConnection resets the connection to its initial state +// (so it can be reconnected if necessary). +func (d *dbusConnManager) resetConnection(conn *systemdDbus.Conn) { + d.Lock() + defer d.Unlock() + if d.conn != nil && d.conn == conn { + d.conn.Close() + d.conn = nil + } +} + +var errDbusConnClosed = dbus.ErrClosed.Error() + +// retryOnDisconnect calls op, and if the error it returns is about closed dbus +// connection, the connection is re-established and the op is retried. This helps +// with the situation when dbus is restarted and we have a stale connection. +func (d *dbusConnManager) retryOnDisconnect(op func(*systemdDbus.Conn) error) error { + for { + conn, err := d.getConnection() + if err != nil { + return err + } + err = op(conn) + if !isDbusError(err, errDbusConnClosed) { + return err + } + d.resetConnection(conn) + } +} diff --git a/libcontainer/cgroups/systemd/user.go b/libcontainer/cgroups/systemd/user.go index 8fe916884..234df8e6b 100644 --- a/libcontainer/cgroups/systemd/user.go +++ b/libcontainer/cgroups/systemd/user.go @@ -17,8 +17,8 @@ import ( "github.com/pkg/errors" ) -// NewUserSystemdDbus creates a connection for systemd user-instance. -func NewUserSystemdDbus() (*systemdDbus.Conn, error) { +// newUserSystemdDbus creates a connection for systemd user-instance. +func newUserSystemdDbus() (*systemdDbus.Conn, error) { addr, err := DetectUserDbusSessionBusAddress() if err != nil { return nil, err diff --git a/libcontainer/cgroups/systemd/v1.go b/libcontainer/cgroups/systemd/v1.go index 64af1d94b..33ef97ee8 100644 --- a/libcontainer/cgroups/systemd/v1.go +++ b/libcontainer/cgroups/systemd/v1.go @@ -21,12 +21,14 @@ type legacyManager struct { mu sync.Mutex cgroups *configs.Cgroup paths map[string]string + dbus *dbusConnManager } func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) cgroups.Manager { return &legacyManager{ cgroups: cg, paths: paths, + dbus: newDbusConnManager(false), } } @@ -57,7 +59,7 @@ var legacySubsystems = []subsystem{ &fs.NameGroup{GroupName: "name=systemd"}, } -func genV1ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]systemdDbus.Property, error) { +func genV1ResourcesProperties(c *configs.Cgroup, cm *dbusConnManager) ([]systemdDbus.Property, error) { var properties []systemdDbus.Property r := c.Resources @@ -77,7 +79,7 @@ func genV1ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]syst newProp("CPUShares", r.CpuShares)) } - addCpuQuota(conn, &properties, r.CpuQuota, r.CpuPeriod) + addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod) if r.BlkioWeight != 0 { properties = append(properties, @@ -90,7 +92,7 @@ func genV1ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]syst newProp("TasksMax", uint64(r.PidsLimit))) } - err = addCpuset(conn, &properties, r.CpusetCpus, r.CpusetMems) + err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems) if err != nil { return nil, err } @@ -164,11 +166,7 @@ func (m *legacyManager) Apply(pid int) error { properties = append(properties, newProp("DefaultDependencies", false)) - dbusConnection, err := getDbusConnection(false) - if err != nil { - return err - } - resourcesProperties, err := genV1ResourcesProperties(c, dbusConnection) + resourcesProperties, err := genV1ResourcesProperties(c, m.dbus) if err != nil { return err } @@ -183,7 +181,7 @@ func (m *legacyManager) Apply(pid int) error { } } - if err := startUnit(dbusConnection, unitName, properties); err != nil { + if err := startUnit(m.dbus, unitName, properties); err != nil { return err } @@ -221,13 +219,8 @@ func (m *legacyManager) Destroy() error { m.mu.Lock() defer m.mu.Unlock() - dbusConnection, err := getDbusConnection(false) - if err != nil { - return err - } - unitName := getUnitName(m.cgroups) + stopErr := stopUnit(m.dbus, getUnitName(m.cgroups)) - stopErr := stopUnit(dbusConnection, unitName) // Both on success and on error, cleanup all the cgroups we are aware of. // Some of them were created directly by Apply() and are not managed by systemd. if err := cgroups.RemovePaths(m.paths); err != nil { @@ -354,11 +347,7 @@ func (m *legacyManager) Set(container *configs.Config) error { if container.Cgroups.Resources.Unified != nil { return cgroups.ErrV1NoUnified } - dbusConnection, err := getDbusConnection(false) - if err != nil { - return err - } - properties, err := genV1ResourcesProperties(container.Cgroups, dbusConnection) + properties, err := genV1ResourcesProperties(container.Cgroups, m.dbus) if err != nil { return err } @@ -386,7 +375,7 @@ func (m *legacyManager) Set(container *configs.Config) error { } } - if err := dbusConnection.SetUnitProperties(getUnitName(container.Cgroups), true, properties...); err != nil { + if err := setUnitProperties(m.dbus, getUnitName(container.Cgroups), properties...); err != nil { _ = m.Freeze(targetFreezerState) return err } diff --git a/libcontainer/cgroups/systemd/v2.go b/libcontainer/cgroups/systemd/v2.go index 70b5b368e..6703a63c8 100644 --- a/libcontainer/cgroups/systemd/v2.go +++ b/libcontainer/cgroups/systemd/v2.go @@ -26,6 +26,7 @@ type unifiedManager struct { // path is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope" path string rootless bool + dbus *dbusConnManager } func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) cgroups.Manager { @@ -33,6 +34,7 @@ func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) cgrou cgroups: config, path: path, rootless: rootless, + dbus: newDbusConnManager(rootless), } } @@ -45,7 +47,7 @@ func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) cgrou // For the list of keys, see https://www.kernel.org/doc/Documentation/cgroup-v2.txt // // For the list of systemd unit properties, see systemd.resource-control(5). -func unifiedResToSystemdProps(conn *systemdDbus.Conn, res map[string]string) (props []systemdDbus.Property, _ error) { +func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props []systemdDbus.Property, _ error) { var err error for k, v := range res { @@ -83,7 +85,7 @@ func unifiedResToSystemdProps(conn *systemdDbus.Conn, res map[string]string) (pr return nil, fmt.Errorf("unified resource %q quota value conversion error: %w", k, err) } } - addCpuQuota(conn, &props, quota, period) + addCpuQuota(cm, &props, quota, period) case "cpu.weight": num, err := strconv.ParseUint(v, 10, 64) @@ -103,7 +105,7 @@ func unifiedResToSystemdProps(conn *systemdDbus.Conn, res map[string]string) (pr "cpuset.mems": "AllowedMemoryNodes", } // systemd only supports these properties since v244 - sdVer := systemdVersion(conn) + sdVer := systemdVersion(cm) if sdVer >= 244 { props = append(props, newProp(m[k], bits)) @@ -163,7 +165,7 @@ func unifiedResToSystemdProps(conn *systemdDbus.Conn, res map[string]string) (pr return props, nil } -func genV2ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]systemdDbus.Property, error) { +func genV2ResourcesProperties(c *configs.Cgroup, cm *dbusConnManager) ([]systemdDbus.Property, error) { var properties []systemdDbus.Property r := c.Resources @@ -201,7 +203,7 @@ func genV2ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]syst newProp("CPUWeight", r.CpuWeight)) } - addCpuQuota(conn, &properties, r.CpuQuota, r.CpuPeriod) + addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod) if r.PidsLimit > 0 || r.PidsLimit == -1 { properties = append(properties, @@ -209,7 +211,7 @@ func genV2ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]syst newProp("TasksMax", uint64(r.PidsLimit))) } - err = addCpuset(conn, &properties, r.CpusetCpus, r.CpusetMems) + err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems) if err != nil { return nil, err } @@ -218,7 +220,7 @@ func genV2ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]syst // convert Resources.Unified map to systemd properties if r.Unified != nil { - unifiedProps, err := unifiedResToSystemdProps(conn, r.Unified) + unifiedProps, err := unifiedResToSystemdProps(cm, r.Unified) if err != nil { return nil, err } @@ -279,22 +281,18 @@ func (m *unifiedManager) Apply(pid int) error { properties = append(properties, newProp("DefaultDependencies", false)) - dbusConnection, err := getDbusConnection(m.rootless) - if err != nil { - return err - } - resourcesProperties, err := genV2ResourcesProperties(c, dbusConnection) + resourcesProperties, err := genV2ResourcesProperties(c, m.dbus) if err != nil { return err } properties = append(properties, resourcesProperties...) properties = append(properties, c.SystemdProps...) - if err := startUnit(dbusConnection, unitName, properties); err != nil { + if err := startUnit(m.dbus, unitName, properties); err != nil { return errors.Wrapf(err, "error while starting unit %q with properties %+v", unitName, properties) } - if err = m.initPath(); err != nil { + if err := m.initPath(); err != nil { return err } if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil { @@ -310,17 +308,13 @@ func (m *unifiedManager) Destroy() error { m.mu.Lock() defer m.mu.Unlock() - dbusConnection, err := getDbusConnection(m.rootless) - if err != nil { - return err - } unitName := getUnitName(m.cgroups) - if err := stopUnit(dbusConnection, unitName); err != nil { + if err := stopUnit(m.dbus, unitName); err != nil { return err } // XXX this is probably not needed, systemd should handle it - err = os.Remove(m.path) + err := os.Remove(m.path) if err != nil && !os.IsNotExist(err) { return err } @@ -349,7 +343,7 @@ func (m *unifiedManager) getSliceFull() (string, error) { } if m.rootless { - dbusConnection, err := getDbusConnection(m.rootless) + dbusConnection, err := m.dbus.getConnection() if err != nil { return "", err } @@ -432,11 +426,7 @@ func (m *unifiedManager) GetStats() (*cgroups.Stats, error) { } func (m *unifiedManager) Set(container *configs.Config) error { - dbusConnection, err := getDbusConnection(m.rootless) - if err != nil { - return err - } - properties, err := genV2ResourcesProperties(m.cgroups, dbusConnection) + properties, err := genV2ResourcesProperties(m.cgroups, m.dbus) if err != nil { return err } @@ -464,7 +454,7 @@ func (m *unifiedManager) Set(container *configs.Config) error { } } - if err := dbusConnection.SetUnitProperties(getUnitName(m.cgroups), true, properties...); err != nil { + if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil { _ = m.Freeze(targetFreezerState) return errors.Wrap(err, "error while setting unit properties") }