// EtcdService runs the embedded etcd server and carries the settings that
// control defragmentation of its backend. The defragmentation fields are
// copied from the Etcd section of the MicroShift configuration by configure.
type EtcdService struct {
	// etcdCfg is the configuration handed to the embedded etcd server.
	etcdCfg *etcd.Config
	// minDefragBytes is the on-disk backend size, in bytes, that must be
	// reached before a periodic defragmentation is attempted.
	minDefragBytes int64
	// maxFragmentedPercentage is the fragmentation percentage that must be
	// reached before a periodic defragmentation is attempted.
	maxFragmentedPercentage float64
	// defragCheckFreq is the interval between fragmentation checks; zero
	// disables the periodic checks.
	defragCheckFreq time.Duration
	// doStartupDefrag requests one defragmentation as soon as the server
	// reports ready, independently of the periodic checks.
	doStartupDefrag bool
}
<-e.Server.StopNotify() + }() + + // If we were told to, go ahead and do a defragment now. + if s.doStartupDefrag { + if err := e.Server.Backend().Defrag(); err != nil { + err = fmt.Errorf("initial defragmentation failed: %v", err) + klog.Error(err) + return err + } + } + + // Start up the defrag controller. + defragCtx, defragShutdown := context.WithCancel(context.Background()) + go s.defragController(defragCtx, e.Server.Backend()) // Wait to be stopped. sigTerm := make(chan os.Signal, 1) @@ -103,11 +135,44 @@ func (s *EtcdService) Run() error { sig := <-sigTerm klog.Infof("microshift-etcd received signal %v - stopping", sig) - e.Server.Stop() - <-e.Server.StopNotify() + // Shutdown the defrag controller. + defragShutdown() + return nil } +func (s *EtcdService) defragController(ctx context.Context, be backend.Backend) { + // Stop the controller if defrags are disabled. + if s.defragCheckFreq == 0 { + klog.Warning("defragmentation has been disabled") + return + } + + // This timer will check the fragmented conditions periodically. 
+ timer := time.NewTimer(s.defragCheckFreq) + defer func() { + if !timer.Stop() { + <-timer.C + } + }() + for { + select { + case <-ctx.Done(): + return + case start := <-timer.C: + if isBackendFragmented(be, s.maxFragmentedPercentage, s.minDefragBytes) { + klog.Info("attempting to defragment backend") + if err := be.Defrag(); err != nil { + klog.Errorf("defragmentation failed: %v", err) + } else { + klog.Infof("defragmentation took %v", time.Since(start)) + } + } + timer.Reset(s.defragCheckFreq) + } + } +} + func setURL(hostnames []string, port string) []url.URL { urls := make([]url.URL, len(hostnames)) for i, name := range hostnames { @@ -120,3 +185,17 @@ func setURL(hostnames []string, port string) []url.URL { } return urls } + +func isBackendFragmented(b backend.Backend, maxFragmentedPercentage float64, minDefragBytes int64) bool { + fragmentedPercentage := checkFragmentationPercentage(b.Size(), b.SizeInUse()) + if fragmentedPercentage > 0.00 { + klog.Infof("backend store fragmented: %.2f %%, dbSize: %d", fragmentedPercentage, b.Size()) + } + return fragmentedPercentage >= maxFragmentedPercentage && b.Size() >= minDefragBytes +} + +func checkFragmentationPercentage(ondisk, inuse int64) float64 { + diff := float64(ondisk - inuse) + fragmentedPercentage := (diff / float64(ondisk)) * 100 + return math.Round(fragmentedPercentage*100) / 100 +} diff --git a/etcd/vendor/github.com/openshift/microshift/pkg/config/config.go b/etcd/vendor/github.com/openshift/microshift/pkg/config/config.go index 205b9ef138..cc2c3247fd 100644 --- a/etcd/vendor/github.com/openshift/microshift/pkg/config/config.go +++ b/etcd/vendor/github.com/openshift/microshift/pkg/config/config.go @@ -11,6 +11,7 @@ import ( "path/filepath" "strconv" "strings" + "time" "github.com/apparentlymart/go-cidr/cidr" "github.com/mitchellh/go-homedir" @@ -57,6 +58,19 @@ type IngressConfig struct { ServingKey []byte } +type EtcdConfig struct { + // The limit on the size of the etcd database; etcd will start 
// EtcdConfig holds the etcd section of MicroshiftConfig: the backend size
// quota and the settings that control defragmentation of the backend.
type EtcdConfig struct {
	// QuotaBackendBytes is the limit on the size of the etcd database; etcd
	// will start failing writes if its size on disk reaches this value.
	QuotaBackendBytes int64
	// MinDefragBytes is the database size, in bytes, that must be reached
	// before a periodic defrag is done. A defrag happens only when both this
	// threshold and MaxFragmentedPercentage are met.
	MinDefragBytes int64
	// MaxFragmentedPercentage is the fragmentation level, expressed as a
	// percentage (the default is 45), that must be reached before a periodic
	// defrag is done.
	MaxFragmentedPercentage float64
	// DefragCheckFreq is how often to check the conditions for defragging
	// (0 means no defrags, except for a single one on startup if
	// DoStartupDefrag is set).
	DefragCheckFreq time.Duration
	// DoStartupDefrag is whether or not to do a defrag when the server
	// finishes starting.
	DoStartupDefrag bool
}