diff --git a/pkg/admin/data/data_manager.go b/pkg/admin/data/data_manager.go index a2e8318f18..f69a67c0c5 100644 --- a/pkg/admin/data/data_manager.go +++ b/pkg/admin/data/data_manager.go @@ -91,7 +91,7 @@ func (dm *manager) Backup(name BackupName) error { dest := dm.GetBackupPath(name) - if err := copyDataDir(dest); err != nil { + if err := copyPath(config.DataDir, dest); err != nil { return err } @@ -99,12 +99,51 @@ func (dm *manager) Backup(name BackupName) error { return nil } -func (dm *manager) Restore(n BackupName) error { - return fmt.Errorf("Restore not implemented") +func (dm *manager) Restore(name BackupName) error { + klog.InfoS("Restoring the data", "storage", dm.storage, "name", name, "data", config.DataDir) + + if name == "" { + return &EmptyArgErr{"name"} + } + + if exists, err := dm.BackupExists(name); err != nil { + return fmt.Errorf("checking if backup %s exists failed: %w", name, err) + } else if !exists { + return fmt.Errorf("failed to restore backup, %s does not exist", name) + } + + tmp := fmt.Sprintf("%s.saved", config.DataDir) + klog.InfoS("Temporarily renaming data dir", "data", config.DataDir, "renamedTo", tmp) + if err := os.Rename(config.DataDir, tmp); err != nil { + return fmt.Errorf("renaming %s to %s failed: %w", config.DataDir, tmp, err) + } + + src := dm.GetBackupPath(name) + if err := copyPath(src, config.DataDir); err != nil { + klog.ErrorS(err, "Copying backup failed - restoring previous data dir") + + if err := os.RemoveAll(config.DataDir); err != nil { + return fmt.Errorf("removing %s failed: %w", config.DataDir, err) + } + + if err := os.Rename(tmp, config.DataDir); err != nil { + return fmt.Errorf("renaming %s to %s failed: %w", tmp, config.DataDir, err) + } + + return fmt.Errorf("restoring backup %s failed: %w", src, err) + } + + klog.InfoS("Removing temporary data dir", "path", tmp) + if err := os.RemoveAll(tmp); err != nil { + klog.ErrorS(err, "Failed to remove path", "path", tmp) + } + + klog.InfoS("Restore finished", "backup", src, "data", config.DataDir) + return nil } -func copyDataDir(dest string) error { - cmd := exec.Command("cp", append(cpArgs, config.DataDir, dest)...) //nolint:gosec +func copyPath(src, dest string) error { + cmd := exec.Command("cp", append(cpArgs, src, dest)...) //nolint:gosec klog.InfoS("Executing command", "cmd", cmd) var outb, errb bytes.Buffer diff --git a/pkg/admin/prerun/prerun.go b/pkg/admin/prerun/prerun.go index 59e3da61c4..8b0ed32b4b 100644 --- a/pkg/admin/prerun/prerun.go +++ b/pkg/admin/prerun/prerun.go @@ -1,14 +1,15 @@ package prerun import ( + "bytes" "encoding/json" "errors" "fmt" "os" + "os/exec" "strings" "github.com/openshift/microshift/pkg/admin/data" - "github.com/openshift/microshift/pkg/config" "github.com/openshift/microshift/pkg/util" "k8s.io/klog/v2" ) @@ -31,36 +32,49 @@ func (hi *HealthInfo) IsHealthy() bool { return hi.Health == "healthy" } -func Perform() error { +type PreRun struct { + dataManager data.Manager +} + +func New(dataManager data.Manager) *PreRun { + return &PreRun{ + dataManager: dataManager, + } +} + +func (pr *PreRun) Perform() error { health, err := getHealthInfo() if err != nil { if errors.Is(err, errHealthFileDoesNotExist) { klog.InfoS("Health file does not exist - skipping backup") return nil } - klog.ErrorS(err, "Failed to load health from disk") return err } - klog.InfoS("Loaded health info from the disk", "health", health) if isCurr, err := containsCurrentBootID(health.BootID); err != nil { return err } else if isCurr { - klog.InfoS("Health file contains current boot - skipping backup") + // This might happen if microshift is (re)started after greenboot finishes running. + // Green script will overwrite the health.json with + // current boot's ID, deployment ID, and health. + klog.InfoS("Health file contains current boot - skipping pre-run") return nil } - if !health.IsHealthy() { - klog.InfoS("System was not healthy - skipping backup") - return nil - } + klog.InfoS("Previous boot", "health", health.Health, "deploymentID", health.DeploymentID, "bootID", health.BootID) - dataManager, err := data.NewManager(config.BackupsDir) - if err != nil { - return err + if health.IsHealthy() { + return pr.backup(health) } - existingBackups, err := dataManager.GetBackupList() + return pr.restore() +} + +func (pr *PreRun) backup(health *HealthInfo) error { + klog.InfoS("Backing up the data for deployment", "deployment", health.DeploymentID) + + existingBackups, err := pr.dataManager.GetBackupList() if err != nil { return err } @@ -76,20 +90,84 @@ func Perform() error { return nil } - if err := dataManager.Backup(newBackupName); err != nil { + if err := pr.dataManager.Backup(newBackupName); err != nil { return err } - removeOldBackups(dataManager, backupsForDeployment) + pr.removeOldBackups(backupsForDeployment) return nil } +func (pr *PreRun) restore() error { + // TODO: Check if containers are already running (i.e. microshift.service was restarted)? + + currentDeploymentID, err := getCurrentDeploymentID() + if err != nil { + return err + } + klog.InfoS("Restoring data for current deployment", "deployID", currentDeploymentID) + + existingBackups, err := pr.dataManager.GetBackupList() + if err != nil { + return err + } + klog.InfoS("List of existing backups", "backups", existingBackups) + backupsForDeployment := getExistingBackupsForTheDeployment(existingBackups, currentDeploymentID) + + if len(backupsForDeployment) == 0 { + return fmt.Errorf("there is no backup to restore for current deployment (%s)", currentDeploymentID) + } + + if len(backupsForDeployment) > 1 { + // could happen during backing up when removing older backups failed + klog.InfoS("TODO: more than 1 backup, need to pick most recent one") + } + + return pr.dataManager.Restore(backupsForDeployment[0]) +} + +func getCurrentDeploymentID() (string, error) { + cmd := exec.Command("rpm-ostree", "status", "--jsonpath=$.deployments[0].id", "--booted") + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + return "", fmt.Errorf("command %s failed: %s", strings.TrimSpace(cmd.String()), strings.TrimSpace(stderr.String())) + } + + ids := []string{} + if err := json.Unmarshal(stdout.Bytes(), &ids); err != nil { + return "", fmt.Errorf("unmarshalling '%s' to json failed: %w", strings.TrimSpace(stdout.String()), err) + } + + if len(ids) != 1 { + // this shouldn't happen if running on ostree system, but just in case + klog.ErrorS(nil, "Unexpected amount of deployments in rpm-ostree output", + "cmd", cmd.String(), + "stdout", strings.TrimSpace(stdout.String()), + "stderr", strings.TrimSpace(stderr.String()), + "unmarshalledIDs", ids) + return "", fmt.Errorf("rpm-ostree returned unexpected amount of deployment IDs: %d", len(ids)) + } + + return ids[0], nil +} + +func (pr *PreRun) removeOldBackups(backups []data.BackupName) { + for _, b := range backups { + klog.InfoS("Removing older backup", "name", b) + if err := pr.dataManager.RemoveBackup(b); err != nil { + klog.ErrorS(err, "Failed to remove backup", "name", b) + } + } +} + func containsCurrentBootID(id string) (bool, error) { path := "/proc/sys/kernel/random/boot_id" content, err := os.ReadFile(path) if err != nil { - klog.ErrorS(err, "Failed to read file", "path", path) return false, fmt.Errorf("reading file %s failed: %w", path, err) } currentBootID := strings.ReplaceAll(strings.TrimSpace(string(content)), "-", "") @@ -107,14 +185,12 @@ func getHealthInfo() (*HealthInfo, error) { content, err := os.ReadFile(path) if err != nil { - klog.ErrorS(err, "Failed to read file", "path", path) - return nil, err + return nil, fmt.Errorf("reading file %s failed: %w", path, err) } health := &HealthInfo{} if err := json.Unmarshal(content, &health); err != nil { - klog.ErrorS(err, "Failed to unmarshal file to json", "content", string(content)) - return nil, err + return nil, fmt.Errorf("unmarshalling '%s' failed: %w", strings.TrimSpace(string(content)), err) } return health, nil } @@ -139,12 +215,3 @@ func backupAlreadyExists(existingBackups []data.BackupName, name data.BackupName } return false } - -func removeOldBackups(dataManager data.Manager, backups []data.BackupName) { - for _, b := range backups { - klog.InfoS("Removing older backup", "name", b) - if err := dataManager.RemoveBackup(b); err != nil { - klog.ErrorS(err, "Failed to remove backup", "name", b) - } - } -} diff --git a/pkg/cmd/run.go b/pkg/cmd/run.go index dab2d93d1f..e48ee378f2 100644 --- a/pkg/cmd/run.go +++ b/pkg/cmd/run.go @@ -10,6 +10,7 @@ import ( "time" "github.com/coreos/go-systemd/daemon" + "github.com/openshift/microshift/pkg/admin/data" "github.com/openshift/microshift/pkg/admin/prerun" "github.com/openshift/microshift/pkg/config" "github.com/openshift/microshift/pkg/controllers" @@ -77,13 +78,23 @@ func logConfig(cfg *config.Config) { } } +func performPrerun() error { + dataManager, err := data.NewManager(config.BackupsDir) + if err != nil { + return err + } + + return prerun.New(dataManager).Perform() +} + func RunMicroshift(cfg *config.Config) error { // fail early if we don't have enough privileges if os.Geteuid() > 0 { klog.Fatalf("MicroShift must be run privileged") } - if err := prerun.Perform(); err != nil { + if err := performPrerun(); err != nil { + klog.ErrorS(err, "Pre-run procedure failed") return err }