USHIFT-1084 Restore backup if previous was unhealthy #1881
Changes from all commits, package `prerun`:

```diff
@@ -1,14 +1,15 @@
 package prerun
 
 import (
+	"bytes"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"os"
+	"os/exec"
 	"strings"
 
 	"github.com/openshift/microshift/pkg/admin/data"
 	"github.com/openshift/microshift/pkg/config"
 	"github.com/openshift/microshift/pkg/util"
 	"k8s.io/klog/v2"
 )
```
```diff
@@ -31,36 +32,49 @@ func (hi *HealthInfo) IsHealthy() bool {
 	return hi.Health == "healthy"
 }
 
-func Perform() error {
+type PreRun struct {
+	dataManager data.Manager
+}
+
+func New(dataManager data.Manager) *PreRun {
+	return &PreRun{
+		dataManager: dataManager,
+	}
+}
+
+func (pr *PreRun) Perform() error {
 	health, err := getHealthInfo()
 	if err != nil {
 		if errors.Is(err, errHealthFileDoesNotExist) {
 			klog.InfoS("Health file does not exist - skipping backup")
 			return nil
 		}
 		klog.ErrorS(err, "Failed to load health from disk")
 		return err
 	}
 	klog.InfoS("Loaded health info from the disk", "health", health)
 
 	if isCurr, err := containsCurrentBootID(health.BootID); err != nil {
 		return err
 	} else if isCurr {
-		klog.InfoS("Health file contains current boot - skipping backup")
+		// This might happen if microshift is (re)started after greenboot finishes running.
+		// Green script will overwrite the health.json with
+		// current boot's ID, deployment ID, and health.
+		klog.InfoS("Health file contains current boot - skipping pre-run")
 		return nil
 	}
 
-	if !health.IsHealthy() {
-		klog.InfoS("System was not healthy - skipping backup")
-		return nil
-	}
 	klog.InfoS("Previous boot", "health", health.Health, "deploymentID", health.DeploymentID, "bootID", health.BootID)
 
-	dataManager, err := data.NewManager(config.BackupsDir)
-	if err != nil {
-		return err
+	if health.IsHealthy() {
+		return pr.backup(health)
```
Member (Author): As implementation grows, I expect this to not be the end of the workflow.
```diff
 	}
 
-	existingBackups, err := dataManager.GetBackupList()
+	return pr.restore()
+}
+
+func (pr *PreRun) backup(health *HealthInfo) error {
+	klog.InfoS("Backing up the data for deployment", "deployment", health.DeploymentID)
+
+	existingBackups, err := pr.dataManager.GetBackupList()
 	if err != nil {
 		return err
 	}
```
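Not part of the diff: a minimal sketch of how a caller might wire up the new `PreRun` type, assuming the data manager is still built from `config.BackupsDir` as in the pre-refactor code. The surrounding function and the `pkg/admin/prerun` import path are assumptions, not taken from this PR.

```go
package example

import (
	"github.com/openshift/microshift/pkg/admin/data"
	"github.com/openshift/microshift/pkg/admin/prerun" // import path assumed
	"github.com/openshift/microshift/pkg/config"
)

// runPreRun is a hypothetical call site; only New() and Perform() come from this PR.
func runPreRun() error {
	dataManager, err := data.NewManager(config.BackupsDir)
	if err != nil {
		return err
	}
	return prerun.New(dataManager).Perform()
}
```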
```diff
@@ -76,20 +90,84 @@ func Perform() error {
 		return nil
 	}
 
-	if err := dataManager.Backup(newBackupName); err != nil {
+	if err := pr.dataManager.Backup(newBackupName); err != nil {
 		return err
 	}
 
-	removeOldBackups(dataManager, backupsForDeployment)
+	pr.removeOldBackups(backupsForDeployment)
 
 	return nil
 }
 
+func (pr *PreRun) restore() error {
+	// TODO: Check if containers are already running (i.e. microshift.service was restarted)?
```
Member (Author): Situation: the system was unhealthy, so it restored the backup, but then crashed (or exited due to an IP change or something else) and was restarted before the red/green script had a chance to update `health.json`. At this point some Pods might already be running, so should we protect against restoring in such a case? Or maybe it's not a problem.

Contributor: As a first iteration, let's assume it's safe to restore from the same backup multiple times.
```diff
+
+	currentDeploymentID, err := getCurrentDeploymentID()
+	if err != nil {
+		return err
+	}
+	klog.InfoS("Restoring data for current deployment", "deployID", currentDeploymentID)
+
+	existingBackups, err := pr.dataManager.GetBackupList()
+	if err != nil {
+		return err
+	}
+	klog.InfoS("List of existing backups", "backups", existingBackups)
+	backupsForDeployment := getExistingBackupsForTheDeployment(existingBackups, currentDeploymentID)
+
+	if len(backupsForDeployment) == 0 {
+		return fmt.Errorf("there is no backup to restore for current deployment (%s)", currentDeploymentID)
+	}
+
+	if len(backupsForDeployment) > 1 {
+		// could happen during backing up when removing older backups failed
+		klog.InfoS("TODO: more than 1 backup, need to pick most recent one")
+	}
+
+	return pr.dataManager.Restore(backupsForDeployment[0])
+}
+
+func getCurrentDeploymentID() (string, error) {
+	cmd := exec.Command("rpm-ostree", "status", "--jsonpath=$.deployments[0].id", "--booted")
+	var stdout, stderr bytes.Buffer
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+
+	if err := cmd.Run(); err != nil {
+		return "", fmt.Errorf("command %s failed: %s", strings.TrimSpace(cmd.String()), strings.TrimSpace(stderr.String()))
+	}
+
+	ids := []string{}
+	if err := json.Unmarshal(stdout.Bytes(), &ids); err != nil {
+		return "", fmt.Errorf("unmarshalling '%s' to json failed: %w", strings.TrimSpace(stdout.String()), err)
+	}
+
+	if len(ids) != 1 {
+		// this shouldn't happen if running on ostree system, but just in case
+		klog.ErrorS(nil, "Unexpected amount of deployments in rpm-ostree output",
+			"cmd", cmd.String(),
+			"stdout", strings.TrimSpace(stdout.String()),
+			"stderr", strings.TrimSpace(stderr.String()),
+			"unmarshalledIDs", ids)
+		return "", fmt.Errorf("rpm-ostree returned unexpected amount of deployment IDs: %d", len(ids))
+	}
+
+	return ids[0], nil
+}
+
+func (pr *PreRun) removeOldBackups(backups []data.BackupName) {
+	for _, b := range backups {
+		klog.InfoS("Removing older backup", "name", b)
+		if err := pr.dataManager.RemoveBackup(b); err != nil {
+			klog.ErrorS(err, "Failed to remove backup", "name", b)
+		}
+	}
+}
 
 func containsCurrentBootID(id string) (bool, error) {
 	path := "/proc/sys/kernel/random/boot_id"
 	content, err := os.ReadFile(path)
 	if err != nil {
 		klog.ErrorS(err, "Failed to read file", "path", path)
 		return false, fmt.Errorf("reading file %s failed: %w", path, err)
 	}
 	currentBootID := strings.ReplaceAll(strings.TrimSpace(string(content)), "-", "")
```
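An aside, not from the PR: judging from the unmarshalling in `getCurrentDeploymentID`, the `--jsonpath` query is expected to print a JSON array of matches containing exactly one deployment ID. A tiny illustration of that parsing step with a made-up ID:

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Made-up stand-in for the rpm-ostree --jsonpath output; the real value
	// would be the booted deployment's ID.
	stdout := []byte(`["rhel-1a2b3c4d5e6f.0"]`)

	ids := []string{}
	if err := json.Unmarshal(stdout, &ids); err != nil {
		panic(err)
	}
	fmt.Println(len(ids), ids[0]) // 1 rhel-1a2b3c4d5e6f.0
}
```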
```diff
@@ -107,14 +185,12 @@ func getHealthInfo() (*HealthInfo, error) {
 
 	content, err := os.ReadFile(path)
 	if err != nil {
-		klog.ErrorS(err, "Failed to read file", "path", path)
-		return nil, err
+		return nil, fmt.Errorf("reading file %s failed: %w", path, err)
 	}
 
 	health := &HealthInfo{}
 	if err := json.Unmarshal(content, &health); err != nil {
-		klog.ErrorS(err, "Failed to unmarshal file to json", "content", string(content))
-		return nil, err
+		return nil, fmt.Errorf("unmarshalling '%s' failed: %w", strings.TrimSpace(string(content)), err)
 	}
 	return health, nil
 }
```
```diff
@@ -139,12 +215,3 @@ func backupAlreadyExists(existingBackups []data.BackupName, name data.BackupName
 	}
 	return false
 }
-
-func removeOldBackups(dataManager data.Manager, backups []data.BackupName) {
-	for _, b := range backups {
-		klog.InfoS("Removing older backup", "name", b)
-		if err := dataManager.RemoveBackup(b); err != nil {
-			klog.ErrorS(err, "Failed to remove backup", "name", b)
-		}
-	}
-}
```
Review thread on Restore():

- This is asymmetric to Backup() (there it's the caller's responsibility to ensure the backup dir does not exist). Should we make it the caller's responsibility to ensure /var/lib/microshift does not exist before restoring from a backup?
- IIUC, /var/lib/microshift should never(?) exist before executing the Restore business logic, so it'd be an invariant of Restore(). If that's the case, IMO it should be checked by Restore(), not its caller. Ditto for Backup().
- Restore should essentially be … If the `cp` fails, then recovery is … I think you want all of that logic managed from one function to ensure all of the error-handling paths that involve recovery are covered. Are there files that only exist in /var/lib/microshift (and not in the backup) that we need to keep, like history metadata?
- So it looks like the current impl is okay (despite the asymmetry with Backup()). History/health metadata is in microshift-backups/.
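For illustration only, a sketch of the recovery-aware restore flow suggested in the thread above. This is not code from this PR; the function, the `.saved` path, and the use of `cp --archive` are assumptions.

```go
package example

import (
	"fmt"
	"os"
	"os/exec"
)

// restoreWithRecovery is a hypothetical helper: move the current data aside,
// copy the backup into place, and put the original data back if the copy fails.
func restoreWithRecovery(backupPath, dataDir string) error {
	saved := dataDir + ".saved" // assumed holding location for the current data

	// Move the current data out of the way so it can be recovered on failure.
	if err := os.Rename(dataDir, saved); err != nil {
		return fmt.Errorf("moving %s aside failed: %w", dataDir, err)
	}

	// Copy the backup into place; --archive attempts to preserve attributes.
	if out, err := exec.Command("cp", "--archive", backupPath, dataDir).CombinedOutput(); err != nil {
		// Recovery path: restore the original data before reporting the failure.
		if rerr := os.Rename(saved, dataDir); rerr != nil {
			return fmt.Errorf("restore failed: %v (output: %s); recovery also failed: %w", err, out, rerr)
		}
		return fmt.Errorf("restore failed, original data put back: %w", err)
	}

	return os.RemoveAll(saved)
}
```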