From 49c9a4eb54227a728fb126199c3bc9f629c29de3 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Tue, 27 Jul 2021 10:34:36 +0200 Subject: [PATCH 01/14] Set TimeoutStopSec only once --- src/provider/systemdmanager/systemdunit.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/provider/systemdmanager/systemdunit.rs b/src/provider/systemdmanager/systemdunit.rs index c57d4e1..25459c3 100644 --- a/src/provider/systemdmanager/systemdunit.rs +++ b/src/provider/systemdmanager/systemdunit.rs @@ -205,14 +205,6 @@ impl SystemDUnit { unit.set_property(Section::Service, "TimeoutStopSec", &termination_timeout); - if let Some(stop_timeout) = pod_spec.termination_grace_period_seconds { - unit.set_property( - Section::Service, - "TimeoutStopSec", - stop_timeout.to_string().as_str(), - ); - } - if let Some(user_name) = SystemDUnit::get_user_name_from_pod_security_context(pod)? { if !user_mode { unit.set_property(Section::Service, "User", user_name); From bb03232dd3e0f77fea2a21f1ddd7a6b7854e005e Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Tue, 27 Jul 2021 15:40:40 +0200 Subject: [PATCH 02/14] Map restart policy to systemd service --- src/provider/systemdmanager/systemdunit.rs | 101 +++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/src/provider/systemdmanager/systemdunit.rs b/src/provider/systemdmanager/systemdunit.rs index 25459c3..754f1d8 100644 --- a/src/provider/systemdmanager/systemdunit.rs +++ b/src/provider/systemdmanager/systemdunit.rs @@ -6,6 +6,7 @@ use kubelet::pod::Pod; use crate::provider::error::StackableError; use crate::provider::error::StackableError::PodValidationError; +use crate::provider::kubernetes::accessor::{restart_policy, RestartPolicy}; use crate::provider::states::pod::creating_config::CreatingConfig; use crate::provider::states::pod::PodState; use crate::provider::systemdmanager::manager::UnitTypes; @@ -39,6 +40,79 @@ lazy_static! 
{ Regex::new("^[a-zA-Z_][a-zA-Z0-9_-]{0,30}$").unwrap(); } +/// Configures whether the service shall be restarted when the service +/// process exits, is killed, or a timeout is reached. +/// +/// The service process may be the main service process, but it may also +/// be one of the processes specified with `ExecStartPre=`, +/// `ExecStartPost=`, `ExecStop=`, `ExecStopPost=`, or `ExecReload=`. +/// When the death of the process is a result of systemd operation (e.g. +/// service stop or restart), the service will not be restarted. +/// Timeouts include missing the watchdog "keep-alive ping" deadline and +/// a service start, reload, and stop operation timeouts. +/// +/// As exceptions to the setting, the service will not be restarted if +/// the exit code or signal is specified in `RestartPreventExitStatus=` +/// or the service is stopped with `systemctl stop` or an equivalent +/// operation. Also, the services will always be restarted if the exit +/// code or signal is specified in `RestartForceExitStatus=`. +/// +/// Note that service restart is subject to unit start rate limiting +/// configured with `StartLimitIntervalSec=` and `StartLimitBurst=`. A +/// restarted service enters the failed state only after the start +/// limits are reached. +/// +/// Setting this to "RestartOption::OnFailure" is the recommended choice +/// for long-running services, in order to increase reliability by +/// attempting automatic recovery from errors. For services that shall +/// be able to terminate on their own choice (and avoid immediate +/// restarting), "RestartOption::OnAbnormal" is an alternative choice. +#[derive(Clone, Debug, Display, Eq, PartialEq)] +#[strum(serialize_all = "kebab-case")] +pub enum RestartOption { + /// The service will be restarted regardless of whether it exited + /// cleanly or not, got terminated abnormally by a signal, or hit a + /// timeout. + Always, + /// The service will not be restarted. 
+ No, + /// The service will be restarted when the process is terminated by + /// a signal (including on core dump, excluding the signals + /// `SIGHUP`, `SIGINT`, `SIGTERM`, or `SIGPIPE`), when an operation + /// times out, or when the watchdog timeout is triggered. + OnAbnormal, + /// The service will be restarted only if the service process exits + /// due to an uncaught signal not specified as a clean exit status. + OnAbort, + /// The service will be restarted when the process exits with a + /// non-zero exit code, is terminated by a signal (including on core + /// dump, but excluding the signals `SIGHUP`, `SIGINT`, `SIGTERM`, + /// or `SIGPIPE`), when an operation (such as service reload) times + /// out, and when the configured watchdog timeout is triggered. + OnFailure, + /// The service will be restarted only when the service process + /// exits cleanly. In this context, a clean exit means any of the + /// following: + /// - exit code of 0; + /// - for types other than Type=oneshot, one of the signals + /// `SIGHUP`, `SIGINT`, `SIGTERM`, or `SIGPIPE`; + /// - exit statuses and signals specified in SuccessExitStatus=. + OnSuccess, + /// The service will be restarted only if the watchdog timeout for + /// the service expires. + OnWatchdog, +} + +impl From for RestartOption { + fn from(restart_policy: RestartPolicy) -> Self { + match restart_policy { + RestartPolicy::Always => RestartOption::Always, + RestartPolicy::OnFailure => RestartOption::OnFailure, + RestartPolicy::Never => RestartOption::OnAbnormal, + } + } +} + /// A struct that represents an individual systemd unit #[derive(Clone, Debug)] pub struct SystemDUnit { @@ -205,6 +279,8 @@ impl SystemDUnit { unit.set_property(Section::Service, "TimeoutStopSec", &termination_timeout); + unit.set_restart_option(RestartOption::from(restart_policy(&pod))); + if let Some(user_name) = SystemDUnit::get_user_name_from_pod_security_context(pod)? 
{ if !user_mode { unit.set_property(Section::Service, "User", user_name); @@ -216,6 +292,10 @@ impl SystemDUnit { Ok(unit) } + fn set_restart_option(&mut self, setting: RestartOption) { + self.set_property(Section::Service, "Restart", &setting.to_string()); + } + fn get_user_name_from_pod_security_context(pod: &Pod) -> Result, StackableError> { let validate = |user_name| { if USER_NAME_PATTERN.is_match(user_name) { @@ -494,6 +574,7 @@ mod test { "stackable.service", indoc! {" [Service] + Restart=always TimeoutStopSec=30 User=pod-user"} )] @@ -532,6 +613,7 @@ mod test { Environment="LOG_DIR=/var/log/default-stackable" Environment="LOG_LEVEL=INFO" ExecStart=start.sh arg /etc/default-stackable + Restart=always StandardError=journal StandardOutput=journal TimeoutStopSec=30 @@ -565,6 +647,7 @@ mod test { [Service] ExecStart=start.sh + Restart=always StandardError=journal StandardOutput=journal TimeoutStopSec=30 @@ -585,8 +668,26 @@ mod test { "stackable.service", indoc! {" [Service] + Restart=always TimeoutStopSec=10"} )] + #[case::set_restart_policy( + BusType::System, + " + apiVersion: v1 + kind: Pod + metadata: + name: stackable + spec: + containers: [] + restartPolicy: OnFailure", + "stackable.service", + indoc! {" + [Service] + Restart=on-failure + TimeoutStopSec=30" + } + )] fn create_unit_from_pod( #[case] bus_type: BusType, From 89edf1034090c786b8a6efd49354fdbfc36e6efa Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Thu, 29 Jul 2021 19:40:09 +0200 Subject: [PATCH 03/14] Remove restart capability from pod state The running state is no longer responsible for deciding if a service should be restarted. This is done by systemd. Therefore the transition from the running state to the starting state was removed. The service_state function was added to the SystemdManager to check if a service is running or terminated. This function is used for the transition from the running to the terminated state and for patching the container state. 
--- src/provider/states/pod/running.rs | 54 ++++++++------------- src/provider/states/pod/starting.rs | 6 +-- src/provider/systemdmanager/service.rs | 47 +++++++++++++----- src/provider/systemdmanager/systemd1_api.rs | 14 ++++++ src/provider/systemdmanager/systemdunit.rs | 36 ++++++++++++++ 5 files changed, 106 insertions(+), 51 deletions(-) diff --git a/src/provider/states/pod/running.rs b/src/provider/states/pod/running.rs index 6cd88aa..f83a2ff 100644 --- a/src/provider/states/pod/running.rs +++ b/src/provider/states/pod/running.rs @@ -11,17 +11,14 @@ use kubelet::{ use log::{debug, info, trace, warn}; use tokio::time::Duration; -use super::{installing::Installing, starting::Starting, terminated::Terminated}; +use super::terminated::Terminated; use crate::provider::{ - kubernetes::{ - accessor::{restart_policy, RestartPolicy}, - status::patch_container_status, - }, - PodHandle, PodState, ProviderState, + kubernetes::status::patch_container_status, systemdmanager::service::ServiceState, PodHandle, + PodState, ProviderState, }; #[derive(Debug, TransitionTo)] -#[transition_to(Installing, Starting, Terminated)] +#[transition_to(Terminated)] pub struct Running { pub transition_time: Time, } @@ -66,7 +63,7 @@ impl State for Running { // Interruption of this loop is triggered externally by the Krustlet code when // - the pod which this state machine refers to gets deleted // - Krustlet shuts down - while !running_containers.is_empty() && !container_failed { + while !running_containers.is_empty() { tokio::time::sleep(Duration::from_secs(10)).await; trace!( "Checking if service {} is still running.", @@ -79,23 +76,15 @@ impl State for Running { for (container_key, container_handle) in running_containers.iter() { let systemd_service = &container_handle.systemd_service; - match systemd_service.is_running().await { - Ok(true) => {} - Ok(false) => match systemd_service.failed().await { - Ok(true) => failed_containers - .push((container_key.to_owned(), 
container_handle.to_owned())), - Ok(false) => succeeded_containers - .push((container_key.to_owned(), container_handle.to_owned())), - Err(dbus_error) => warn!( - "Error querying Failed property for Unit [{}] of service [{}]: [{}]", - systemd_service.file(), - pod_state.service_name, - dbus_error - ), - }, + match systemd_service.service_state().await { + Ok(ServiceState::Running) => {} + Ok(ServiceState::Succeeded) => succeeded_containers + .push((container_key.to_owned(), container_handle.to_owned())), + Ok(ServiceState::Failed) => failed_containers + .push((container_key.to_owned(), container_handle.to_owned())), Err(dbus_error) => { warn!( - "Error querying ActiveState for Unit [{}] of service [{}]: [{}].", + "Error querying state for unit [{}] of service [{}]: [{}].", systemd_service.file(), pod_state.service_name, dbus_error @@ -132,6 +121,7 @@ impl State for Running { ) .await; running_containers.remove(container_key); + container_failed = true; } for container_handle in running_containers.values() { @@ -141,20 +131,14 @@ impl State for Running { pod_state.service_name ); } - - container_failed = !failed_containers.is_empty(); } - if container_failed { - if restart_policy(&pod) == RestartPolicy::Never { - Transition::next(self, Terminated { successful: false }) - } else { - debug!("Restart policy is set to restart, starting..."); - Transition::next(self, Starting {}) - } - } else { - Transition::next(self, Terminated { successful: true }) - } + Transition::next( + self, + Terminated { + successful: !container_failed, + }, + ) } async fn status(&self, pod_state: &mut PodState, _pod: &Pod) -> anyhow::Result { diff --git a/src/provider/states/pod/starting.rs b/src/provider/states/pod/starting.rs index 6ef61a2..7495c3f 100644 --- a/src/provider/states/pod/starting.rs +++ b/src/provider/states/pod/starting.rs @@ -4,7 +4,7 @@ use crate::provider::{ accessor::{restart_policy, RestartPolicy}, status::patch_container_status, }, - 
systemdmanager::service::SystemdService, + systemdmanager::service::{ServiceState, SystemdService}, PodHandle, PodState, ProviderState, }; @@ -73,7 +73,7 @@ async fn start_service_units( for (container_key, container_handle) in pod_handle.unwrap_or_default() { let systemd_service = &container_handle.systemd_service; - if systemd_service.is_running().await? { + if systemd_service.service_state().await? == ServiceState::Running { debug!( "Unit [{}] for service [{}] is already running. Skip startup.", systemd_service.file(), @@ -125,7 +125,7 @@ async fn await_startup(systemd_service: &SystemdService, duration: Duration) -> systemd_service.file() ); - if systemd_service.is_running().await? { + if systemd_service.service_state().await? == ServiceState::Running { debug!( "Service [{}] still running after [{}] seconds", systemd_service.file(), diff --git a/src/provider/systemdmanager/service.rs b/src/provider/systemdmanager/service.rs index 57a1b78..edf1c56 100644 --- a/src/provider/systemdmanager/service.rs +++ b/src/provider/systemdmanager/service.rs @@ -3,6 +3,18 @@ use super::systemd1_api::{ActiveState, AsyncManagerProxy, AsyncServiceProxy, Asy use crate::provider::systemdmanager::systemd1_api::ServiceResult; use anyhow::anyhow; +/// ServiceState represents a coarse-grained state of the service unit. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum ServiceState { + /// The service has not finished yet. Either it is currently running + /// or it is scheduled to run. + Running, + /// The service terminated successfully and will not be restarted. + Succeeded, + /// The service terminated unsuccessfully and will not be restarted. + Failed, +} + /// Stores proxies of a systemd unit and service #[derive(Clone, Debug)] pub struct SystemdService { @@ -49,19 +61,28 @@ impl SystemdService { self.file.clone() } - /// Checks if the ActiveState is set to active. 
- pub async fn is_running(&self) -> anyhow::Result { - self.unit_proxy - .active_state() - .await - .map(|state| state == ActiveState::Active) - .map_err(|error| { - anyhow!( - "ActiveState of systemd unit [{}] cannot be retrieved: {}", - self.file, - error - ) - }) + /// Returns a coarse-grained state of the service unit. + pub async fn service_state(&self) -> anyhow::Result { + let active_state = self.unit_proxy.active_state().await?; + + let service_state = match active_state { + ActiveState::Failed => ServiceState::Failed, + ActiveState::Active => { + let sub_state = self.unit_proxy.sub_state().await?; + // The service sub state `exited` is not explicitly + // documented but can be found in the source code of + // systemd: + // https://github.com/systemd/systemd/blob/v249/src/basic/unit-def.h#L133 + if sub_state == "exited" { + ServiceState::Succeeded + } else { + ServiceState::Running + } + } + _ => ServiceState::Running, + }; + + Ok(service_state) } /// Checks if the result is not set to success. diff --git a/src/provider/systemdmanager/systemd1_api.rs b/src/provider/systemdmanager/systemd1_api.rs index 8a0cf23..c08a831 100644 --- a/src/provider/systemdmanager/systemd1_api.rs +++ b/src/provider/systemdmanager/systemd1_api.rs @@ -404,6 +404,20 @@ trait Unit { #[dbus_proxy(property)] fn active_state(&self) -> zbus::Result; + /// SubState encodes states of the same state machine that + /// ActiveState covers, but knows more fine-grained states that are + /// unit-type-specific. Where ActiveState only covers six high-level + /// states, SubState covers possibly many more low-level + /// unit-type-specific states that are mapped to the six high-level + /// states. Note that multiple low-level states might map to the + /// same high-level state, but not vice versa. Not all high-level + /// states have low-level counterparts on all unit types. 
At this + /// point the low-level states are not documented here, and are more + /// likely to be extended later on than the common high-level + /// states. + #[dbus_proxy(property)] + fn sub_state(&self) -> zbus::Result; + /// Unique ID for a runtime cycle of a unit #[dbus_proxy(property, name = "InvocationID")] fn invocation_id(&self) -> zbus::Result; diff --git a/src/provider/systemdmanager/systemdunit.rs b/src/provider/systemdmanager/systemdunit.rs index 754f1d8..c6a89ec 100644 --- a/src/provider/systemdmanager/systemdunit.rs +++ b/src/provider/systemdmanager/systemdunit.rs @@ -40,6 +40,24 @@ lazy_static! { Regex::new("^[a-zA-Z_][a-zA-Z0-9_-]{0,30}$").unwrap(); } +/// Boolean arguments used in unit files +#[derive(Clone, Debug, Display, Eq, PartialEq)] +#[strum(serialize_all = "kebab-case")] +pub enum Boolean { + Yes, + No, +} + +impl From for Boolean { + fn from(value: bool) -> Self { + if value { + Boolean::Yes + } else { + Boolean::No + } + } +} + /// Configures whether the service shall be restarted when the service /// process exits, is killed, or a timeout is reached. /// @@ -281,6 +299,11 @@ impl SystemDUnit { unit.set_restart_option(RestartOption::from(restart_policy(&pod))); + // systemd should not remove the unit on its own when it + // terminated successfully, so that the agent can check the + // outcome and patch the contaner status accordingly. + unit.set_remain_after_exit_option(Boolean::Yes); + if let Some(user_name) = SystemDUnit::get_user_name_from_pod_security_context(pod)? { if !user_mode { unit.set_property(Section::Service, "User", user_name); @@ -292,10 +315,18 @@ impl SystemDUnit { Ok(unit) } + /// Configures whether the service shall be restarted when the + /// service process exits, is killed, or a timeout is reached. 
fn set_restart_option(&mut self, setting: RestartOption) { self.set_property(Section::Service, "Restart", &setting.to_string()); } + /// Causes systemd to consider the unit to be active if the start + /// action exited successfully. + fn set_remain_after_exit_option(&mut self, setting: Boolean) { + self.set_property(Section::Service, "RemainAfterExit", &setting.to_string()); + } + fn get_user_name_from_pod_security_context(pod: &Pod) -> Result, StackableError> { let validate = |user_name| { if USER_NAME_PATTERN.is_match(user_name) { @@ -574,6 +605,7 @@ mod test { "stackable.service", indoc! {" [Service] + RemainAfterExit=yes Restart=always TimeoutStopSec=30 User=pod-user"} @@ -613,6 +645,7 @@ mod test { Environment="LOG_DIR=/var/log/default-stackable" Environment="LOG_LEVEL=INFO" ExecStart=start.sh arg /etc/default-stackable + RemainAfterExit=yes Restart=always StandardError=journal StandardOutput=journal @@ -647,6 +680,7 @@ mod test { [Service] ExecStart=start.sh + RemainAfterExit=yes Restart=always StandardError=journal StandardOutput=journal @@ -668,6 +702,7 @@ mod test { "stackable.service", indoc! {" [Service] + RemainAfterExit=yes Restart=always TimeoutStopSec=10"} )] @@ -684,6 +719,7 @@ mod test { "stackable.service", indoc! 
{" [Service] + RemainAfterExit=yes Restart=on-failure TimeoutStopSec=30" } From 71558d8cd167f69b99869f3347193a73f013c347 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Fri, 30 Jul 2021 14:41:19 +0200 Subject: [PATCH 04/14] Service state "Created" added --- src/provider/states/pod/running.rs | 10 +++- src/provider/states/pod/starting.rs | 4 +- src/provider/systemdmanager/service.rs | 55 ++++++++++++++++----- src/provider/systemdmanager/systemd1_api.rs | 11 +++++ src/provider/systemdmanager/systemdunit.rs | 6 +-- 5 files changed, 67 insertions(+), 19 deletions(-) diff --git a/src/provider/states/pod/running.rs b/src/provider/states/pod/running.rs index f83a2ff..b386c27 100644 --- a/src/provider/states/pod/running.rs +++ b/src/provider/states/pod/running.rs @@ -77,7 +77,15 @@ impl State for Running { let systemd_service = &container_handle.systemd_service; match systemd_service.service_state().await { - Ok(ServiceState::Running) => {} + Ok(ServiceState::Created) => { + warn!( + "The unit [{}] of service [{}] was not started. \ + This should not happen. Ignoring this state for now.", + systemd_service.file(), + pod_state.service_name + ); + } + Ok(ServiceState::Started) => {} Ok(ServiceState::Succeeded) => succeeded_containers .push((container_key.to_owned(), container_handle.to_owned())), Ok(ServiceState::Failed) => failed_containers diff --git a/src/provider/states/pod/starting.rs b/src/provider/states/pod/starting.rs index 7495c3f..9cdd106 100644 --- a/src/provider/states/pod/starting.rs +++ b/src/provider/states/pod/starting.rs @@ -73,7 +73,7 @@ async fn start_service_units( for (container_key, container_handle) in pod_handle.unwrap_or_default() { let systemd_service = &container_handle.systemd_service; - if systemd_service.service_state().await? == ServiceState::Running { + if systemd_service.service_state().await? == ServiceState::Started { debug!( "Unit [{}] for service [{}] is already running. 
Skip startup.", systemd_service.file(), @@ -125,7 +125,7 @@ async fn await_startup(systemd_service: &SystemdService, duration: Duration) -> systemd_service.file() ); - if systemd_service.service_state().await? == ServiceState::Running { + if systemd_service.service_state().await? == ServiceState::Started { debug!( "Service [{}] still running after [{}] seconds", systemd_service.file(), diff --git a/src/provider/systemdmanager/service.rs b/src/provider/systemdmanager/service.rs index edf1c56..7071a07 100644 --- a/src/provider/systemdmanager/service.rs +++ b/src/provider/systemdmanager/service.rs @@ -1,14 +1,18 @@ //! Exposes methods from the systemd unit and service interfaces. -use super::systemd1_api::{ActiveState, AsyncManagerProxy, AsyncServiceProxy, AsyncUnitProxy}; +use super::systemd1_api::{ + ActiveState, AsyncManagerProxy, AsyncServiceProxy, AsyncUnitProxy, SUB_STATE_SERVICE_EXITED, +}; use crate::provider::systemdmanager::systemd1_api::ServiceResult; use anyhow::anyhow; /// ServiceState represents a coarse-grained state of the service unit. #[derive(Clone, Debug, Eq, PartialEq)] +/// Represents the state of a service unit object. pub enum ServiceState { - /// The service has not finished yet. Either it is currently running - /// or it is scheduled to run. - Running, + /// The service was not started yet. + Created, + /// The service was started and is currently running or restarting. + Started, /// The service terminated successfully and will not be restarted. Succeeded, /// The service terminated unsuccessfully and will not be restarted. @@ -61,25 +65,50 @@ impl SystemdService { self.file.clone() } - /// Returns a coarse-grained state of the service unit. + /// Returns a coarse-grained state of the service unit object. + /// + /// It is assumed that RemainAfterExit is set to "yes" in the given + /// unit. Otherwise it would not be possible to distinguish between + /// "inactive and never run" and "inactive and terminated + /// successfully". 
pub async fn service_state(&self) -> anyhow::Result { let active_state = self.unit_proxy.active_state().await?; let service_state = match active_state { - ActiveState::Failed => ServiceState::Failed, + ActiveState::Inactive => { + // ActiveState "inactive" means in general that the + // previous run was successful or no previous run has + // taken place yet. If RemainAfterExit is set to "yes" + // then a successfully terminated service stays in + // ActiveState "active" and only a service which was not + // started before is in ActiveState "inactive". It is + // assumed here that RemainAfterExit is enabled. + ServiceState::Created + } ActiveState::Active => { let sub_state = self.unit_proxy.sub_state().await?; - // The service sub state `exited` is not explicitly - // documented but can be found in the source code of - // systemd: - // https://github.com/systemd/systemd/blob/v249/src/basic/unit-def.h#L133 - if sub_state == "exited" { + if sub_state == SUB_STATE_SERVICE_EXITED { + // The service terminated successfully (otherwise + // ActiveState would be set to "failed") and will + // not be restarted (otherwise ActiveState would be + // set to "activating") and RemainAfterExit is set + // to "yes" (otherwise ActiveState would be set to + // "inactive"). It is assumed here that + // RemainAfterExit is enabled. ServiceState::Succeeded } else { - ServiceState::Running + ServiceState::Started } } - _ => ServiceState::Running, + ActiveState::Failed => { + // The service terminated unsuccessfully and will not be + // restarted (otherwise ActiveState would be set to + // "activating"). 
+ ServiceState::Failed + } + ActiveState::Reloading => ServiceState::Started, + ActiveState::Activating => ServiceState::Started, + ActiveState::Deactivating => ServiceState::Started, }; Ok(service_state) diff --git a/src/provider/systemdmanager/systemd1_api.rs b/src/provider/systemdmanager/systemd1_api.rs index c08a831..609b0cb 100644 --- a/src/provider/systemdmanager/systemd1_api.rs +++ b/src/provider/systemdmanager/systemd1_api.rs @@ -1,4 +1,7 @@ //! Binding to the D-Bus interface of systemd +//! +//! Further documentation can be found in the +//! [manual](https://www.freedesktop.org/software/systemd/man/org.freedesktop.systemd1). use fmt::Display; use inflector::cases::kebabcase; use serde::{de::Visitor, Deserialize, Serialize}; @@ -368,6 +371,11 @@ pub enum ActiveState { impl_tryfrom_ownedvalue_for_enum!(ActiveState); +/// Sub state of a service unit object which is set if the service +/// terminated successfully but is still active due to the +/// RemainAfterExit setting. +pub const SUB_STATE_SERVICE_EXITED: &str = "exited"; + /// Unique ID for a runtime cycle of a unit #[derive(Clone, Debug, Eq, PartialEq)] pub struct InvocationId(Vec); @@ -415,6 +423,9 @@ trait Unit { /// point the low-level states are not documented here, and are more /// likely to be extended later on than the common high-level /// states. 
+ /// + /// Possible sub states can be found in the source code of systemd: + /// https://github.com/systemd/systemd/blob/v249/src/basic/unit-def.h #[dbus_proxy(property)] fn sub_state(&self) -> zbus::Result; diff --git a/src/provider/systemdmanager/systemdunit.rs b/src/provider/systemdmanager/systemdunit.rs index c6a89ec..187bc37 100644 --- a/src/provider/systemdmanager/systemdunit.rs +++ b/src/provider/systemdmanager/systemdunit.rs @@ -299,9 +299,9 @@ impl SystemDUnit { unit.set_restart_option(RestartOption::from(restart_policy(&pod))); - // systemd should not remove the unit on its own when it - // terminated successfully, so that the agent can check the - // outcome and patch the contaner status accordingly. + // Setting RemainAfterExit to "yes" is necessary to reliably + // determine the state of the service unit object, see + // manager::SystemdManager::service_state. unit.set_remain_after_exit_option(Boolean::Yes); if let Some(user_name) = SystemDUnit::get_user_name_from_pod_security_context(pod)? { From fe40635a03bf6256c58790c96c3a80f86197ecce Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Fri, 30 Jul 2021 15:28:36 +0200 Subject: [PATCH 05/14] Waiting after systemd service startup removed When a systemd service is started then the agent does not wait anymore ten seconds to check if it was successfully started because systemd manages restarts now and the agent cannot detect if the service is in a restart loop. 
--- src/provider/states/pod/starting.rs | 72 ++++---------------------- src/provider/systemdmanager/service.rs | 3 +- 2 files changed, 12 insertions(+), 63 deletions(-) diff --git a/src/provider/states/pod/starting.rs b/src/provider/states/pod/starting.rs index 9cdd106..e3b4a89 100644 --- a/src/provider/states/pod/starting.rs +++ b/src/provider/states/pod/starting.rs @@ -1,14 +1,10 @@ use super::running::Running; use crate::provider::{ - kubernetes::{ - accessor::{restart_policy, RestartPolicy}, - status::patch_container_status, - }, - systemdmanager::service::{ServiceState, SystemdService}, - PodHandle, PodState, ProviderState, + kubernetes::status::patch_container_status, systemdmanager::service::ServiceState, PodHandle, + PodState, ProviderState, }; -use anyhow::{anyhow, Result}; +use anyhow::Result; use kube::{ api::{Patch, PatchParams}, Api, Client, @@ -17,8 +13,6 @@ use kubelet::pod::{Pod, PodKey}; use kubelet::{container::Status, pod::state::prelude::*}; use log::{debug, error, info}; use serde_json::json; -use std::time::Instant; -use tokio::time::{self, Duration}; #[derive(Default, Debug, TransitionTo)] #[transition_to(Running)] @@ -50,9 +44,7 @@ impl State for Starting { /// Starts the service units for the containers of the given pod. /// -/// The units are started and enabled if they are not already running. -/// The startup is considered successful if the unit is still running -/// after 10 seconds. +/// The units are started and enabled if they were not already started. async fn start_service_units( shared: SharedState, pod_state: &PodState, @@ -72,32 +64,19 @@ async fn start_service_units( for (container_key, container_handle) in pod_handle.unwrap_or_default() { let systemd_service = &container_handle.systemd_service; + let service_unit = &container_handle.service_unit; - if systemd_service.service_state().await? == ServiceState::Started { - debug!( - "Unit [{}] for service [{}] is already running. 
Skip startup.", - systemd_service.file(), - &pod_state.service_name - ); - } else { - let service_unit = &container_handle.service_unit; - + if systemd_service.service_state().await? == ServiceState::Created { info!("Starting systemd unit [{}]", service_unit); systemd_manager.start(service_unit).await?; info!("Enabling systemd unit [{}]", service_unit); systemd_manager.enable(service_unit).await?; - - if restart_policy(pod) == RestartPolicy::Always { - // TODO: does this need to be configurable, or ar we happy with a hard coded value - // for now. I've briefly looked at the podspec and couldn't identify a good field - // to use for this - also, currently this starts containers (= systemd units) in - // order and waits 10 seconds for every unit, so a service with five containers - // would take 50 seconds until it reported running - which is totally fine in case - // the units actually depend on each other, but a case could be made for waiting - // once at the end - await_startup(systemd_service, Duration::from_secs(10)).await?; - } + } else { + debug!( + "Unit [{}] for service [{}] was already started. Skipping startup.", + service_unit, &pod_state.service_name + ); } add_annotation( @@ -114,35 +93,6 @@ async fn start_service_units( Ok(()) } -/// Checks if the given service unit is still running after the given duration. -async fn await_startup(systemd_service: &SystemdService, duration: Duration) -> Result<()> { - let start_time = Instant::now(); - while start_time.elapsed() < duration { - time::sleep(Duration::from_secs(1)).await; - - debug!( - "Checking if unit [{}] is still up and running.", - systemd_service.file() - ); - - if systemd_service.service_state().await? 
== ServiceState::Started { - debug!( - "Service [{}] still running after [{}] seconds", - systemd_service.file(), - start_time.elapsed().as_secs() - ); - } else { - return Err(anyhow!( - "Unit [{}] stopped unexpectedly during startup after [{}] seconds.", - systemd_service.file(), - start_time.elapsed().as_secs() - )); - } - } - - Ok(()) -} - /// Adds an annotation to the given pod. /// /// If there is already an annotation with the given key then the value diff --git a/src/provider/systemdmanager/service.rs b/src/provider/systemdmanager/service.rs index 7071a07..10da46a 100644 --- a/src/provider/systemdmanager/service.rs +++ b/src/provider/systemdmanager/service.rs @@ -5,9 +5,8 @@ use super::systemd1_api::{ use crate::provider::systemdmanager::systemd1_api::ServiceResult; use anyhow::anyhow; -/// ServiceState represents a coarse-grained state of the service unit. -#[derive(Clone, Debug, Eq, PartialEq)] /// Represents the state of a service unit object. +#[derive(Clone, Debug, Eq, PartialEq)] pub enum ServiceState { /// The service was not started yet. Created, From b53d5f5ea08989f8f674ff4bdc7be7a74a8a62c8 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Fri, 30 Jul 2021 15:58:57 +0200 Subject: [PATCH 06/14] Set RestartSec to 2 in unit files --- src/provider/systemdmanager/systemdunit.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/provider/systemdmanager/systemdunit.rs b/src/provider/systemdmanager/systemdunit.rs index 187bc37..076d168 100644 --- a/src/provider/systemdmanager/systemdunit.rs +++ b/src/provider/systemdmanager/systemdunit.rs @@ -299,6 +299,10 @@ impl SystemDUnit { unit.set_restart_option(RestartOption::from(restart_policy(&pod))); + // Relieve the machine a little bit on restart loops but choose + // a moderate value so that tests are not slowed down too much. 
+ unit.set_restart_sec_option(2); + // Setting RemainAfterExit to "yes" is necessary to reliably // determine the state of the service unit object, see // manager::SystemdManager::service_state. @@ -321,6 +325,13 @@ impl SystemDUnit { self.set_property(Section::Service, "Restart", &setting.to_string()); } + /// Configures the time to sleep in seconds before restarting a + /// service (as configured with [set_restart_option]). Defaults to + /// 100ms. + fn set_restart_sec_option(&mut self, seconds: u32) { + self.set_property(Section::Service, "RestartSec", &seconds.to_string()); + } + /// Causes systemd to consider the unit to be active if the start /// action exited successfully. fn set_remain_after_exit_option(&mut self, setting: Boolean) { From 1df17da8282811c7f533fcfacce7f9febef40890 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Fri, 30 Jul 2021 16:19:30 +0200 Subject: [PATCH 07/14] Start rate limiting of systemd units disabled --- src/provider/systemdmanager/systemdunit.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/provider/systemdmanager/systemdunit.rs b/src/provider/systemdmanager/systemdunit.rs index 076d168..32cea8a 100644 --- a/src/provider/systemdmanager/systemdunit.rs +++ b/src/provider/systemdmanager/systemdunit.rs @@ -303,6 +303,10 @@ impl SystemDUnit { // a moderate value so that tests are not slowed down too much. unit.set_restart_sec_option(2); + // Adhere to the given restart policy and do not limit the + // number of restarts. + unit.set_start_limit_interval_sec_option(0); + // Setting RemainAfterExit to "yes" is necessary to reliably // determine the state of the service unit object, see // manager::SystemdManager::service_state. @@ -332,6 +336,15 @@ impl SystemDUnit { self.set_property(Section::Service, "RestartSec", &seconds.to_string()); } + /// Configures unit start rate limiting. Units which are started too + /// often within the given time span are not permitted to start any + /// more. 
The allowed number of restarts can be set with + /// "StartLimitBurst". May be set to 0 to disable any kind of rate + /// limiting. + fn set_start_limit_interval_sec_option(&mut self, seconds: u32) { + self.set_property(Section::Unit, "StartLimitIntervalSec", &seconds.to_string()); + } + /// Causes systemd to consider the unit to be active if the start /// action exited successfully. fn set_remain_after_exit_option(&mut self, setting: Boolean) { From 12676bc0c9e30eef4e3e8684b774de5d4bdfd17a Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Fri, 30 Jul 2021 16:33:26 +0200 Subject: [PATCH 08/14] Tests fixed --- src/provider/systemdmanager/systemdunit.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/provider/systemdmanager/systemdunit.rs b/src/provider/systemdmanager/systemdunit.rs index 32cea8a..1b1ae53 100644 --- a/src/provider/systemdmanager/systemdunit.rs +++ b/src/provider/systemdmanager/systemdunit.rs @@ -628,9 +628,13 @@ mod test { runAsUserName: pod-user", "stackable.service", indoc! {" + [Unit] + StartLimitIntervalSec=0 + [Service] RemainAfterExit=yes Restart=always + RestartSec=2 TimeoutStopSec=30 User=pod-user"} )] @@ -664,6 +668,7 @@ mod test { indoc! {r#" [Unit] Description=default-stackable-test-container + StartLimitIntervalSec=0 [Service] Environment="LOG_DIR=/var/log/default-stackable" @@ -671,6 +676,7 @@ mod test { ExecStart=start.sh arg /etc/default-stackable RemainAfterExit=yes Restart=always + RestartSec=2 StandardError=journal StandardOutput=journal TimeoutStopSec=30 @@ -701,11 +707,13 @@ mod test { indoc! {r#" [Unit] Description=default-stackable-test-container + StartLimitIntervalSec=0 [Service] ExecStart=start.sh RemainAfterExit=yes Restart=always + RestartSec=2 StandardError=journal StandardOutput=journal TimeoutStopSec=30 @@ -725,9 +733,13 @@ mod test { containers: []", "stackable.service", indoc! 
{" + [Unit] + StartLimitIntervalSec=0 + [Service] RemainAfterExit=yes Restart=always + RestartSec=2 TimeoutStopSec=10"} )] #[case::set_restart_policy( @@ -742,9 +754,13 @@ mod test { restartPolicy: OnFailure", "stackable.service", indoc! {" + [Unit] + StartLimitIntervalSec=0 + [Service] RemainAfterExit=yes Restart=on-failure + RestartSec=2 TimeoutStopSec=30" } )] From 39516c9fa65608b0d43a96cc6269ca5f37627956 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Fri, 30 Jul 2021 16:39:21 +0200 Subject: [PATCH 09/14] Changelog updated --- CHANGELOG.adoc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc index 9173c28..04e451d 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -23,6 +23,13 @@ but not any longer with versions prior to v1.19 ({267}). * Error message improved which is logged if a systemd unit file cannot be created ({276}). +* Handling of service restarts moved from the Stackable agent to + systemd. + +=== Removed +* Check removed if a service starts up correctly within 10 seconds. + systemd manages restarts now and the Stackable agent cannot detect if + a service is in a restart loop. == 0.5.0 - 2021-07-26 From e5748b9ffe3dcf26c6638dafd251a2ed56a09563 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Fri, 30 Jul 2021 16:56:44 +0200 Subject: [PATCH 10/14] Remove the waiting phase also from the docs --- docs/modules/ROOT/pages/services.adoc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/modules/ROOT/pages/services.adoc b/docs/modules/ROOT/pages/services.adoc index d743877..858a1ab 100644 --- a/docs/modules/ROOT/pages/services.adoc +++ b/docs/modules/ROOT/pages/services.adoc @@ -15,7 +15,3 @@ A pod which provides a service should never terminate on its own, so the command: - restartPolicy: Always - -After a container command is executed the agent waits for 10 seconds -before the container status is set to running. 
When all containers are -running, also the pod phase is switched from `Pending` to `Running`. From 72abc6feca7fdb196fd908a3837847b2c4b76836 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Thu, 5 Aug 2021 12:12:26 +0200 Subject: [PATCH 11/14] Set restart count in the container status --- Cargo.lock | 1 + Cargo.toml | 1 + src/provider/kubernetes/status.rs | 42 ++++++++++++++++++++- src/provider/states/pod/running.rs | 21 +++++++++-- src/provider/systemdmanager/service.rs | 21 +++-------- src/provider/systemdmanager/systemd1_api.rs | 4 ++ src/provider/systemdmanager/systemdunit.rs | 28 +++++++++----- 7 files changed, 89 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f61b39f..3524532 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2834,6 +2834,7 @@ dependencies = [ "handlebars", "hostname", "indoc", + "json-patch", "k8s-openapi", "krator", "kube", diff --git a/Cargo.toml b/Cargo.toml index fb5788c..776dc59 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ krator = { git = "https://github.com/stackabletech/krustlet.git", branch = "stac kube = { version= "0.48", default-features = false, features = ["derive", "native-tls"] } kubelet = { git = "https://github.com/stackabletech/krustlet.git", branch = "stackable_patches_v0.7.0", default-features = true, features= ["derive", "cli"] } # version = "0.7" Inflector = "0.11" +json-patch = "0.2" lazy_static = "1.4" log = "0.4" multimap = "0.8" diff --git a/src/provider/kubernetes/status.rs b/src/provider/kubernetes/status.rs index fc384de..3b356f2 100644 --- a/src/provider/kubernetes/status.rs +++ b/src/provider/kubernetes/status.rs @@ -1,7 +1,11 @@ //! 
Functions for patching the pod status +use anyhow::anyhow; use k8s_openapi::api::core::v1::Pod as KubePod; -use kube::{Api, Client}; +use kube::{ + api::{Patch, PatchParams}, + Api, Client, +}; use kubelet::{ container::{ContainerKey, Status}, pod::Pod, @@ -30,3 +34,39 @@ pub async fn patch_container_status( ); } } + +/// Patches the restart count of a container. +pub async fn patch_restart_count( + client: &Client, + pod: &Pod, + container_key: &ContainerKey, + restart_count: u32, +) -> anyhow::Result<()> { + let api: Api = Api::namespaced(client.clone(), pod.namespace()); + + let index = pod + .container_status_index(container_key) + .ok_or_else(|| anyhow!("Container not found"))?; + + let container_type = if container_key.is_init() { + "initContainer" + } else { + "container" + }; + + let patch = json_patch::Patch(vec![json_patch::PatchOperation::Replace( + json_patch::ReplaceOperation { + path: format!("/status/{}Statuses/{}/restartCount", container_type, index), + value: restart_count.into(), + }, + )]); + + api.patch_status( + pod.name(), + &PatchParams::default(), + &Patch::<()>::Json(patch), + ) + .await?; + + Ok(()) +} diff --git a/src/provider/states/pod/running.rs b/src/provider/states/pod/running.rs index b386c27..2785ba5 100644 --- a/src/provider/states/pod/running.rs +++ b/src/provider/states/pod/running.rs @@ -13,8 +13,9 @@ use tokio::time::Duration; use super::terminated::Terminated; use crate::provider::{ - kubernetes::status::patch_container_status, systemdmanager::service::ServiceState, PodHandle, - PodState, ProviderState, + kubernetes::status::{patch_container_status, patch_restart_count}, + systemdmanager::service::ServiceState, + PodHandle, PodState, ProviderState, }; #[derive(Debug, TransitionTo)] @@ -132,12 +133,26 @@ impl State for Running { container_failed = true; } - for container_handle in running_containers.values() { + for (container_key, container_handle) in running_containers.iter() { trace!( "Unit [{}] of service [{}] still 
running ...",
                 container_handle.service_unit,
                 pod_state.service_name
             );
+
+            match container_handle.systemd_service.restart_count().await {
+                Ok(restart_count) => {
+                    if let Err(error) =
+                        patch_restart_count(&client, &pod, container_key, restart_count).await
+                    {
+                        warn!("Could not patch restart count: {}", error);
+                    }
+                }
+                Err(error) => warn!(
+                    "Could not retrieve restart count from unit [{}]: {}",
+                    container_handle.service_unit, error
+                ),
+            }
         }
     }
 
diff --git a/src/provider/systemdmanager/service.rs b/src/provider/systemdmanager/service.rs
index 10da46a..2804be8 100644
--- a/src/provider/systemdmanager/service.rs
+++ b/src/provider/systemdmanager/service.rs
@@ -2,7 +2,6 @@
 use super::systemd1_api::{
     ActiveState, AsyncManagerProxy, AsyncServiceProxy, AsyncUnitProxy, SUB_STATE_SERVICE_EXITED,
 };
-use crate::provider::systemdmanager::systemd1_api::ServiceResult;
 use anyhow::anyhow;
 
 /// Represents the state of a service unit object.
@@ -67,9 +66,9 @@ impl SystemdService {
     /// Returns a coarse-grained state of the service unit object.
     ///
     /// It is assumed that RemainAfterExit is set to "yes" in the given
-    /// unit. Otherwise it would not be possible to distinguish between
-    /// "inactive and never run" and "inactive and terminated
-    /// successfully".
+    /// unit if the service can terminate. Otherwise it would not be
+    /// possible to distinguish between "inactive and never run" and
+    /// "inactive and terminated successfully".
     pub async fn service_state(&self) -> anyhow::Result {
         let active_state = self.unit_proxy.active_state().await?;
 
@@ -113,19 +112,11 @@ impl SystemdService {
     /// Checks if the result is not set to success.
- pub async fn failed(&self) -> anyhow::Result { + pub async fn restart_count(&self) -> anyhow::Result { self.service_proxy - .result() + .nrestarts() .await - .map(|state| state != ServiceResult::Success) - .map_err(|error| { - anyhow!( - "Result of systemd unit [{}] cannot be retrieved: {}", - self.file, - error - ) - }) + .map_err(|e| anyhow!("Error receiving NRestarts of unit [{}]. {}", self.file, e)) } /// Retrieves the current invocation ID. diff --git a/src/provider/systemdmanager/systemd1_api.rs b/src/provider/systemdmanager/systemd1_api.rs index 609b0cb..c085fdb 100644 --- a/src/provider/systemdmanager/systemd1_api.rs +++ b/src/provider/systemdmanager/systemd1_api.rs @@ -507,6 +507,10 @@ trait Service { /// state (see ['ActiveState::Failed`]). #[dbus_proxy(property)] fn result(&self) -> zbus::Result; + + /// Number of restarts + #[dbus_proxy(property, name = "NRestarts")] + fn nrestarts(&self) -> zbus::Result; } /// A systemd job object diff --git a/src/provider/systemdmanager/systemdunit.rs b/src/provider/systemdmanager/systemdunit.rs index 1b1ae53..906635c 100644 --- a/src/provider/systemdmanager/systemdunit.rs +++ b/src/provider/systemdmanager/systemdunit.rs @@ -297,7 +297,8 @@ impl SystemDUnit { unit.set_property(Section::Service, "TimeoutStopSec", &termination_timeout); - unit.set_restart_option(RestartOption::from(restart_policy(&pod))); + let restart_option = RestartOption::from(restart_policy(pod)); + unit.set_restart_option(&restart_option); // Relieve the machine a little bit on restart loops but choose // a moderate value so that tests are not slowed down too much. @@ -307,10 +308,17 @@ impl SystemDUnit { // number of restarts. unit.set_start_limit_interval_sec_option(0); - // Setting RemainAfterExit to "yes" is necessary to reliably - // determine the state of the service unit object, see - // manager::SystemdManager::service_state. 
- unit.set_remain_after_exit_option(Boolean::Yes); + // If the service can terminate successfully then + // RemainAfterExit must be set to "yes" so that the state of the + // service unit object can be reliably determined after + // termination, see manager::SystemdManager::service_state. + // + // If Restart is set to "always" then the service cannot + // terminate and there is no need to determine the state after + // termination. Furthermore RemainAfterExit must not be set + // because otherwise the Restart option would be ignored when + // the service returns a successful return code. + unit.set_remain_after_exit_option((restart_option != RestartOption::Always).into()); if let Some(user_name) = SystemDUnit::get_user_name_from_pod_security_context(pod)? { if !user_mode { @@ -325,7 +333,7 @@ impl SystemDUnit { /// Configures whether the service shall be restarted when the /// service process exits, is killed, or a timeout is reached. - fn set_restart_option(&mut self, setting: RestartOption) { + fn set_restart_option(&mut self, setting: &RestartOption) { self.set_property(Section::Service, "Restart", &setting.to_string()); } @@ -632,7 +640,7 @@ mod test { StartLimitIntervalSec=0 [Service] - RemainAfterExit=yes + RemainAfterExit=no Restart=always RestartSec=2 TimeoutStopSec=30 @@ -674,7 +682,7 @@ mod test { Environment="LOG_DIR=/var/log/default-stackable" Environment="LOG_LEVEL=INFO" ExecStart=start.sh arg /etc/default-stackable - RemainAfterExit=yes + RemainAfterExit=no Restart=always RestartSec=2 StandardError=journal @@ -711,7 +719,7 @@ mod test { [Service] ExecStart=start.sh - RemainAfterExit=yes + RemainAfterExit=no Restart=always RestartSec=2 StandardError=journal @@ -737,7 +745,7 @@ mod test { StartLimitIntervalSec=0 [Service] - RemainAfterExit=yes + RemainAfterExit=no Restart=always RestartSec=2 TimeoutStopSec=10"} From 68731bbf8fb17619dee0bd54f3126f6f072a8497 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Tue, 17 Aug 2021 16:51:32 +0200 Subject: 
[PATCH 12/14] Add pull request to the changelog --- CHANGELOG.adoc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc index 04e451d..b1606c5 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -1,5 +1,7 @@ = Changelog +:263: https://github.com/stackabletech/agent/pull/263[#263] + == 0.6.0 - unreleased :262: https://github.com/stackabletech/agent/pull/262[#262] @@ -24,12 +26,12 @@ * Error message improved which is logged if a systemd unit file cannot be created ({276}). * Handling of service restarts moved from the Stackable agent to - systemd. + systemd ({263}). === Removed * Check removed if a service starts up correctly within 10 seconds. systemd manages restarts now and the Stackable agent cannot detect if - a service is in a restart loop. + a service is in a restart loop ({263}). == 0.5.0 - 2021-07-26 From 2b3a628c4a12f4817fa135c00a8cf5933cd63c72 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Thu, 19 Aug 2021 09:21:50 +0200 Subject: [PATCH 13/14] Changelog updated --- CHANGELOG.adoc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc index b1606c5..413e13b 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -1,10 +1,9 @@ = Changelog -:263: https://github.com/stackabletech/agent/pull/263[#263] - == 0.6.0 - unreleased :262: https://github.com/stackabletech/agent/pull/262[#262] +:263: https://github.com/stackabletech/agent/pull/263[#263] :267: https://github.com/stackabletech/agent/pull/267[#267] :270: https://github.com/stackabletech/agent/pull/270[#270] :273: https://github.com/stackabletech/agent/pull/273[#273] From 3b67ff7730fd53179c563773a670d9f1ee3294ad Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Thu, 26 Aug 2021 16:13:35 +0200 Subject: [PATCH 14/14] Set correct systemd default target --- CHANGELOG.adoc | 3 +++ src/provider/systemdmanager/systemdunit.rs | 12 ++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git 
a/CHANGELOG.adoc b/CHANGELOG.adoc index 413e13b..0699682 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -32,6 +32,9 @@ systemd manages restarts now and the Stackable agent cannot detect if a service is in a restart loop ({263}). +=== Fixed +* Systemd services in session mode are restarted after a reboot ({263}). + == 0.5.0 - 2021-07-26 :224: https://github.com/stackabletech/agent/pull/224[#224] diff --git a/src/provider/systemdmanager/systemdunit.rs b/src/provider/systemdmanager/systemdunit.rs index 906635c..d3d1d95 100644 --- a/src/provider/systemdmanager/systemdunit.rs +++ b/src/provider/systemdmanager/systemdunit.rs @@ -227,7 +227,15 @@ impl SystemDUnit { } // This one is mandatory, as otherwise enabling the unit fails - unit.set_property(Section::Install, "WantedBy", "multi-user.target"); + unit.set_property( + Section::Install, + "WantedBy", + if user_mode { + "default.target" + } else { + "multi-user.target" + }, + ); Ok(unit) } @@ -727,7 +735,7 @@ mod test { TimeoutStopSec=30 [Install] - WantedBy=multi-user.target"#} + WantedBy=default.target"#} )] #[case::set_termination_timeout( BusType::System,