From 7310b23cfc8235713764e2d6d74793e8e9125247 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B6nke=20Liebau?= Date: Thu, 22 Apr 2021 21:26:00 +0200 Subject: [PATCH 1/3] Added handling of terminationGracePeriodSeconds to creation of systemd units. The value for this field from the PodSpec is translated to the systemd config TimeoutStopSec This will cause the service to be killed by systemd if it does not shutdown within the time in seconds specified by this parameter. Default value for this parameter is 30 seconds, if terminationGracePeriodSeconds this is not specified in the unit file and this default stays in effect. --- src/provider/systemdmanager/systemdunit.rs | 67 ++++++++++++++++++++-- 1 file changed, 61 insertions(+), 6 deletions(-) diff --git a/src/provider/systemdmanager/systemdunit.rs b/src/provider/systemdmanager/systemdunit.rs index dd742a6..6332c85 100644 --- a/src/provider/systemdmanager/systemdunit.rs +++ b/src/provider/systemdmanager/systemdunit.rs @@ -190,14 +190,23 @@ impl SystemDUnit { sections: Default::default(), }; - let restart_policy = match &pod.as_kube_pod().spec { - // if no restart policy is present we default to "never" - Some(spec) => spec.restart_policy.as_deref().unwrap_or("Never"), - None => "Never", + // Kubernetes does not allow creating pods without a spec, so if we do not get one here + //something is definitely seriously amiss + let pod_spec = match &pod.as_kube_pod().spec { + Some(spec) => spec, + None => { + return Err(PodValidationError { + msg: format!("Got pod without spec: [{}]", unit.name), + }) + } }; - // if however one is specified but we do not know about this policy then we do not default - // to never but fail the service instead to avoid unpredictable behavior + // Get restart policy from pod, if none is specified default to "Never" + let restart_policy = pod_spec.restart_policy.as_deref().unwrap_or("Never"); + + // Lookup the equivalent systemd restart policy for the configured one + // If this lookup fails (which means a restart policy was specified which we do not know + // about) then we fail the entire service to avoid unpredictable behavior let restart_policy = match RESTART_POLICY_MAP.get(restart_policy) { Some(policy) => policy, None => { @@ -212,6 +221,18 @@ impl SystemDUnit { unit.add_property(Section::Service, "Restart", restart_policy); + // If `terminationGracePeriodSeconds` was specified in the PodSpec set the value as + // 'TimeOutStopSec` on the systemd unit + // This means that the service will be killed after this period if it does not shutdown + // after receiving a stop command + if let Some(stop_timeout) = pod_spec.termination_grace_period_seconds { + unit.add_property( + Section::Service, + "TimeoutStopSec", + stop_timeout.to_string().as_str(), + ); + } + if let Some(user_name) = SystemDUnit::get_user_name_from_pod_security_context(pod)? { if !user_mode { unit.add_property(Section::Service, "User", user_name); @@ -554,6 +575,40 @@ mod test { [Install] WantedBy=multi-user.target"#} )] + #[case::with_container_on_session_bus_stoptimeout( + BusType::Session, + indoc! {r#" + apiVersion: v1 + kind: Pod + metadata: + name: stackable + spec: + terminationGracePeriodSeconds: 10 + containers: + - name: test-container.service + command: + - start.sh + securityContext: + windowsOptions: + runAsUserName: container-user + securityContext: + windowsOptions: + runAsUserName: pod-user"#}, + "default-stackable-test-container.service", + indoc! {r#" + [Unit] + Description=default-stackable-test-container + + [Service] + ExecStart=start.sh + Restart=no + StandardError=journal + StandardOutput=journal + TimeoutStopSec=10 + + [Install] + WantedBy=multi-user.target"#} + )] fn create_unit_from_pod( #[case] bus_type: BusType, #[case] pod_config: &str, From e027381bd2bd3ba5485cbe911b6dcba0628831a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B6nke=20Liebau?= Date: Fri, 23 Apr 2021 11:03:11 +0200 Subject: [PATCH 2/3] Addressed comments by Sigi --- src/provider/systemdmanager/systemdunit.rs | 51 +++++++++++----------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/src/provider/systemdmanager/systemdunit.rs b/src/provider/systemdmanager/systemdunit.rs index 6332c85..bd96776 100644 --- a/src/provider/systemdmanager/systemdunit.rs +++ b/src/provider/systemdmanager/systemdunit.rs @@ -27,6 +27,10 @@ lazy_static! { ].iter().cloned().collect(); } +/// The default timeout for stopping a service - when this has passed systemd will terminate +/// the process +const DEFAULT_TERMINATION_TIMEOUT: i64 = 30; + /// List of sections in the systemd unit /// /// The sections are written in the same order as listed here into the unit file. @@ -225,6 +229,17 @@ impl SystemDUnit { // 'TimeOutStopSec` on the systemd unit // This means that the service will be killed after this period if it does not shutdown // after receiving a stop command + // If it was not specified we use the default value for 'terminationGracePeriodSeconds' of + // 30 seconds, as this differs from the systemd default for 'TimeOutStopSec` which is 90 + // seconds. + let termination_timeout = match pod_spec.termination_grace_period_seconds { + None => DEFAULT_TERMINATION_TIMEOUT, + Some(specified_timeout) => specified_timeout, + } + .to_string(); + + unit.add_property(Section::Service, "TimeoutStopSec", &termination_timeout); + if let Some(stop_timeout) = pod_spec.termination_grace_period_seconds { unit.add_property( Section::Service, @@ -499,6 +514,7 @@ mod test { indoc! {" [Service] Restart=always + TimeoutStopSec=30 User=pod-user"} )] #[case::with_container_on_system_bus( @@ -538,6 +554,7 @@ mod test { Restart=no StandardError=journal StandardOutput=journal + TimeoutStopSec=30 User=container-user [Install] @@ -571,44 +588,28 @@ mod test { Restart=no StandardError=journal StandardOutput=journal + TimeoutStopSec=30 [Install] WantedBy=multi-user.target"#} )] - #[case::with_container_on_session_bus_stoptimeout( - BusType::Session, - indoc! {r#" + #[case::set_termination_timeout( + BusType::System, + indoc! {" apiVersion: v1 kind: Pod metadata: name: stackable spec: terminationGracePeriodSeconds: 10 - containers: - - name: test-container.service - command: - - start.sh - securityContext: - windowsOptions: - runAsUserName: container-user - securityContext: - windowsOptions: - runAsUserName: pod-user"#}, - "default-stackable-test-container.service", - indoc! {r#" - [Unit] - Description=default-stackable-test-container - + containers: []"}, + "stackable.service", + indoc! {" [Service] - ExecStart=start.sh Restart=no - StandardError=journal - StandardOutput=journal - TimeoutStopSec=10 - - [Install] - WantedBy=multi-user.target"#} + TimeoutStopSec=10"} )] + fn create_unit_from_pod( #[case] bus_type: BusType, #[case] pod_config: &str, From 77f363431103646da7023515eb78e1bee1ee8805 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B6nke=20Liebau?= Date: Fri, 23 Apr 2021 12:06:41 +0200 Subject: [PATCH 3/3] Renamed constant to include unit. --- src/provider/systemdmanager/systemdunit.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/provider/systemdmanager/systemdunit.rs b/src/provider/systemdmanager/systemdunit.rs index bd96776..2b671f2 100644 --- a/src/provider/systemdmanager/systemdunit.rs +++ b/src/provider/systemdmanager/systemdunit.rs @@ -27,9 +27,9 @@ lazy_static! { ].iter().cloned().collect(); } -/// The default timeout for stopping a service - when this has passed systemd will terminate +/// The default timeout for stopping a service, after this has passed systemd will terminate /// the process -const DEFAULT_TERMINATION_TIMEOUT: i64 = 30; +const DEFAULT_TERMINATION_TIMEOUT_SECS: i64 = 30; /// List of sections in the systemd unit /// @@ -233,7 +233,7 @@ impl SystemDUnit { // 30 seconds, as this differs from the systemd default for 'TimeOutStopSec` which is 90 // seconds. let termination_timeout = match pod_spec.termination_grace_period_seconds { - None => DEFAULT_TERMINATION_TIMEOUT, + None => DEFAULT_TERMINATION_TIMEOUT_SECS, Some(specified_timeout) => specified_timeout, } .to_string();