-
Notifications
You must be signed in to change notification settings - Fork 228
USHIFT-6887: Fix microshift-low-latency for RHEL10 #6590
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| # hadolint global ignore=DL3059 | ||
| FROM localhost/rhel102-bootc-source-optionals:latest | ||
|
|
||
| # Enable the microshift-tuned service unit in the image so it is set to start on boot | ||
| RUN systemctl enable microshift-tuned | ||
|
|
||
| # Configure MicroShift for low-latency workloads: write kubelet settings (static CPU manager with full-pcpus-only, Static memory manager, single-numa-node topology, CPUs 0-1 reserved for the system) to /etc/microshift/config.yaml | ||
| RUN cat > /etc/microshift/config.yaml <<'EOF' | ||
| kubelet: | ||
| cpuManagerPolicy: static | ||
| cpuManagerPolicyOptions: | ||
| full-pcpus-only: "true" | ||
| cpuManagerReconcilePeriod: 5s | ||
| memoryManagerPolicy: Static | ||
| topologyManagerPolicy: single-numa-node | ||
| reservedSystemCPUs: 0-1 | ||
| reservedMemory: | ||
| - limits: | ||
| memory: 1100Mi | ||
| numaNode: 0 | ||
| kubeReserved: | ||
| memory: 500Mi | ||
| systemReserved: | ||
| memory: 500Mi | ||
| evictionHard: | ||
| imagefs.available: 15% | ||
| memory.available: 100Mi | ||
| nodefs.available: 10% | ||
| nodefs.inodesFree: 5% | ||
| evictionPressureTransitionPeriod: 0s | ||
| EOF | ||
|
|
||
| # Configure tuned baseline variables: isolated cores, hugepages, additional args and the CPU set to offline, written to /etc/tuned/microshift-baseline-variables.conf | ||
| RUN cat > /etc/tuned/microshift-baseline-variables.conf <<'EOF' | ||
| # Isolated cores should be complementary to kubelet's reserved CPUs. | ||
| # Isolated and reserved CPUs should contain all online CPUs. | ||
| # Core #3 is for testing offlining hence skipped. | ||
| isolated_cores=2,4-5 | ||
| hugepages_size=2M | ||
| hugepages=10 | ||
| additional_args=test1=on test2=true dummy | ||
| offline_cpu_set=3 | ||
| EOF | ||
|
|
||
| # Configure the MicroShift tuned settings: select the microshift-baseline profile and request a reboot after it is applied | ||
| RUN cat > /etc/microshift/tuned.yaml <<'EOF' | ||
| profile: microshift-baseline | ||
| reboot_after_apply: True | ||
| EOF |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| # hadolint global ignore=DL3059 | ||
| FROM localhost/rhel98-bootc-source-optionals:latest | ||
|
|
||
| # Enable the microshift-tuned service unit in the image so it is set to start on boot | ||
| RUN systemctl enable microshift-tuned | ||
|
|
||
| # Configure MicroShift for low-latency workloads: write kubelet settings (static CPU manager with full-pcpus-only, Static memory manager, single-numa-node topology, CPUs 0-1 reserved for the system) to /etc/microshift/config.yaml | ||
| RUN cat > /etc/microshift/config.yaml <<'EOF' | ||
| kubelet: | ||
| cpuManagerPolicy: static | ||
| cpuManagerPolicyOptions: | ||
| full-pcpus-only: "true" | ||
| cpuManagerReconcilePeriod: 5s | ||
| memoryManagerPolicy: Static | ||
| topologyManagerPolicy: single-numa-node | ||
| reservedSystemCPUs: 0-1 | ||
| reservedMemory: | ||
| - limits: | ||
| memory: 1100Mi | ||
| numaNode: 0 | ||
| kubeReserved: | ||
| memory: 500Mi | ||
| systemReserved: | ||
| memory: 500Mi | ||
| evictionHard: | ||
| imagefs.available: 15% | ||
| memory.available: 100Mi | ||
| nodefs.available: 10% | ||
| nodefs.inodesFree: 5% | ||
| evictionPressureTransitionPeriod: 0s | ||
| EOF | ||
|
|
||
| # Configure tuned baseline variables: isolated cores, hugepages, additional args and the CPU set to offline, written to /etc/tuned/microshift-baseline-variables.conf | ||
| RUN cat > /etc/tuned/microshift-baseline-variables.conf <<'EOF' | ||
| # Isolated cores should be complementary to kubelet's reserved CPUs. | ||
| # Isolated and reserved CPUs should contain all online CPUs. | ||
| # Core #3 is for testing offlining hence skipped. | ||
| isolated_cores=2,4-5 | ||
| hugepages_size=2M | ||
| hugepages=10 | ||
| additional_args=test1=on test2=true dummy | ||
| offline_cpu_set=3 | ||
| EOF | ||
|
|
||
| # Configure the MicroShift tuned settings: select the microshift-baseline profile and request a reboot after it is applied | ||
| RUN cat > /etc/microshift/tuned.yaml <<'EOF' | ||
| profile: microshift-baseline | ||
| reboot_after_apply: True | ||
| EOF |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,50 @@ | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| #!/bin/bash | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| export SKIP_GREENBOOT=true | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| export TEST_RANDOMIZATION=none | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| # This file is meant to be sourced from scenario.sh, not executed directly; it uses functions defined there (such as exit_if_image_not_found and run_tests). | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| start_image="rhel102-bootc-source-tuned" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||
# Create the VM used by this scenario.
# Globals:
#   start_image (read) - image that must be present before anything is created
# Relies on exit_if_image_not_found, prepare_kickstart and launch_vm, which
# are provided by the script that sources this file.
scenario_create_vms() {
    local -r vm_host="host1"
    local -r ks_template="kickstart-bootc.ks.template"
    local -r launch_target="rhel102-bootc"

    exit_if_image_not_found "${start_image}"

    prepare_kickstart "${vm_host}" "${ks_template}" "${start_image}" true
    # Six vCPUs are requested for the VM.
    launch_vm "${launch_target}" --vm_vcpus 6
}
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||
# Remove the VM created for this scenario.
# Globals:
#   start_image (read) - image that must be present for the scenario to apply
# Relies on exit_if_image_not_found and remove_vm, which are provided by the
# script that sources this file.
scenario_remove_vms() {
    local -r vm_host="host1"

    exit_if_image_not_found "${start_image}"

    remove_vm "${vm_host}"
}
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||
# Wait until the VM has booted at least twice, then run the tuned suites.
#
# Globals:
#   start_image (read)               - image that must be present
#   TUNED_REBOOT_WAIT_TIMEOUT (read) - optional; seconds to wait for the
#                                      second boot (default: 60)
# Relies on exit_if_image_not_found, run_command_on_vm and run_tests, which
# are provided by the script that sources this file.
# Outputs:
#   Prints a timeout message when the second boot is not observed in time.
# Exits:
#   1 when the wait for the second boot times out.
scenario_run_tests() {
    exit_if_image_not_found "${start_image}"

    # Should not be run immediately after creating VM because of
    # microshift-tuned rebooting the node to activate the profile.
    local -r boot_wait_timeout="${TUNED_REBOOT_WAIT_TIMEOUT:-60}"
    # Declared separately from the assignment so that `local` does not mask
    # the exit status of the command substitution.
    local start_time
    start_time=$(date +%s)
    local boot_num
    while true; do
        # The command is expected to fail while the VM is rebooting, hence
        # the deliberate '|| true'; the loop simply polls again.
        boot_num=$(run_command_on_vm host1 "sudo journalctl --list-boots --quiet | wc -l" || true)
        # Drop a trailing carriage return (and anything after it).
        boot_num="${boot_num%$'\r'*}"
        # Only compare when the output is a plain number: arbitrary text
        # (e.g. an SSH error) must never reach arithmetic evaluation.
        # 10# forces base 10 so a leading zero is not read as octal.
        if [[ "${boot_num}" =~ ^[0-9]+$ ]] && (( 10#${boot_num} >= 2 )); then
            break
        fi
        if (( $(date +%s) - start_time > boot_wait_timeout )); then
            echo "Timed out waiting for VM having 2 boots"
            exit 1
        fi
        sleep 5
    done

    # --exitonfailure because tests within suites are meant to be ordered,
    # so don't advance to next test if current failed.
    run_tests host1 \
        --exitonfailure \
        suites/tuned/microshift-tuned.robot \
        suites/tuned/workload-partitioning.robot \
        suites/tuned/uncore-cache.robot
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| #!/bin/bash | ||
|
|
||
| export SKIP_GREENBOOT=true | ||
| export TEST_RANDOMIZATION=none | ||
|
|
||
| # This file is meant to be sourced from scenario.sh, not executed directly; it uses functions defined there (such as exit_if_image_not_found and run_tests). | ||
|
|
||
| start_image="rhel98-bootc-source-tuned" | ||
|
|
||
# Create the VM used by this scenario.
# Globals:
#   start_image (read) - image that must be present before anything is created
# Relies on exit_if_image_not_found, prepare_kickstart and launch_vm, which
# are provided by the script that sources this file.
scenario_create_vms() {
    local -r vm_host="host1"
    local -r ks_template="kickstart-bootc.ks.template"
    local -r launch_target="rhel98-bootc"

    exit_if_image_not_found "${start_image}"

    prepare_kickstart "${vm_host}" "${ks_template}" "${start_image}" true
    # Six vCPUs are requested for the VM.
    launch_vm "${launch_target}" --vm_vcpus 6
}
|
|
||
# Remove the VM created for this scenario.
# Globals:
#   start_image (read) - image that must be present for the scenario to apply
# Relies on exit_if_image_not_found and remove_vm, which are provided by the
# script that sources this file.
scenario_remove_vms() {
    local -r vm_host="host1"

    exit_if_image_not_found "${start_image}"

    remove_vm "${vm_host}"
}
|
|
||
# Wait until the VM has booted at least twice, then run the tuned suites.
#
# Globals:
#   start_image (read)               - image that must be present
#   TUNED_REBOOT_WAIT_TIMEOUT (read) - optional; seconds to wait for the
#                                      second boot (default: 60)
# Relies on exit_if_image_not_found, run_command_on_vm and run_tests, which
# are provided by the script that sources this file.
# Outputs:
#   Prints a timeout message when the second boot is not observed in time.
# Exits:
#   1 when the wait for the second boot times out.
scenario_run_tests() {
    exit_if_image_not_found "${start_image}"

    # Should not be run immediately after creating VM because of
    # microshift-tuned rebooting the node to activate the profile.
    local -r boot_wait_timeout="${TUNED_REBOOT_WAIT_TIMEOUT:-60}"
    # Declared separately from the assignment so that `local` does not mask
    # the exit status of the command substitution.
    local start_time
    start_time=$(date +%s)
    local boot_num
    while true; do
        # The command is expected to fail while the VM is rebooting, hence
        # the deliberate '|| true'; the loop simply polls again.
        boot_num=$(run_command_on_vm host1 "sudo journalctl --list-boots --quiet | wc -l" || true)
        # Drop a trailing carriage return (and anything after it).
        boot_num="${boot_num%$'\r'*}"
        # Only compare when the output is a plain number: arbitrary text
        # (e.g. an SSH error) must never reach arithmetic evaluation.
        # 10# forces base 10 so a leading zero is not read as octal.
        if [[ "${boot_num}" =~ ^[0-9]+$ ]] && (( 10#${boot_num} >= 2 )); then
            break
        fi
        if (( $(date +%s) - start_time > boot_wait_timeout )); then
            echo "Timed out waiting for VM having 2 boots"
            exit 1
        fi
        sleep 5
    done

    # --exitonfailure because tests within suites are meant to be ordered,
    # so don't advance to next test if current failed.
    run_tests host1 \
        --exitonfailure \
        suites/tuned/microshift-tuned.robot \
        suites/tuned/workload-partitioning.robot \
        suites/tuned/uncore-cache.robot
}
Uh oh!
There was an error while loading. Please reload this page.