From 2791232b7316626b167ed6aa95f6cf90f65329f7 Mon Sep 17 00:00:00 2001 From: Enrique Vallespi Gil Date: Wed, 15 Apr 2026 11:28:07 +0200 Subject: [PATCH] [os_must_gather] Add exit rc to os-must-gather main task With this, we allow the rescue block to run. Previously, any timeout in the main task would return the exit code of the echo, which was always success. Also, we're removing the always block from the rescue block, and we've created an always section for finding existing os-must-gather directories and creating the symlink. We've also changed the dest-dir of the generic fallback command to match the folder of the symlink. Removed oc inspect, as we're not getting many errors from oc adm must-gather, so it probably wouldn't be useful. Finally, we've parameterized SOS_EDPM as cifmw_os_must_gather_sos_edpm and set its default value to "all". Co-Authored-By: Claude Opus 4.6 Signed-off-by: Enrique Vallespi Gil --- roles/os_must_gather/README.md | 1 + roles/os_must_gather/defaults/main.yml | 1 + .../molecule/timeout/converge.yml | 38 +++++++++ .../molecule/timeout/molecule.yml | 50 +++++++++++ .../molecule/timeout/prepare.yml | 60 +++++++++++++ .../molecule/timeout/verify.yml | 57 +++++++++++++ roles/os_must_gather/tasks/main.yml | 85 ++++++++----------- 7 files changed, 242 insertions(+), 50 deletions(-) create mode 100644 roles/os_must_gather/molecule/timeout/converge.yml create mode 100644 roles/os_must_gather/molecule/timeout/molecule.yml create mode 100644 roles/os_must_gather/molecule/timeout/prepare.yml create mode 100644 roles/os_must_gather/molecule/timeout/verify.yml diff --git a/roles/os_must_gather/README.md b/roles/os_must_gather/README.md index 92cc182c5..0e891c1cd 100644 --- a/roles/os_must_gather/README.md +++ b/roles/os_must_gather/README.md @@ -12,6 +12,7 @@ testing the new changes. 
* `cifmw_os_must_gather_output_dir`: (String) Directory to store logs generated by must-gather tool * `cifmw_os_must_gather_repo_path`: (string) Path to local clone of openstack-must-gather git repo * `cifmw_os_must_gather_timeout`: (String) Timeout for must-gather command +* `cifmw_os_must_gather_sos_edpm`: (String) Indicates where to run the SOS report. Defaults to `all`. * `cifmw_os_must_gather_host_network`: (Bool) Flag to gather host network data * `cifmw_os_must_gather_namespaces`: (List) List of namespaces required by the gather task in case of failure * `cifmw_os_must_gather_additional_namespaces`: (String) List of comma separated additional namespaces. Defaults to `kuttl,openshift-storage,sushy-emulator` diff --git a/roles/os_must_gather/defaults/main.yml b/roles/os_must_gather/defaults/main.yml index 6bd0d8167..70211fa0e 100644 --- a/roles/os_must_gather/defaults/main.yml +++ b/roles/os_must_gather/defaults/main.yml @@ -23,6 +23,7 @@ cifmw_os_must_gather_image_registry: "quay.rdoproject.org/openstack-k8s-operator cifmw_os_must_gather_output_dir: "{{ cifmw_basedir }}" cifmw_os_must_gather_output_log_dir: "{{ cifmw_os_must_gather_output_dir }}/logs/openstack-must-gather" cifmw_os_must_gather_repo_path: "{{ ansible_user_dir }}/src/github.com/openstack-k8s-operators/openstack-must-gather" +cifmw_os_must_gather_sos_edpm: "all" cifmw_os_must_gather_timeout: "30m" cifmw_os_must_gather_volume_percentage: 80 cifmw_os_must_gather_additional_namespaces: "kuttl,openshift-storage,openshift-marketplace,openshift-operators,sushy-emulator,tobiko" diff --git a/roles/os_must_gather/molecule/timeout/converge.yml b/roles/os_must_gather/molecule/timeout/converge.yml new file mode 100644 index 000000000..3603049d7 --- /dev/null +++ b/roles/os_must_gather/molecule/timeout/converge.yml @@ -0,0 +1,38 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +- name: Converge - Test timeout handling + hosts: all + vars: + cifmw_path: "{{ ansible_user_dir }}/mock-bin:{{ ansible_env.PATH }}" + cifmw_openshift_kubeconfig: "{{ ansible_user_dir }}/fake-kubeconfig" + cifmw_os_must_gather_timeout: "5s" + cifmw_basedir: "{{ ansible_user_dir }}/test-output" + cifmw_os_must_gather_output_dir: "{{ cifmw_basedir }}" + cifmw_os_must_gather_output_log_dir: "{{ cifmw_os_must_gather_output_dir }}/logs/openstack-must-gather" + cifmw_os_must_gather_image: "fake-image:latest" + zuul_change_list: [] + tasks: + - name: Run os_must_gather role (expect timeout failures) + block: + - name: Include os_must_gather role + ansible.builtin.include_role: + name: os_must_gather + rescue: + - name: Note that failure occurred + ansible.builtin.debug: + msg: "Role failed as expected due to simulated timeout (rc=124)" diff --git a/roles/os_must_gather/molecule/timeout/molecule.yml b/roles/os_must_gather/molecule/timeout/molecule.yml new file mode 100644 index 000000000..1dd7a7ff4 --- /dev/null +++ b/roles/os_must_gather/molecule/timeout/molecule.yml @@ -0,0 +1,50 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +dependency: + name: galaxy + options: + requirements-file: ../../../../requirements.yml + +driver: + name: podman + +platforms: + - name: instance + hostname: instance + image: "ubi9/ubi-init" + registry: + url: "registry.access.redhat.com" + command: "sleep infinity" + privileged: true + ulimits: + - host + +provisioner: + name: ansible + log: true + inventory: + hosts: + all: + hosts: + instance: + ansible_python_interpreter: /usr/bin/python3 + env: + ANSIBLE_STDOUT_CALLBACK: yaml + config_options: + defaults: + remote_tmp: /tmp diff --git a/roles/os_must_gather/molecule/timeout/prepare.yml b/roles/os_must_gather/molecule/timeout/prepare.yml new file mode 100644 index 000000000..37964c805 --- /dev/null +++ b/roles/os_must_gather/molecule/timeout/prepare.yml @@ -0,0 +1,60 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ + +- name: Prepare timeout test environment + hosts: all + tasks: + - name: Create mock bin directory + ansible.builtin.file: + path: "{{ ansible_user_dir }}/mock-bin" + state: directory + mode: '0755' + + - name: Create mock oc command that simulates timeout + ansible.builtin.copy: + dest: "{{ ansible_user_dir }}/mock-bin/oc" + mode: '0755' + content: | + #!/bin/bash + echo "Mock oc adm must-gather starting..." + echo "Command: $@" + + # Sleep to simulate some work being done + sleep 2 + + # Simulate timeout by exiting with code 124 (timeout command's exit code) + echo "Simulating timeout..." + exit 124 + + - name: Create fake kubeconfig + ansible.builtin.copy: + dest: "{{ ansible_user_dir }}/fake-kubeconfig" + mode: '0644' + content: | + apiVersion: v1 + kind: Config + current-context: fake + + - name: Create output directories + ansible.builtin.file: + path: "{{ item }}" + state: directory + mode: '0755' + loop: + - "{{ ansible_user_dir }}/test-output" + - "{{ ansible_user_dir }}/test-output/artifacts" + - "{{ ansible_user_dir }}/test-output/logs/openstack-must-gather" diff --git a/roles/os_must_gather/molecule/timeout/verify.yml b/roles/os_must_gather/molecule/timeout/verify.yml new file mode 100644 index 000000000..b786d43b0 --- /dev/null +++ b/roles/os_must_gather/molecule/timeout/verify.yml @@ -0,0 +1,57 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ + +- name: Verify timeout handling + hosts: all + tasks: + - name: Check that ci_script logs directory exists + ansible.builtin.stat: + path: "{{ ansible_user_dir }}/test-output/logs" + register: logs_dir + + - name: Assert logs directory was created + ansible.builtin.assert: + that: + - logs_dir.stat.exists + - logs_dir.stat.isdir + fail_msg: "Logs directory not found" + success_msg: "Logs directory exists" + + - name: Find must-gather script log + ansible.builtin.find: + paths: "{{ ansible_user_dir }}/test-output/logs" + patterns: "ci_script_*_run_openstack_must_gather.log" + register: must_gather_logs + + - name: Assert must-gather log was created + ansible.builtin.assert: + that: + - must_gather_logs.matched > 0 + fail_msg: "Must-gather log file not found" + success_msg: "Must-gather log file exists" + + - name: Read must-gather log + ansible.builtin.slurp: + src: "{{ must_gather_logs.files[0].path }}" + register: log_content + when: must_gather_logs.matched > 0 + + - name: Verify timeout message in log + ansible.builtin.assert: + that: + - "'The must gather command did not finish on time!' in (log_content.content | b64decode)" + when: must_gather_logs.matched > 0 diff --git a/roles/os_must_gather/tasks/main.yml b/roles/os_must_gather/tasks/main.yml index 72fce2b80..27a8c3829 100644 --- a/roles/os_must_gather/tasks/main.yml +++ b/roles/os_must_gather/tasks/main.yml @@ -61,7 +61,7 @@ environment: KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}" PATH: "{{ cifmw_path }}" - SOS_EDPM: "all" + SOS_EDPM: "{{ cifmw_os_must_gather_sos_edpm }}" SOS_DECOMPRESS: "0" OPENSTACK_DATABASES: "{{ cifmw_os_must_gather_dump_db }}" OMC: "{{ cifmw_os_must_gather_omc }}" @@ -79,14 +79,40 @@ SOS_EDPM=$SOS_EDPM OMC=$OMC SOS_DECOMPRESS=$SOS_DECOMPRESS - gather - 2>&1 || { + gather || { rc=$? if [ $rc -eq 124 ]; then echo "The must gather command did not finish on time!" echo "{{ shell_cmd_timeout }} seconds was not enough to finish the task." 
fi + exit $rc } + register: _must_gather_result + + always: + - name: Log openstack-must-gather failure + when: + - _must_gather_result is defined + - _must_gather_result.rc is defined + - _must_gather_result.rc != 0 + ansible.builtin.debug: + msg: "OpenStack must-gather failed with rc={{ _must_gather_result.rc }}" + + - name: Run fallback generic must-gather command without SOS report when timed out + when: + - _must_gather_result is defined + - _must_gather_result.rc is defined + - _must_gather_result.rc == 124 + environment: + KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}" + PATH: "{{ cifmw_path }}" + ansible.builtin.command: + cmd: >- + timeout {{ (cifmw_os_must_gather_timeout | community.general.to_seconds) + 120 }} + oc adm must-gather + --dest-dir {{ cifmw_os_must_gather_output_log_dir }} + --timeout {{ cifmw_os_must_gather_timeout }} + --volume-percentage={{ cifmw_os_must_gather_volume_percentage }} - name: Find existing os-must-gather directories ansible.builtin.find: @@ -95,52 +121,11 @@ depth: 1 register: _os_gather_latest_dir - - name: Create a symlink to newest os-must-gather directory - ansible.builtin.file: - src: "{{ (_os_gather_latest_dir.files | sort(attribute='mtime', reverse=True) | first).path | basename }}" - dest: "{{ cifmw_os_must_gather_output_log_dir }}/latest" - state: link - - rescue: - - name: Openstack-must-gather failure + - name: Symlink to newest log folder and run top commands + when: _os_gather_latest_dir.files | length > 0 block: - - name: Log openstack-must-gather failure - ansible.builtin.debug: - msg: "OpenStack must-gather failed, running fallback generic must-gather" - - - name: Run fallback generic must-gather command - environment: - KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}" - PATH: "{{ cifmw_path }}" - ansible.builtin.command: - cmd: >- - timeout {{ (cifmw_os_must_gather_timeout | community.general.to_seconds) + 120 }} - oc adm 
must-gather - --dest-dir {{ ansible_user_dir }}/ci-framework-data/must-gather - --timeout {{ cifmw_os_must_gather_timeout }} - --volume-percentage={{ cifmw_os_must_gather_volume_percentage }} - always: - - name: Create oc_inspect log directory + - name: Create a symlink to newest os-must-gather directory ansible.builtin.file: - path: "{{ cifmw_os_must_gather_output_dir }}/logs/oc_inspect" - state: directory - mode: "0755" - - - name: Inspect the cluster after must-gather failure - ignore_errors: true # noqa: ignore-errors - environment: - KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}" - PATH: "{{ cifmw_path }}" - cifmw.general.ci_script: - output_dir: "{{ cifmw_os_must_gather_output_dir }}/artifacts" - script: | - oc adm inspect namespace/{{ item }} --dest-dir={{ cifmw_os_must_gather_output_dir }}/logs/oc_inspect - loop: >- - {{ - ( - cifmw_os_must_gather_namespaces | default([]) + - ( - cifmw_os_must_gather_additional_namespaces | split(',') | list - ) - ) | unique - }} + src: "{{ (_os_gather_latest_dir.files | sort(attribute='mtime', reverse=True) | first).path | basename }}" + dest: "{{ cifmw_os_must_gather_output_log_dir }}/latest" + state: link