diff --git a/roles/os_must_gather/README.md b/roles/os_must_gather/README.md
index 92cc182c5..0e891c1cd 100644
--- a/roles/os_must_gather/README.md
+++ b/roles/os_must_gather/README.md
@@ -12,6 +12,7 @@ testing the new changes.
 * `cifmw_os_must_gather_output_dir`: (String) Directory to store logs generated by must-gather tool
 * `cifmw_os_must_gather_repo_path`: (string) Path to local clone of openstack-must-gather git repo
 * `cifmw_os_must_gather_timeout`: (String) Timeout for must-gather command
+* `cifmw_os_must_gather_sos_edpm`: (String) Indicates where to run the SOS report; exported to the gather script as `SOS_EDPM`. Defaults to `all`
 * `cifmw_os_must_gather_host_network`: (Bool) Flag to gather host network data
 * `cifmw_os_must_gather_namespaces`: (List) List of namespaces required by the gather task in case of failure
 * `cifmw_os_must_gather_additional_namespaces`: (String) List of comma separated additional namespaces. Defaults to `kuttl,openshift-storage,sushy-emulator`
diff --git a/roles/os_must_gather/defaults/main.yml b/roles/os_must_gather/defaults/main.yml
index 6bd0d8167..70211fa0e 100644
--- a/roles/os_must_gather/defaults/main.yml
+++ b/roles/os_must_gather/defaults/main.yml
@@ -23,6 +23,7 @@ cifmw_os_must_gather_image_registry: "quay.rdoproject.org/openstack-k8s-operator
 cifmw_os_must_gather_output_dir: "{{ cifmw_basedir }}"
 cifmw_os_must_gather_output_log_dir: "{{ cifmw_os_must_gather_output_dir }}/logs/openstack-must-gather"
 cifmw_os_must_gather_repo_path: "{{ ansible_user_dir }}/src/github.com/openstack-k8s-operators/openstack-must-gather"
+cifmw_os_must_gather_sos_edpm: "all"
 cifmw_os_must_gather_timeout: "30m"
 cifmw_os_must_gather_volume_percentage: 80
 cifmw_os_must_gather_additional_namespaces: "kuttl,openshift-storage,openshift-marketplace,openshift-operators,sushy-emulator,tobiko"
diff --git a/roles/os_must_gather/molecule/timeout/converge.yml b/roles/os_must_gather/molecule/timeout/converge.yml
new file mode 100644
index 000000000..3603049d7
--- /dev/null
+++ b/roles/os_must_gather/molecule/timeout/converge.yml
@@ -0,0 +1,46 @@
+---
+# Copyright Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+- name: Converge - Test timeout handling
+  hosts: all
+  vars:
+    cifmw_path: "{{ ansible_user_dir }}/mock-bin:{{ ansible_env.PATH }}"
+    cifmw_openshift_kubeconfig: "{{ ansible_user_dir }}/fake-kubeconfig"
+    cifmw_os_must_gather_timeout: "5s"
+    cifmw_basedir: "{{ ansible_user_dir }}/test-output"
+    cifmw_os_must_gather_output_dir: "{{ cifmw_basedir }}"
+    cifmw_os_must_gather_output_log_dir: "{{ cifmw_os_must_gather_output_dir }}/logs/openstack-must-gather"
+    cifmw_os_must_gather_image: "fake-image:latest"
+    zuul_change_list: []
+  tasks:
+    - name: Run os_must_gather role (expect timeout failures)
+      block:
+        - name: Include os_must_gather role
+          ansible.builtin.include_role:
+            name: os_must_gather
+      rescue:
+        # The mocked oc always exits 124, so any other return code means the
+        # role broke for a reason unrelated to the timeout path under test.
+        - name: Assert the role failed because of the simulated timeout
+          ansible.builtin.assert:
+            that:
+              - (ansible_failed_result.rc | default(0)) == 124
+            fail_msg: >-
+              Role failed in task
+              '{{ ansible_failed_task.name | default("unknown") }}'
+              for a reason other than the simulated timeout (rc=124)
+            success_msg: "Role failed as expected due to simulated timeout (rc=124)"
diff --git a/roles/os_must_gather/molecule/timeout/molecule.yml b/roles/os_must_gather/molecule/timeout/molecule.yml
new file mode 100644
index 000000000..1dd7a7ff4
--- /dev/null
+++ b/roles/os_must_gather/molecule/timeout/molecule.yml
@@ -0,0 +1,50 @@
+---
+# Copyright Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+dependency:
+  name: galaxy
+  options:
+    requirements-file: ../../../../requirements.yml
+
+driver:
+  name: podman
+
+platforms:
+  - name: instance
+    hostname: instance
+    image: "ubi9/ubi-init"
+    registry:
+      url: "registry.access.redhat.com"
+    command: "sleep infinity"
+    privileged: true
+    ulimits:
+      - host
+
+provisioner:
+  name: ansible
+  log: true
+  inventory:
+    hosts:
+      all:
+        hosts:
+          instance:
+            ansible_python_interpreter: /usr/bin/python3
+  env:
+    ANSIBLE_STDOUT_CALLBACK: yaml
+  config_options:
+    defaults:
+      remote_tmp: /tmp
diff --git a/roles/os_must_gather/molecule/timeout/prepare.yml b/roles/os_must_gather/molecule/timeout/prepare.yml
new file mode 100644
index 000000000..37964c805
--- /dev/null
+++ b/roles/os_must_gather/molecule/timeout/prepare.yml
@@ -0,0 +1,60 @@
+---
+# Copyright Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+- name: Prepare timeout test environment
+  hosts: all
+  tasks:
+    - name: Create mock bin directory
+      ansible.builtin.file:
+        path: "{{ ansible_user_dir }}/mock-bin"
+        state: directory
+        mode: '0755'
+
+    - name: Create mock oc command that simulates timeout
+      ansible.builtin.copy:
+        dest: "{{ ansible_user_dir }}/mock-bin/oc"
+        mode: '0755'
+        content: |
+          #!/bin/bash
+          echo "Mock oc adm must-gather starting..."
+          echo "Command: $*"
+
+          # Sleep to simulate some work being done
+          sleep 2
+
+          # Simulate timeout by exiting with code 124 (timeout command's exit code)
+          echo "Simulating timeout..."
+          exit 124
+
+    - name: Create fake kubeconfig
+      ansible.builtin.copy:
+        dest: "{{ ansible_user_dir }}/fake-kubeconfig"
+        mode: '0644'
+        content: |
+          apiVersion: v1
+          kind: Config
+          current-context: fake
+
+    - name: Create output directories
+      ansible.builtin.file:
+        path: "{{ item }}"
+        state: directory
+        mode: '0755'
+      loop:
+        - "{{ ansible_user_dir }}/test-output"
+        - "{{ ansible_user_dir }}/test-output/artifacts"
+        - "{{ ansible_user_dir }}/test-output/logs/openstack-must-gather"
diff --git a/roles/os_must_gather/molecule/timeout/verify.yml b/roles/os_must_gather/molecule/timeout/verify.yml
new file mode 100644
index 000000000..b786d43b0
--- /dev/null
+++ b/roles/os_must_gather/molecule/timeout/verify.yml
@@ -0,0 +1,57 @@
+---
+# Copyright Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+- name: Verify timeout handling
+  hosts: all
+  tasks:
+    - name: Check that ci_script logs directory exists
+      ansible.builtin.stat:
+        path: "{{ ansible_user_dir }}/test-output/logs"
+      register: logs_dir
+
+    - name: Assert logs directory was created
+      ansible.builtin.assert:
+        that:
+          - logs_dir.stat.exists
+          - logs_dir.stat.isdir
+        fail_msg: "Logs directory not found"
+        success_msg: "Logs directory exists"
+
+    - name: Find must-gather script log
+      ansible.builtin.find:
+        paths: "{{ ansible_user_dir }}/test-output/logs"
+        patterns: "ci_script_*_run_openstack_must_gather.log"
+      register: must_gather_logs
+
+    - name: Assert must-gather log was created
+      ansible.builtin.assert:
+        that:
+          - must_gather_logs.matched > 0
+        fail_msg: "Must-gather log file not found"
+        success_msg: "Must-gather log file exists"
+
+    - name: Read must-gather log
+      ansible.builtin.slurp:
+        src: "{{ must_gather_logs.files[0].path }}"
+      register: log_content
+
+    - name: Verify timeout message in log
+      ansible.builtin.assert:
+        that:
+          - "'The must gather command did not finish on time!' in (log_content.content | b64decode)"
+        fail_msg: "Timeout message not found in must-gather log"
+        success_msg: "Timeout message found in must-gather log"
diff --git a/roles/os_must_gather/tasks/main.yml b/roles/os_must_gather/tasks/main.yml
index 72fce2b80..27a8c3829 100644
--- a/roles/os_must_gather/tasks/main.yml
+++ b/roles/os_must_gather/tasks/main.yml
@@ -61,7 +61,7 @@
       environment:
         KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
         PATH: "{{ cifmw_path }}"
-        SOS_EDPM: "all"
+        SOS_EDPM: "{{ cifmw_os_must_gather_sos_edpm }}"
         SOS_DECOMPRESS: "0"
         OPENSTACK_DATABASES: "{{ cifmw_os_must_gather_dump_db }}"
         OMC: "{{ cifmw_os_must_gather_omc }}"
@@ -79,14 +79,43 @@
           SOS_EDPM=$SOS_EDPM
           OMC=$OMC
           SOS_DECOMPRESS=$SOS_DECOMPRESS
-          gather
-          2>&1 || {
+          gather || {
             rc=$?
             if [ $rc -eq 124 ]; then
               echo "The must gather command did not finish on time!"
               echo "{{ shell_cmd_timeout }} seconds was not enough to finish the task."
             fi
+            exit $rc
           }
+      # Registered so the always section can branch on the script's exit code.
+      register: _must_gather_result
+
+  always:
+    # Runs whether or not the gather succeeded: report a non-zero exit code and,
+    # after a timeout (rc=124), retry with the generic must-gather command.
+    - name: Log openstack-must-gather failure
+      when:
+        - _must_gather_result is defined
+        - _must_gather_result.rc is defined
+        - _must_gather_result.rc != 0
+      ansible.builtin.debug:
+        msg: "OpenStack must-gather failed with rc={{ _must_gather_result.rc }}"
+
+    - name: Run fallback generic must-gather command without SOS report when timed out
+      when:
+        - _must_gather_result is defined
+        - _must_gather_result.rc is defined
+        - _must_gather_result.rc == 124
+      environment:
+        KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
+        PATH: "{{ cifmw_path }}"
+      ansible.builtin.command:
+        cmd: >-
+          timeout {{ (cifmw_os_must_gather_timeout | community.general.to_seconds) + 120 }}
+          oc adm must-gather
+          --dest-dir {{ cifmw_os_must_gather_output_log_dir }}
+          --timeout {{ cifmw_os_must_gather_timeout }}
+          --volume-percentage={{ cifmw_os_must_gather_volume_percentage }}
 
     - name: Find existing os-must-gather directories
       ansible.builtin.find:
@@ -95,52 +124,11 @@
         depth: 1
       register: _os_gather_latest_dir
 
-    - name: Create a symlink to newest os-must-gather directory
-      ansible.builtin.file:
-        src: "{{ (_os_gather_latest_dir.files | sort(attribute='mtime', reverse=True) | first).path | basename }}"
-        dest: "{{ cifmw_os_must_gather_output_log_dir }}/latest"
-        state: link
-
-  rescue:
-    - name: Openstack-must-gather failure
+    - name: Symlink to newest log folder
+      when: _os_gather_latest_dir.files | length > 0
       block:
-        - name: Log openstack-must-gather failure
-          ansible.builtin.debug:
-            msg: "OpenStack must-gather failed, running fallback generic must-gather"
-
-        - name: Run fallback generic must-gather command
-          environment:
-            KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
-            PATH: "{{ cifmw_path }}"
-          ansible.builtin.command:
-            cmd: >-
-              timeout {{ (cifmw_os_must_gather_timeout | community.general.to_seconds) + 120 }}
-              oc adm must-gather
-              --dest-dir {{ ansible_user_dir }}/ci-framework-data/must-gather
-              --timeout {{ cifmw_os_must_gather_timeout }}
-              --volume-percentage={{ cifmw_os_must_gather_volume_percentage }}
-      always:
-        - name: Create oc_inspect log directory
+        - name: Create a symlink to newest os-must-gather directory
           ansible.builtin.file:
-            path: "{{ cifmw_os_must_gather_output_dir }}/logs/oc_inspect"
-            state: directory
-            mode: "0755"
-
-        - name: Inspect the cluster after must-gather failure
-          ignore_errors: true  # noqa: ignore-errors
-          environment:
-            KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
-            PATH: "{{ cifmw_path }}"
-          cifmw.general.ci_script:
-            output_dir: "{{ cifmw_os_must_gather_output_dir }}/artifacts"
-            script: |
-              oc adm inspect namespace/{{ item }} --dest-dir={{ cifmw_os_must_gather_output_dir }}/logs/oc_inspect
-          loop: >-
-            {{
-              (
-                cifmw_os_must_gather_namespaces | default([]) +
-                (
-                  cifmw_os_must_gather_additional_namespaces | split(',') | list
-                )
-              ) | unique
-            }}
+            src: "{{ (_os_gather_latest_dir.files | sort(attribute='mtime', reverse=True) | first).path | basename }}"
+            dest: "{{ cifmw_os_must_gather_output_log_dir }}/latest"
+            state: link