Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions roles/os_must_gather/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ testing the new changes.
* `cifmw_os_must_gather_output_dir`: (String) Directory to store logs generated by must-gather tool
* `cifmw_os_must_gather_repo_path`: (String) Path to local clone of openstack-must-gather git repo
* `cifmw_os_must_gather_timeout`: (String) Timeout for must-gather command
* `cifmw_os_must_gather_sos_edpm`: (String) Indicates where to run the SOS report; the value is passed to the gather command as `SOS_EDPM`. Defaults to `all`
* `cifmw_os_must_gather_host_network`: (Bool) Flag to gather host network data
* `cifmw_os_must_gather_namespaces`: (List) List of namespaces required by the gather task in case of failure
* `cifmw_os_must_gather_additional_namespaces`: (String) Comma-separated list of additional namespaces. Defaults to `kuttl,openshift-storage,openshift-marketplace,openshift-operators,sushy-emulator,tobiko`
Expand Down
1 change: 1 addition & 0 deletions roles/os_must_gather/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ cifmw_os_must_gather_image_registry: "quay.rdoproject.org/openstack-k8s-operator
# Base directory for everything the role writes; follows cifmw_basedir.
cifmw_os_must_gather_output_dir: "{{ cifmw_basedir }}"
# Directory holding the must-gather output; also used as --dest-dir by the
# fallback generic must-gather and as the parent of the "latest" symlink.
cifmw_os_must_gather_output_log_dir: "{{ cifmw_os_must_gather_output_dir }}/logs/openstack-must-gather"
# Path to a local clone of the openstack-must-gather git repository.
cifmw_os_must_gather_repo_path: "{{ ansible_user_dir }}/src/github.com/openstack-k8s-operators/openstack-must-gather"
# Exported to the gather command as SOS_EDPM.
cifmw_os_must_gather_sos_edpm: "all"
# Passed to `oc adm must-gather --timeout`; the role also converts it with
# community.general.to_seconds to bound the wrapping `timeout` command.
cifmw_os_must_gather_timeout: "30m"
# Passed to `oc adm must-gather --volume-percentage`.
cifmw_os_must_gather_volume_percentage: 80
# Comma-separated string (not a YAML list) of extra namespaces.
cifmw_os_must_gather_additional_namespaces: "kuttl,openshift-storage,openshift-marketplace,openshift-operators,sushy-emulator,tobiko"
Expand Down
38 changes: 38 additions & 0 deletions roles/os_must_gather/molecule/timeout/converge.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
---
# Copyright Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.


# Converge play for the "timeout" Molecule scenario.
#
# Runs the os_must_gather role with a short timeout and with the scenario's
# mock-bin directory placed first in PATH, so the role picks up the mock `oc`
# (which exits with code 124) instead of a real client.
- name: Converge - Test timeout handling
  hosts: all
  vars:
    # Everything the role writes lands under this throw-away directory.
    cifmw_basedir: "{{ ansible_user_dir }}/test-output"
    cifmw_os_must_gather_output_dir: "{{ cifmw_basedir }}"
    cifmw_os_must_gather_output_log_dir: "{{ cifmw_os_must_gather_output_dir }}/logs/openstack-must-gather"

    # mock-bin comes first so its `oc` shadows any real binary.
    cifmw_path: "{{ ansible_user_dir }}/mock-bin:{{ ansible_env.PATH }}"
    cifmw_openshift_kubeconfig: "{{ ansible_user_dir }}/fake-kubeconfig"

    # Placeholder image and a short timeout: nothing is really pulled or run.
    cifmw_os_must_gather_image: "fake-image:latest"
    cifmw_os_must_gather_timeout: "5s"

    zuul_change_list: []
  tasks:
    # The role is expected to fail; the rescue keeps the converge step green
    # so that the verify playbook can inspect what was logged.
    # NOTE(review): the rescue catches *any* role failure, not only rc=124;
    # the verify playbook is what actually checks for the timeout message.
    - name: Run os_must_gather role (expect timeout failures)
      block:
        - name: Include os_must_gather role
          ansible.builtin.include_role:
            name: os_must_gather
      rescue:
        - name: Note that failure occurred
          ansible.builtin.debug:
            msg: "Role failed as expected due to simulated timeout (rc=124)"
50 changes: 50 additions & 0 deletions roles/os_must_gather/molecule/timeout/molecule.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
---
# Copyright Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.


# Molecule configuration for the "timeout" scenario.

# Collections/roles are resolved from the requirements file four directory
# levels above this scenario.
dependency:
  name: galaxy
  options:
    requirements-file: ../../../../requirements.yml

# Test instances are containers managed through podman.
driver:
  name: podman

# Single privileged container that simply stays alive for the test run.
platforms:
  - name: instance
    hostname: instance
    image: ubi9/ubi-init
    registry:
      url: registry.access.redhat.com
    command: sleep infinity
    privileged: true
    ulimits:
      - host

provisioner:
  name: ansible
  log: true
  inventory:
    hosts:
      all:
        hosts:
          instance:
            # Pin the interpreter instead of relying on auto-discovery.
            ansible_python_interpreter: /usr/bin/python3
  env:
    ANSIBLE_STDOUT_CALLBACK: yaml
  config_options:
    defaults:
      remote_tmp: /tmp
60 changes: 60 additions & 0 deletions roles/os_must_gather/molecule/timeout/prepare.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
---
# Copyright Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.


# Prepare play for the "timeout" Molecule scenario.
#
# Builds the fixtures the converge play points the role at: a mock `oc`
# binary that always exits with 124, a minimal kubeconfig, and the output
# directory tree.
- name: Prepare timeout test environment
  hosts: all
  tasks:
    - name: Create mock bin directory
      ansible.builtin.file:
        path: "{{ ansible_user_dir }}/mock-bin"
        state: directory
        mode: '0755'

    # The mock prints its arguments, sleeps briefly and exits with 124, the
    # exit code `timeout` uses when the wrapped command runs out of time.
    # "$*" is used (not "$@") because the arguments are embedded in a larger
    # string; "$@" there would split into several words (ShellCheck SC2145).
    - name: Create mock oc command that simulates timeout
      ansible.builtin.copy:
        dest: "{{ ansible_user_dir }}/mock-bin/oc"
        mode: '0755'
        content: |
          #!/bin/bash
          echo "Mock oc adm must-gather starting..."
          echo "Command: $*"

          # Sleep to simulate some work being done
          sleep 2

          # Simulate timeout by exiting with code 124 (timeout command's exit code)
          echo "Simulating timeout..."
          exit 124

    # Only needs to exist and look like a kubeconfig; the mock `oc` never
    # reads it.
    - name: Create fake kubeconfig
      ansible.builtin.copy:
        dest: "{{ ansible_user_dir }}/fake-kubeconfig"
        mode: '0644'
        content: |
          apiVersion: v1
          kind: Config
          current-context: fake

    - name: Create output directories
      ansible.builtin.file:
        path: "{{ item }}"
        state: directory
        mode: '0755'
      loop:
        - "{{ ansible_user_dir }}/test-output"
        - "{{ ansible_user_dir }}/test-output/artifacts"
        - "{{ ansible_user_dir }}/test-output/logs/openstack-must-gather"
57 changes: 57 additions & 0 deletions roles/os_must_gather/molecule/timeout/verify.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
---
# Copyright Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.


# Verify play for the "timeout" Molecule scenario.
#
# Confirms that the must-gather script log exists and contains the message
# the role prints when the gather command exits with 124.
- name: Verify timeout handling
  hosts: all
  tasks:
    # NOTE(review): the prepare playbook already creates
    # test-output/logs/openstack-must-gather, so this check passes even if
    # the role wrote nothing; the log-file checks below are the real test.
    - name: Check that ci_script logs directory exists
      ansible.builtin.stat:
        path: "{{ ansible_user_dir }}/test-output/logs"
      register: logs_dir

    - name: Assert logs directory was created
      ansible.builtin.assert:
        that:
          - logs_dir.stat.exists
          - logs_dir.stat.isdir
        fail_msg: "Logs directory not found"
        success_msg: "Logs directory exists"

    # No `recurse`, so only files directly inside the logs directory match.
    - name: Find must-gather script log
      ansible.builtin.find:
        paths: "{{ ansible_user_dir }}/test-output/logs"
        patterns: "ci_script_*_run_openstack_must_gather.log"
      register: must_gather_logs

    - name: Assert must-gather log was created
      ansible.builtin.assert:
        that:
          - must_gather_logs.matched > 0
        fail_msg: "Must-gather log file not found"
        success_msg: "Must-gather log file exists"

    # slurp returns the file base64-encoded; it is decoded in the assert below.
    - name: Read must-gather log
      ansible.builtin.slurp:
        src: "{{ must_gather_logs.files[0].path }}"
      register: log_content
      when: must_gather_logs.matched > 0

    # Explicit messages keep this assert consistent with the ones above and
    # make a failure readable without decoding the expression.
    - name: Verify timeout message in log
      ansible.builtin.assert:
        that:
          - "'The must gather command did not finish on time!' in (log_content.content | b64decode)"
        fail_msg: "Timeout message not found in must-gather log"
        success_msg: "Timeout message found in must-gather log"
      when: must_gather_logs.matched > 0
85 changes: 35 additions & 50 deletions roles/os_must_gather/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
environment:
KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
PATH: "{{ cifmw_path }}"
SOS_EDPM: "all"
SOS_EDPM: "{{ cifmw_os_must_gather_sos_edpm }}"
SOS_DECOMPRESS: "0"
OPENSTACK_DATABASES: "{{ cifmw_os_must_gather_dump_db }}"
OMC: "{{ cifmw_os_must_gather_omc }}"
Expand All @@ -79,14 +79,40 @@
SOS_EDPM=$SOS_EDPM
OMC=$OMC
SOS_DECOMPRESS=$SOS_DECOMPRESS
gather
2>&1 || {
gather || {
rc=$?
if [ $rc -eq 124 ]; then
echo "The must gather command did not finish on time!"
echo "{{ shell_cmd_timeout }} seconds was not enough to finish the task."
fi
exit $rc
}
register: _must_gather_result

always:
- name: Log openstack-must-gather failure
when:
- _must_gather_result is defined
- _must_gather_result.rc is defined
- _must_gather_result.rc != 0
ansible.builtin.debug:
msg: "OpenStack must-gather failed with rc={{ _must_gather_result.rc }}"

- name: Run fallback generic must-gather command without SOS report when timed out
when:
- _must_gather_result is defined
- _must_gather_result.rc is defined
- _must_gather_result.rc == 124
environment:
KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
PATH: "{{ cifmw_path }}"
ansible.builtin.command:
cmd: >-
timeout {{ (cifmw_os_must_gather_timeout | community.general.to_seconds) + 120 }}
oc adm must-gather
--dest-dir {{ cifmw_os_must_gather_output_log_dir }}
--timeout {{ cifmw_os_must_gather_timeout }}
--volume-percentage={{ cifmw_os_must_gather_volume_percentage }}

- name: Find existing os-must-gather directories
ansible.builtin.find:
Expand All @@ -95,52 +121,11 @@
depth: 1
register: _os_gather_latest_dir

- name: Create a symlink to newest os-must-gather directory
ansible.builtin.file:
src: "{{ (_os_gather_latest_dir.files | sort(attribute='mtime', reverse=True) | first).path | basename }}"
dest: "{{ cifmw_os_must_gather_output_log_dir }}/latest"
state: link

rescue:
- name: Openstack-must-gather failure
- name: Symlink to newest log folder and run top commands
when: _os_gather_latest_dir.files | length > 0
block:
- name: Log openstack-must-gather failure
ansible.builtin.debug:
msg: "OpenStack must-gather failed, running fallback generic must-gather"

- name: Run fallback generic must-gather command
environment:
KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
PATH: "{{ cifmw_path }}"
ansible.builtin.command:
cmd: >-
timeout {{ (cifmw_os_must_gather_timeout | community.general.to_seconds) + 120 }}
oc adm must-gather
--dest-dir {{ ansible_user_dir }}/ci-framework-data/must-gather
--timeout {{ cifmw_os_must_gather_timeout }}
--volume-percentage={{ cifmw_os_must_gather_volume_percentage }}
always:
- name: Create oc_inspect log directory
- name: Create a symlink to newest os-must-gather directory
ansible.builtin.file:
path: "{{ cifmw_os_must_gather_output_dir }}/logs/oc_inspect"
state: directory
mode: "0755"

- name: Inspect the cluster after must-gather failure
ignore_errors: true # noqa: ignore-errors
environment:
KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
PATH: "{{ cifmw_path }}"
cifmw.general.ci_script:
output_dir: "{{ cifmw_os_must_gather_output_dir }}/artifacts"
script: |
oc adm inspect namespace/{{ item }} --dest-dir={{ cifmw_os_must_gather_output_dir }}/logs/oc_inspect
loop: >-
{{
(
cifmw_os_must_gather_namespaces | default([]) +
(
cifmw_os_must_gather_additional_namespaces | split(',') | list
)
) | unique
}}
src: "{{ (_os_gather_latest_dir.files | sort(attribute='mtime', reverse=True) | first).path | basename }}"
dest: "{{ cifmw_os_must_gather_output_log_dir }}/latest"
state: link
Loading