From 5f6f42f71cfe363acb2b09d4036a6795cd8fe8ff Mon Sep 17 00:00:00 2001 From: Bohdan Dobrelia Date: Mon, 20 Apr 2026 12:54:09 +0200 Subject: [PATCH] [ci_local_storage] Fall back to oc debug for PV dir creation When no Ansible inventory host matches a k8s node hostname (e.g. bare metal SNO where the node is not SSH-accessible), the role silently skips directory creation while still creating PVs that reference non-existent paths. Add an oc debug fallback that creates directories on each node via a debug pod. Generated-by: claude-4.6-opus-high Signed-off-by: Bohdan Dobrelia --- docs/dictionary/en-custom.txt | 1 + roles/ci_local_storage/README.md | 18 ++++++ roles/ci_local_storage/defaults/main.yml | 1 + .../molecule/default/converge.yml | 62 +++++++++++++++++++ roles/ci_local_storage/tasks/cleanup.yml | 5 ++ roles/ci_local_storage/tasks/main.yml | 5 ++ .../tasks/uncovered_node_dirs.yml | 43 +++++++++++++ 7 files changed, 135 insertions(+) create mode 100644 roles/ci_local_storage/tasks/uncovered_node_dirs.yml diff --git a/docs/dictionary/en-custom.txt b/docs/dictionary/en-custom.txt index a45fddfc9..afd227276 100644 --- a/docs/dictionary/en-custom.txt +++ b/docs/dictionary/en-custom.txt @@ -14,6 +14,7 @@ LLM MachineConfig NICs NodeHealthCheck +PV RHCOS SNO Sinha diff --git a/roles/ci_local_storage/README.md b/roles/ci_local_storage/README.md index 39be46e36..f8ca27be4 100644 --- a/roles/ci_local_storage/README.md +++ b/roles/ci_local_storage/README.md @@ -17,9 +17,12 @@ If apply, please explain the privilege escalation done in this role. * `cifmw_cls_create_ee_storage`: (Bool) Param to create ee_storage. Defaults to `false`. * `cifmw_cls_namespace`: (String) The namespace where OCP resources will be installed. Defaults to `openstack`. * `cifmw_cls_action`: (String) Action to perform, can be `create` or `clean`. Defaults to `create`. 
+* `cifmw_cls_oc_debug_fallback`: (Bool) Use `oc debug node/` to create PV directories on k8s nodes that are not reachable via SSH from the Ansible inventory. When enabled, the role computes which k8s nodes have no matching SSH-reachable inventory host and falls back to `oc debug` for those nodes. Applies to both create and cleanup. Defaults to `false`. Use it with an SNO BM setup. * `cifmw_cls_storage_manifest`: (Dict) The storage manifest resource to be used to initiate storage class. ## Examples + +### Standard (CRC / VM-based) ```YAML - hosts: localhost vars: @@ -32,3 +35,18 @@ If apply, please explain the privilege escalation done in this role. - ansible.builtin.include_role: name: ci_local_storage ``` + +### Baremetal SNO +On bare-metal Single Node OpenShift the k8s node is typically not present +in the Ansible inventory for SSH access. Enable `cifmw_cls_oc_debug_fallback` +so the role uses `oc debug node/` to manage PV directories instead: +```YAML + - hosts: localhost + vars: + cifmw_openshift_kubeconfig: "{{ ansible_user_dir }}/.kube/kubeconfig" + cifmw_cls_pv_count: 20 + cifmw_cls_oc_debug_fallback: true + tasks: + - ansible.builtin.include_role: + name: ci_local_storage +``` diff --git a/roles/ci_local_storage/defaults/main.yml b/roles/ci_local_storage/defaults/main.yml index c10e036e3..854c15e8c 100644 --- a/roles/ci_local_storage/defaults/main.yml +++ b/roles/ci_local_storage/defaults/main.yml @@ -28,6 +28,7 @@ cifmw_cls_storage_provisioner: cifmw cifmw_cls_create_ee_storage: false cifmw_cls_namespace: openstack cifmw_cls_action: create +cifmw_cls_oc_debug_fallback: false cifmw_cls_storage_manifest: kind: StorageClass diff --git a/roles/ci_local_storage/molecule/default/converge.yml b/roles/ci_local_storage/molecule/default/converge.yml index 74526bc50..264278fca 100644 --- a/roles/ci_local_storage/molecule/default/converge.yml +++ b/roles/ci_local_storage/molecule/default/converge.yml @@ -137,3 +137,65 @@ }} ansible.builtin.assert: that: 
"cifmw_cls_namespace not in ns_names" + + - name: Test oc-debug fallback path (uncovered_node_dirs.yml) + vars: + cifmw_cls_pv_count: 3 + cifmw_cls_local_storage_name: /mnt/openstack-fallback + cifmw_cls_oc_debug_fallback: true + block: + - name: Get k8s node names for fallback test + kubernetes.core.k8s_info: + kubeconfig: "{{ cifmw_openshift_kubeconfig }}" + kind: Node + register: _fb_k8s_nodes + + - name: Simulate no SSH-reachable hosts matching k8s nodes + ansible.builtin.set_fact: + cifmw_ci_local_storage_k8s_hostnames: + - "{{ _fb_k8s_nodes.resources[0].metadata.name }}" + _hostnames: + results: [] + + - name: Run uncovered_node_dirs.yml (create) + vars: + cifmw_cls_action: create + ansible.builtin.include_tasks: + file: "{{ playbook_dir }}/../../tasks/uncovered_node_dirs.yml" + + - name: Assert uncovered nodes were identified + ansible.builtin.assert: + that: + - _cls_uncovered_nodes | length == 1 + + - name: Verify directories created on node + delegate_to: crc + become: true + register: _fb_check + ansible.builtin.stat: + path: "/mnt/openstack-fallback/pv{{ '%02d' | format(item | int) }}" + loop: "{{ range(1, 4) }}" + + - name: Assert all fallback directories exist + ansible.builtin.assert: + that: item.stat.exists + loop: "{{ _fb_check.results }}" + loop_control: + label: "{{ item.stat.path }}" + + - name: Run uncovered_node_dirs.yml (cleanup) + vars: + cifmw_cls_action: clean + ansible.builtin.include_tasks: + file: "{{ playbook_dir }}/../../tasks/uncovered_node_dirs.yml" + + - name: Verify fallback directories removed + delegate_to: crc + become: true + register: _fb_removed + ansible.builtin.stat: + path: "/mnt/openstack-fallback" + + - name: Assert fallback directory tree is gone + ansible.builtin.assert: + that: not _fb_removed.stat.exists diff --git a/roles/ci_local_storage/tasks/cleanup.yml b/roles/ci_local_storage/tasks/cleanup.yml index 111470e98..cadc83450 100644 --- a/roles/ci_local_storage/tasks/cleanup.yml +++ 
b/roles/ci_local_storage/tasks/cleanup.yml @@ -55,6 +55,11 @@ loop_control: loop_var: host +- name: Delete PV directories on nodes unreachable via SSH + when: + - cifmw_cls_oc_debug_fallback | bool + ansible.builtin.include_tasks: uncovered_node_dirs.yml + - name: Remove the cifmw_cls_namespace namespace kubernetes.core.k8s: state: absent diff --git a/roles/ci_local_storage/tasks/main.yml b/roles/ci_local_storage/tasks/main.yml index 169f58161..02fdd96da 100644 --- a/roles/ci_local_storage/tasks/main.yml +++ b/roles/ci_local_storage/tasks/main.yml @@ -52,6 +52,11 @@ loop_control: loop_var: host +- name: Manage PV directories on nodes unreachable via SSH + when: + - cifmw_cls_oc_debug_fallback | bool + ansible.builtin.include_tasks: uncovered_node_dirs.yml + - name: Generate pv related storage manifest file ansible.builtin.template: src: storage.yaml.j2 diff --git a/roles/ci_local_storage/tasks/uncovered_node_dirs.yml b/roles/ci_local_storage/tasks/uncovered_node_dirs.yml new file mode 100644 index 000000000..59ea4255b --- /dev/null +++ b/roles/ci_local_storage/tasks/uncovered_node_dirs.yml @@ -0,0 +1,43 @@ +--- +- name: Identify k8s nodes not reachable via SSH (SNO BM) + vars: + _ssh_covered: >- + {{ + _hostnames.results | + default([]) | + selectattr('stdout', 'defined') | + map(attribute='stdout') | + list + }} + ansible.builtin.set_fact: + _cls_uncovered_nodes: >- + {{ + cifmw_ci_local_storage_k8s_hostnames | + difference(_ssh_covered) + }} + +- name: Manage PV directories via oc debug for unreachable nodes + when: + - _cls_uncovered_nodes | length > 0 + vars: + _action_script: >- + {% if cifmw_cls_action == 'create' %} + for i in $(seq -f '%02g' 1 {{ cifmw_cls_pv_count | int }}); + do mkdir -p '{{ cifmw_cls_local_storage_name }}'/pv$i && + chmod 0775 '{{ cifmw_cls_local_storage_name }}'/pv$i; done + {% else %} + rm -rf '{{ cifmw_cls_local_storage_name }}' + {% endif %} + ansible.builtin.command: + cmd: >- + oc debug node/{{ node_name }} + --kubeconfig={{ 
cifmw_openshift_kubeconfig }} + -- chroot /host bash -c "{{ _action_script }}" + changed_when: false + register: _cls_oc_debug + retries: 3 + delay: 10 + until: _cls_oc_debug.rc == 0 + loop: "{{ _cls_uncovered_nodes }}" + loop_control: + loop_var: node_name