diff --git a/src/connectedk8s/azext_connectedk8s/_constants.py b/src/connectedk8s/azext_connectedk8s/_constants.py index e6be92a8fbc..03f327f262d 100644 --- a/src/connectedk8s/azext_connectedk8s/_constants.py +++ b/src/connectedk8s/azext_connectedk8s/_constants.py @@ -135,6 +135,8 @@ Diagnoser_Container_Check_Failed_Fault_Type = "Error occured while performing the diagnoser container checks" Cluster_DNS_Check_Fault_Type = "Error occured while performing cluster DNS check" Outbound_Connectivity_Check_Fault_Type = "Error occured while performing outbound connectivity check in the cluster" +Outbound_Connectivity_Failed_Fault_Type = "Outbound network connectivity failed in cluster diagnostic checks" +DNS_Failed_Fault_Type = "DNS resolution failed in cluster diagnostic checks" MSI_Cert_Check_Fault_Type = "Error occurred while trying to perform MSI ceritificate presence check" Cluster_Security_Policy_Check_Fault_Type = "Error occured while performing cluster security policy check" KAP_Cert_Check_Fault_Type = "Error occurred while trying to perform KAP ceritificate presence check" @@ -167,6 +169,9 @@ Arc_Agents_Logs = "arc_agents_logs" Arc_Deployment_Logs = "arc_deployment_logs" Arc_Diagnostic_Logs = "arc_diagnostic_logs" +Pre_Onboarding_Check_Logs = "pre_onboarding_check_logs" +Pre_Onboarding_Helm_Charts_Folder_Name = 'PreOnboardingChecksCharts' +Pre_Onboarding_Helm_Charts_Release_Name = 'cluster-diagnostic-checks' Describe_Non_Ready_Arc_Agents = "describe_non_ready_arc_agents" Agent_State = "agent_state.txt" Arc_Agents_Events = "arc_agent_events.txt" @@ -176,7 +181,14 @@ K8s_Cluster_Info = "k8s_cluster_info.txt" Outbound_Network_Connectivity_Check = "outbound_network_connectivity_check.txt" Events_of_Incomplete_Diagnoser_Job = "diagnoser_failure_events.txt" - +# Connect Precheck Diagnoser constants +Cluster_Diagnostic_Checks_Job_Registry_Path = "mcr.microsoft.com/azurearck8s/helmchart/stable/clusterdiagnosticchecks:0.1.0" 
+Cluster_Diagnostic_Checks_Helm_Install_Failed_Fault_Type = "Error while installing cluster diagnostic checks helm release" +Cluster_Diagnostic_Checks_Execution_Failed_Fault_Type = "Error occured while executing cluster diagnostic checks" +Cluster_Diagnostic_Checks_Release_Cleanup_Failed = "Error occured while cleaning up the cluster diagnostic checks helm release" +Cluster_Diagnostic_Checks_Job_Not_Scheduled = 'Unable to schedule cluster-diagnostic-checks job' +Cluster_Diagnostic_Checks_Job_Not_Complete = 'Unable to complete cluster-diagnostic-checks job after scheduling' +Pre_Onboarding_Diagnostic_Checks_Execution_Failed = 'Exception occured while trying to execute pre-onboarding diagnostic checks' # Diagnostic Results Name Outbound_Connectivity_Check_Result_String = "Outbound Network Connectivity Result:" DNS_Check_Result_String = "DNS Result:" diff --git a/src/connectedk8s/azext_connectedk8s/_precheckutils.py b/src/connectedk8s/azext_connectedk8s/_precheckutils.py new file mode 100644 index 00000000000..a44a03fe836 --- /dev/null +++ b/src/connectedk8s/azext_connectedk8s/_precheckutils.py @@ -0,0 +1,221 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# -------------------------------------------------------------------------------------------- + +import os +import shutil +import subprocess +from subprocess import Popen, PIPE +import time +import requests +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry +import json +from kubernetes import client, config, watch, utils +from knack.util import CLIError +from knack.log import get_logger +from knack.prompting import NoTTYException, prompt_y_n +from azure.cli.core.commands.client_factory import get_subscription_id +from azure.cli.core.util import send_raw_request +from azure.cli.core import telemetry +from azure.core.exceptions import ResourceNotFoundError, HttpResponseError +from msrest.exceptions import AuthenticationError, HttpOperationError, TokenExpiredError +from msrest.exceptions import ValidationError as MSRestValidationError +from kubernetes.client.rest import ApiException +from azext_connectedk8s._client_factory import _resource_client_factory, _resource_providers_client +import azext_connectedk8s._constants as consts +import azext_connectedk8s._utils as azext_utils +from kubernetes import client as kube_client +from azure.cli.core import get_default_cli +from azure.cli.core.azclierror import CLIInternalError, ClientRequestError, ArgumentUsageError, ManualInterrupt, AzureResponseError, AzureInternalError, ValidationError +from argparse import Namespace +from pydoc import cli +from logging import exception +import yaml +import json +import datetime +from subprocess import Popen, PIPE, run, STDOUT, call, DEVNULL +import shutil +from knack.log import get_logger +from azure.cli.core import telemetry +import azext_connectedk8s._constants as consts +logger = get_logger(__name__) +# pylint: disable=unused-argument, too-many-locals, too-many-branches, too-many-statements, line-too-long +# pylint: disable + +diagnoser_output = [] + + +def fetch_diagnostic_checks_results(corev1_api_instance, batchv1_api_instance, helm_client_location, 
def fetch_diagnostic_checks_results(corev1_api_instance, batchv1_api_instance, helm_client_location, kubectl_client_location, kube_config, kube_context, location, http_proxy, https_proxy, no_proxy, proxy_cert, azure_cloud, filepath_with_timestamp, storage_space_available):
    """Run the cluster diagnostic checks job and summarise its DNS and outbound connectivity results.

    The container log is obtained from ``executing_cluster_diagnostic_checks_job`` and
    then handed to ``azext_utils.check_cluster_DNS`` and
    ``azext_utils.check_cluster_outbound_connectivity``.

    Returns:
        A ``(status, storage_space_available)`` tuple. ``status`` is
        ``consts.Diagnostic_Check_Passed`` when both checks passed,
        ``consts.Diagnostic_Check_Incomplete`` when no log was obtained, either check
        was incomplete or an exception was raised here, and
        ``consts.Diagnostic_Check_Failed`` otherwise.
    """
    try:
        # Executing the cluster_diagnostic_checks job and fetching the logs obtained
        container_log = executing_cluster_diagnostic_checks_job(corev1_api_instance, batchv1_api_instance, helm_client_location, kubectl_client_location, kube_config, kube_context, location, http_proxy, https_proxy, no_proxy, proxy_cert, azure_cloud)

        # The job helper returns None or "" when it could not produce a log
        if not container_log:
            return consts.Diagnostic_Check_Incomplete, storage_space_available

        log_lines = container_log.split("\n")
        # A newline-terminated log leaves one empty trailing element. Drop only that
        # element, so a log without a final newline keeps its last (outbound result) line.
        if log_lines[-1] == "":
            log_lines.pop()

        # Collect the DNS section: it starts at the DNS result marker and runs until
        # the outbound connectivity marker is seen.
        dns_check_log = ""
        collecting_dns_output = False
        for line in log_lines:
            if consts.Outbound_Connectivity_Check_Result_String in line:
                collecting_dns_output = False
            elif consts.DNS_Check_Result_String in line:
                dns_check_log += line
                collecting_dns_output = True
            elif collecting_dns_output:
                dns_check_log += " " + line

        dns_check, storage_space_available = azext_utils.check_cluster_DNS(dns_check_log, filepath_with_timestamp, storage_space_available, diagnoser_output)
        # The outbound connectivity result is read from the last line of the log
        outbound_connectivity_check, storage_space_available = azext_utils.check_cluster_outbound_connectivity(log_lines[-1], filepath_with_timestamp, storage_space_available, diagnoser_output)

        # If both the checks passed then we will return cluster diagnostic checks Passed
        if dns_check == consts.Diagnostic_Check_Passed and outbound_connectivity_check == consts.Diagnostic_Check_Passed:
            return consts.Diagnostic_Check_Passed, storage_space_available
        # If any of the checks remain Incomplete then we will return Incomplete
        if consts.Diagnostic_Check_Incomplete in (dns_check, outbound_connectivity_check):
            return consts.Diagnostic_Check_Incomplete, storage_space_available
        return consts.Diagnostic_Check_Failed, storage_space_available

    # To handle any exception that may occur during the execution
    except Exception as e:
        logger.warning("An exception has occured while trying to execute cluster diagnostic checks container on the cluster. Exception: {}".format(str(e)) + "\n")
        telemetry.set_exception(exception=e, fault_type=consts.Cluster_Diagnostic_Checks_Execution_Failed_Fault_Type, summary="Error occured while executing the cluster diagnostic checks container")

    return consts.Diagnostic_Check_Incomplete, storage_space_available
def executing_cluster_diagnostic_checks_job(corev1_api_instance, batchv1_api_instance, helm_client_location, kubectl_client_location, kube_config, kube_context, location, http_proxy, https_proxy, no_proxy, proxy_cert, azure_cloud):
    """Install the cluster diagnostic checks helm release, wait for its job and return the container log.

    Steps performed:
      1. Uninstall any stale ``cluster-diagnostic-checks`` release.
      2. Pull the chart and install the release.
      3. Watch jobs in the ``azure-arc-release`` namespace (for up to 60 seconds)
         until the diagnostic job reports a ``Complete`` condition.
      4. Read the log of the job's pod and uninstall the release again.

    ``kubectl_client_location`` is accepted but not used by this function.

    Returns:
        The container log as a string (``""`` when no matching pod was found), or
        ``None`` when cleanup of a stale release failed, the job was not scheduled,
        the job did not complete, or an exception was raised.
    """
    job_name = "cluster-diagnostic-checks-job"
    # Setting the log output as Empty
    cluster_diagnostic_checks_container_log = ""

    cmd_helm_delete = [helm_client_location, "uninstall", "cluster-diagnostic-checks", "-n", "azure-arc-release"]
    if kube_config:
        cmd_helm_delete.extend(["--kubeconfig", kube_config])
    if kube_context:
        cmd_helm_delete.extend(["--kube-context", kube_context])

    try:
        config.load_kube_config(kube_config, kube_context)

        # Attempting deletion of cluster diagnostic checks resources to handle the scenario if any stale resources are present
        response_kubectl_delete_helm = Popen(cmd_helm_delete, stdout=PIPE, stderr=PIPE)
        _, error_kubectl_delete_helm = response_kubectl_delete_helm.communicate()
        # Only inspect stderr when the uninstall actually failed: the exit status is
        # on .returncode, the Popen object itself never compares equal to 0.
        if response_kubectl_delete_helm.returncode != 0:
            error_text = error_kubectl_delete_helm.decode("ascii")
            # Converting the string of multiple errors to list (dropping the element after the final newline)
            error_msg_list = error_text.split("\n")
            error_msg_list.pop(-1)
            # "not found" / "deleted" only mean there was nothing stale to remove
            unexpected_errors = [line for line in error_msg_list if not ('not found' in line or 'deleted' in line)]
            if unexpected_errors:
                logger.warning("Cleanup of previous diagnostic checks helm release failed and hence couldn't install the new helm release. Please cleanup older release using \"helm delete cluster-diagnostic-checks -n azure-arc-release\" and try onboarding again")
                telemetry.set_exception(exception=error_text, fault_type=consts.Cluster_Diagnostic_Checks_Release_Cleanup_Failed, summary="Error while executing cluster diagnostic checks Job")
                return None

        chart_path = azext_utils.get_chart_path(consts.Cluster_Diagnostic_Checks_Job_Registry_Path, kube_config, kube_context, helm_client_location, consts.Pre_Onboarding_Helm_Charts_Folder_Name, consts.Pre_Onboarding_Helm_Charts_Release_Name)

        helm_install_release_cluster_diagnostic_checks(chart_path, location, http_proxy, https_proxy, no_proxy, proxy_cert, azure_cloud, kube_config, kube_context, helm_client_location)

        # Watching for the cluster diagnostic checks job to reach the completed stage
        w = watch.Watch()
        is_job_complete = False
        is_job_scheduled = False
        # The watch stream ends on its own after timeout_seconds (60 seconds) if the job never completes
        for event in w.stream(batchv1_api_instance.list_namespaced_job, namespace='azure-arc-release', label_selector="", timeout_seconds=60):
            try:
                job = event["object"]
                if job.metadata.name == job_name:
                    # Seeing the job at all means it got scheduled
                    is_job_scheduled = True
                    if job.status.conditions[0].type == "Complete":
                        is_job_complete = True
                        w.stop()
            except Exception:  # pylint: disable=broad-except
                # Events without usable status/conditions raise here; skip to the next event
                continue

        if not is_job_scheduled:
            telemetry.set_exception(exception="Couldn't schedule cluster diagnostic checks job in the cluster", fault_type=consts.Cluster_Diagnostic_Checks_Job_Not_Scheduled,
                                    summary="Couldn't schedule cluster diagnostic checks job in the cluster")
            logger.warning("Unable to schedule the cluster diagnostic checks job in the kubernetes cluster. The possible reasons can be presence of a security policy or security context constraint (SCC) or it may happen becuase of lack of ResourceQuota.\n")
            # Cleanup is started but not waited for
            Popen(cmd_helm_delete, stdout=PIPE, stderr=PIPE)
            return None

        if not is_job_complete:
            telemetry.set_exception(exception="Couldn't complete cluster diagnostic checks job after scheduling in the cluster", fault_type=consts.Cluster_Diagnostic_Checks_Job_Not_Complete,
                                    summary="Couldn't complete cluster diagnostic checks job after scheduling in the cluster")
            logger.warning("Cluster diagnostics job didn't reach completed state in the kubernetes cluster. The possible reasons can be resource constraints on the cluster.\n")
            Popen(cmd_helm_delete, stdout=PIPE, stderr=PIPE)
            return None

        # Fetching the cluster diagnostic checks container logs from the pod(s) created by the job
        all_pods = corev1_api_instance.list_namespaced_pod('azure-arc-release')
        for each_pod in all_pods.items:
            pod_name = each_pod.metadata.name
            if pod_name.startswith(job_name):
                cluster_diagnostic_checks_container_log = corev1_api_instance.read_namespaced_pod_log(name=pod_name, container="cluster-diagnostic-checks-container", namespace='azure-arc-release')
        # Clearing all the resources after fetching the cluster diagnostic checks container logs
        Popen(cmd_helm_delete, stdout=PIPE, stderr=PIPE)

    # To handle any exception that may occur during the execution
    except Exception as e:
        logger.warning("An exception has occured while trying to execute the cluster diagnostic checks in the cluster. Exception: {}".format(str(e)) + "\n")
        Popen(cmd_helm_delete, stdout=PIPE, stderr=PIPE)
        telemetry.set_exception(exception=e, fault_type=consts.Cluster_Diagnostic_Checks_Execution_Failed_Fault_Type, summary="Error while executing cluster diagnostic checks Job")
        return None

    return cluster_diagnostic_checks_container_log
def helm_install_release_cluster_diagnostic_checks(chart_path, location, http_proxy, https_proxy, no_proxy, proxy_cert, azure_cloud, kube_config, kube_context, helm_client_location, onboarding_timeout="60"):
    """Install (or upgrade) the ``cluster-diagnostic-checks`` helm release and wait for it.

    Builds a ``helm upgrade --install ... --wait --timeout <n>s`` command from the
    given settings and runs it.

    Args:
        onboarding_timeout: Timeout in seconds, as a string or an int; the ``s``
            suffix expected by helm is appended here.

    Raises:
        CLIInternalError: when helm exits with a non-zero status. The failure is
            recorded in telemetry first, and marked as a user fault when stderr
            mentions ``forbidden`` or a wait timeout.
    """
    cmd_helm_install = [helm_client_location, "upgrade", "--install", "cluster-diagnostic-checks", chart_path, "--namespace", "{}".format(consts.Release_Install_Namespace), "--create-namespace", "--output", "json"]
    # Chart values passed on the command line
    cmd_helm_install.extend(["--set", "global.location={}".format(location)])
    cmd_helm_install.extend(["--set", "global.azureCloud={}".format(azure_cloud)])
    if https_proxy:
        cmd_helm_install.extend(["--set", "global.httpsProxy={}".format(https_proxy)])
    if http_proxy:
        cmd_helm_install.extend(["--set", "global.httpProxy={}".format(http_proxy)])
    if no_proxy:
        cmd_helm_install.extend(["--set", "global.noProxy={}".format(no_proxy)])
    if proxy_cert:
        # --set-file makes helm read the value from the given file path
        cmd_helm_install.extend(["--set-file", "global.proxyCert={}".format(proxy_cert)])

    if kube_config:
        cmd_helm_install.extend(["--kubeconfig", kube_config])
    if kube_context:
        cmd_helm_install.extend(["--kube-context", kube_context])

    # Change --timeout format for helm client to understand; formatting (rather than
    # string concatenation) also accepts an integer number of seconds.
    cmd_helm_install.extend(["--wait", "--timeout", "{}s".format(onboarding_timeout)])

    response_helm_install = Popen(cmd_helm_install, stdout=PIPE, stderr=PIPE)
    _, error_helm_install = response_helm_install.communicate()
    if response_helm_install.returncode != 0:
        # Decode once and never fail on unexpected bytes, so the real helm error is always reported
        error_text = error_helm_install.decode("utf-8", errors="replace")
        if 'forbidden' in error_text or 'timed out waiting for the condition' in error_text:
            telemetry.set_user_fault()
        telemetry.set_exception(exception=error_text, fault_type=consts.Cluster_Diagnostic_Checks_Helm_Install_Failed_Fault_Type,
                                summary='Unable to install cluster diagnostic checks helm release')
        raise CLIInternalError("Unable to install cluster diagnostic checks helm release: " + error_text)
"[Errno 28]" in str(e): - shutil.rmtree(filepath_with_timestamp, ignore_errors=False, onerror=None) - telemetry.set_exception(exception=e, fault_type=consts.No_Storage_Space_Available_Fault_Type, summary="No space left on device") - return "", False - else: - logger.warning("An exception has occured while creating the diagnostic logs folder in your local machine. Exception: {}".format(str(e)) + "\n") - telemetry.set_exception(exception=e, fault_type=consts.Diagnostics_Folder_Creation_Failed_Fault_Type, summary="Error while trying to create diagnostic logs folder") - diagnoser_output.append("An exception has occured while creating the diagnostic logs folder in your local machine. Exception: {}".format(str(e)) + "\n") - return "", False - - # To handle any exception that may occur during the execution - except Exception as e: - logger.warning("An exception has occured while creating the diagnostic logs folder in your local machine. Exception: {}".format(str(e)) + "\n") - telemetry.set_exception(exception=e, fault_type=consts.Diagnostics_Folder_Creation_Failed_Fault_Type, summary="Error while trying to create diagnostic logs folder") - diagnoser_output.append("An exception has occured while creating the diagnostic logs folder in your local machine. 
Exception: {}".format(str(e)) + "\n") - return "", False - - def fetch_kubectl_cluster_info(filepath_with_timestamp, storage_space_available, kubectl_client_location, kube_config, kube_context): global diagnoser_output @@ -493,8 +451,8 @@ def check_diagnoser_container(corev1_api_instance, batchv1_api_instance, filepat counter_container_logs = 0 elif counter_container_logs == 0: dns_check_log += " " + outputs - dns_check, storage_space_available = check_cluster_DNS(dns_check_log, filepath_with_timestamp, storage_space_available) - outbound_connectivity_check, storage_space_available = check_cluster_outbound_connectivity(diagnoser_container_log_list[-1], filepath_with_timestamp, storage_space_available) + dns_check, storage_space_available = azext_utils.check_cluster_DNS(dns_check_log, filepath_with_timestamp, storage_space_available, diagnoser_output) + outbound_connectivity_check, storage_space_available = azext_utils.check_cluster_outbound_connectivity(diagnoser_container_log_list[-1], filepath_with_timestamp, storage_space_available, diagnoser_output) else: return consts.Diagnostic_Check_Incomplete, storage_space_available @@ -737,93 +695,6 @@ def executing_diagnoser_job(corev1_api_instance, batchv1_api_instance, filepath_ return diagnoser_container_log -def check_cluster_DNS(dns_check_log, filepath_with_timestamp, storage_space_available): - - global diagnoser_output - try: - if consts.DNS_Check_Result_String not in dns_check_log: - return consts.Diagnostic_Check_Incomplete, storage_space_available - formatted_dns_log = dns_check_log.replace('\t', '') - # Validating if DNS is working or not and displaying proper result - if("NXDOMAIN" in formatted_dns_log or "connection timed out" in formatted_dns_log): - logger.warning("Error: We found an issue with the DNS resolution on your cluster. 
For details about debugging DNS issues visit 'https://kubernetes.io/docs/tasks/administer-cluster/dns-debugging-resolution/'.\n") - diagnoser_output.append("Error: We found an issue with the DNS resolution on your cluster. For details about debugging DNS issues visit 'https://kubernetes.io/docs/tasks/administer-cluster/dns-debugging-resolution/'.\n") - if storage_space_available: - dns_check_path = os.path.join(filepath_with_timestamp, consts.DNS_Check) - with open(dns_check_path, 'w+') as dns: - dns.write(formatted_dns_log + "\nWe found an issue with the DNS resolution on your cluster.") - return consts.Diagnostic_Check_Failed, storage_space_available - else: - if storage_space_available: - dns_check_path = os.path.join(filepath_with_timestamp, consts.DNS_Check) - with open(dns_check_path, 'w+') as dns: - dns.write(formatted_dns_log + "\nCluster DNS check passed successfully.") - return consts.Diagnostic_Check_Passed, storage_space_available - - # For handling storage or OS exception that may occur during the execution - except OSError as e: - if "[Errno 28]" in str(e): - storage_space_available = False - telemetry.set_exception(exception=e, fault_type=consts.No_Storage_Space_Available_Fault_Type, summary="No space left on device") - shutil.rmtree(filepath_with_timestamp, ignore_errors=False, onerror=None) - else: - logger.warning("An exception has occured while performing the DNS check on the cluster. Exception: {}".format(str(e)) + "\n") - telemetry.set_exception(exception=e, fault_type=consts.Cluster_DNS_Check_Fault_Type, summary="Error occured while performing cluster DNS check") - diagnoser_output.append("An exception has occured while performing the DNS check on the cluster. Exception: {}".format(str(e)) + "\n") - - # To handle any exception that may occur during the execution - except Exception as e: - logger.warning("An exception has occured while performing the DNS check on the cluster. 
Exception: {}".format(str(e)) + "\n") - telemetry.set_exception(exception=e, fault_type=consts.Cluster_DNS_Check_Fault_Type, summary="Error occured while performing cluster DNS check") - diagnoser_output.append("An exception has occured while performing the DNS check on the cluster. Exception: {}".format(str(e)) + "\n") - - return consts.Diagnostic_Check_Incomplete, storage_space_available - - -def check_cluster_outbound_connectivity(outbound_connectivity_check_log, filepath_with_timestamp, storage_space_available): - - global diagnoser_output - try: - outbound_connectivity_response = outbound_connectivity_check_log[-1:-4:-1] - outbound_connectivity_response = outbound_connectivity_response[::-1] - if consts.Outbound_Connectivity_Check_Result_String not in outbound_connectivity_check_log: - return consts.Diagnostic_Check_Incomplete, storage_space_available - # Validating if outbound connectiivty is working or not and displaying proper result - if(outbound_connectivity_response != "000"): - if storage_space_available: - outbound_connectivity_check_path = os.path.join(filepath_with_timestamp, consts.Outbound_Network_Connectivity_Check) - with open(outbound_connectivity_check_path, 'w+') as outbound: - outbound.write("Response code " + outbound_connectivity_response + "\nOutbound network connectivity check passed successfully.") - return consts.Diagnostic_Check_Passed, storage_space_available - else: - logger.warning("Error: We found an issue with outbound network connectivity from the cluster.\nIf your cluster is behind an outbound proxy server, please ensure that you have passed proxy parameters during the onboarding of your cluster.\nFor more details visit 'https://docs.microsoft.com/en-us/azure/azure-arc/kubernetes/quickstart-connect-cluster?tabs=azure-cli#connect-using-an-outbound-proxy-server'.\nPlease ensure to meet the following network requirements 
'https://docs.microsoft.com/en-us/azure/azure-arc/kubernetes/quickstart-connect-cluster?tabs=azure-cli#meet-network-requirements' \n") - diagnoser_output.append("Error: We found an issue with outbound network connectivity from the cluster.\nIf your cluster is behind an outbound proxy server, please ensure that you have passed proxy parameters during the onboarding of your cluster.\nFor more details visit 'https://docs.microsoft.com/en-us/azure/azure-arc/kubernetes/quickstart-connect-cluster?tabs=azure-cli#connect-using-an-outbound-proxy-server'.\nPlease ensure to meet the following network requirements 'https://docs.microsoft.com/en-us/azure/azure-arc/kubernetes/quickstart-connect-cluster?tabs=azure-cli#meet-network-requirements' \n") - if storage_space_available: - outbound_connectivity_check_path = os.path.join(filepath_with_timestamp, consts.Outbound_Network_Connectivity_Check) - with open(outbound_connectivity_check_path, 'w+') as outbound: - outbound.write("Response code " + outbound_connectivity_response + "\nWe found an issue with Outbound network connectivity from the cluster.") - return consts.Diagnostic_Check_Failed, storage_space_available - - # For handling storage or OS exception that may occur during the execution - except OSError as e: - if "[Errno 28]" in str(e): - storage_space_available = False - telemetry.set_exception(exception=e, fault_type=consts.No_Storage_Space_Available_Fault_Type, summary="No space left on device") - shutil.rmtree(filepath_with_timestamp, ignore_errors=False, onerror=None) - else: - logger.warning("An exception has occured while performing the outbound connectivity check on the cluster. Exception: {}".format(str(e)) + "\n") - telemetry.set_exception(exception=e, fault_type=consts.Outbound_Connectivity_Check_Fault_Type, summary="Error occured while performing outbound connectivity check in the cluster") - diagnoser_output.append("An exception has occured while performing the outbound connectivity check on the cluster. 
Exception: {}".format(str(e)) + "\n") - - # To handle any exception that may occur during the execution - except Exception as e: - logger.warning("An exception has occured while performing the outbound connectivity check on the cluster. Exception: {}".format(str(e)) + "\n") - telemetry.set_exception(exception=e, fault_type=consts.Outbound_Connectivity_Check_Fault_Type, summary="Error occured while performing outbound connectivity check in the cluster") - diagnoser_output.append("An exception has occured while performing the outbound connectivity check on the cluster. Exception: {}".format(str(e)) + "\n") - - return consts.Diagnostic_Check_Incomplete, storage_space_available - - def check_msi_certificate_presence(corev1_api_instance): global diagnoser_output @@ -997,47 +868,3 @@ def describe_non_ready_agent_log(filepath_with_timestamp, corev1_api_instance, a diagnoser_output.append("An exception has occured while storing stuck agent logs in the user local machine. Exception: {}".format(str(e)) + "\n") return storage_space_available - - -def fetching_cli_output_logs(filepath_with_timestamp, storage_space_available, flag): - - # This function is used to store the output that is obtained throughout the Diagnoser process - global diagnoser_output - try: - # If storage space is available then only we store the output - if storage_space_available: - # Path to store the diagnoser results - cli_output_logger_path = os.path.join(filepath_with_timestamp, consts.Diagnoser_Results) - # If any results are obtained during the process than we will add it to the text file. 
- if len(diagnoser_output) > 0: - with open(cli_output_logger_path, 'w+') as cli_output_writer: - for output in diagnoser_output: - cli_output_writer.write(output + "\n") - # If flag is 0 that means that process was terminated using the Keyboard Interrupt so adding that also to the text file - if flag == 0: - cli_output_writer.write("Process terminated externally.\n") - - # If no issues was found during the whole troubleshoot execution - elif flag: - with open(cli_output_logger_path, 'w+') as cli_output_writer: - cli_output_writer.write("The diagnoser didn't find any issues on the cluster.\n") - # If process was terminated by user - else: - with open(cli_output_logger_path, 'w+') as cli_output_writer: - cli_output_writer.write("Process terminated externally.\n") - - return consts.Diagnostic_Check_Passed - - # For handling storage or OS exception that may occur during the execution - except OSError as e: - if "[Errno 28]" in str(e): - storage_space_available = False - telemetry.set_exception(exception=e, fault_type=consts.No_Storage_Space_Available_Fault_Type, summary="No space left on device") - shutil.rmtree(filepath_with_timestamp, ignore_errors=False, onerror=None) - - # To handle any exception that may occur during the execution - except Exception as e: - logger.warning("An exception has occured while trying to store the diagnoser results. 
def get_chart_path(registry_path, kube_config, kube_context, helm_client_location, chart_folder_name='AzureArcCharts', chart_name='azure-arc-k8sagents'):
    """Pull a helm chart from the registry, export it under ``~/.azure`` and return its local path.

    Args:
        registry_path: Registry reference of the chart to pull and export.
        chart_folder_name: Folder below ``~/.azure`` that receives the exported chart.
        chart_name: Chart directory name inside the export folder.

    Returns:
        The exported chart path. For every folder other than the pre-onboarding
        charts folder, a non-empty ``HELMCHART`` environment variable overrides it.
    """
    # Pulling helm chart from registry
    os.environ['HELM_EXPERIMENTAL_OCI'] = '1'
    pull_helm_chart(registry_path, kube_config, kube_context, helm_client_location, chart_name)

    # Exporting helm chart after cleanup
    chart_export_path = os.path.join(os.path.expanduser('~'), '.azure', chart_folder_name)
    try:
        if os.path.isdir(chart_export_path):
            shutil.rmtree(chart_export_path)
    except OSError:
        # Best-effort cleanup: a filesystem error is only reported, while
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        logger.warning("Unable to cleanup the %s already present on the machine. In case of failure, please cleanup the directory '%s' and try again.", chart_folder_name, chart_export_path)

    export_helm_chart(registry_path, chart_export_path, kube_config, kube_context, helm_client_location, chart_name)

    # Returning helm chart path
    helm_chart_path = os.path.join(chart_export_path, chart_name)
    if chart_folder_name == consts.Pre_Onboarding_Helm_Charts_Folder_Name:
        # The HELMCHART override is not applied to the pre-onboarding diagnostic checks chart
        return helm_chart_path
    return os.getenv('HELMCHART') or helm_chart_path
def check_cluster_DNS(dns_check_log, filepath_with_timestamp, storage_space_available, diagnoser_output):
    """Classify the cluster DNS check from its captured log text.

    :param dns_check_log: log text produced by the DNS check.
    :param filepath_with_timestamp: folder that receives the per-check log file.
    :param storage_space_available: when falsy, nothing is written to disk.
    :param diagnoser_output: list that collects user-facing findings; appended to in place.
    :return: ``(status, storage_space_available)``. Status is Incomplete when
        the result marker is missing from the log or an exception occurred,
        Failed when the log contains ``NXDOMAIN`` or ``connection timed out``,
        and Passed otherwise. The flag comes back ``False`` after a
        "no space left on device" error.
    """
    try:
        if consts.DNS_Check_Result_String not in dns_check_log:
            return consts.Diagnostic_Check_Incomplete, storage_space_available

        formatted_dns_log = dns_check_log.replace('\t', '')
        # Validating if DNS is working or not and displaying proper result
        if "NXDOMAIN" in formatted_dns_log or "connection timed out" in formatted_dns_log:
            dns_error = "Error: We found an issue with the DNS resolution on your cluster. For details about debugging DNS issues visit 'https://kubernetes.io/docs/tasks/administer-cluster/dns-debugging-resolution/'.\n"
            logger.warning(dns_error)
            diagnoser_output.append(dns_error)
            status = consts.Diagnostic_Check_Failed
            verdict_line = "\nWe found an issue with the DNS resolution on your cluster."
        else:
            status = consts.Diagnostic_Check_Passed
            verdict_line = "\nCluster DNS check passed successfully."

        if storage_space_available:
            dns_check_path = os.path.join(filepath_with_timestamp, consts.DNS_Check)
            with open(dns_check_path, 'w+') as dns:
                dns.write(formatted_dns_log + verdict_line)
        return status, storage_space_available

    # To handle any exception (storage, OS or otherwise) that may occur during the execution
    except Exception as e:
        if isinstance(e, OSError) and "[Errno 28]" in str(e):
            storage_space_available = False
            telemetry.set_exception(exception=e, fault_type=consts.No_Storage_Space_Available_Fault_Type, summary="No space left on device")
            # Best-effort cleanup: a failing delete must not abort the diagnostics.
            shutil.rmtree(filepath_with_timestamp, ignore_errors=True)
        else:
            failure = "An exception has occured while performing the DNS check on the cluster. Exception: {}".format(str(e)) + "\n"
            logger.warning(failure)
            telemetry.set_exception(exception=e, fault_type=consts.Cluster_DNS_Check_Fault_Type, summary="Error occured while performing cluster DNS check")
            diagnoser_output.append(failure)

    return consts.Diagnostic_Check_Incomplete, storage_space_available


def check_cluster_outbound_connectivity(outbound_connectivity_check_log, filepath_with_timestamp, storage_space_available, diagnoser_output):
    """Classify the outbound connectivity check from its captured log text.

    The last three characters of the log are taken as the response code
    (presumably an HTTP status - confirm against the job that produces the
    log); the value ``"000"`` is treated as a connectivity failure.

    :param outbound_connectivity_check_log: log text produced by the check.
    :param filepath_with_timestamp: folder that receives the per-check log file.
    :param storage_space_available: when falsy, nothing is written to disk.
    :param diagnoser_output: list that collects user-facing findings; appended to in place.
    :return: ``(status, storage_space_available)``. Status is Incomplete when
        the result marker is missing or an exception occurred, otherwise
        Passed or Failed. The flag comes back ``False`` after a
        "no space left on device" error.
    """
    try:
        outbound_connectivity_response = outbound_connectivity_check_log[-3:]
        if consts.Outbound_Connectivity_Check_Result_String not in outbound_connectivity_check_log:
            return consts.Diagnostic_Check_Incomplete, storage_space_available

        # Validating if outbound connectivity is working or not and displaying proper result
        if outbound_connectivity_response != "000":
            status = consts.Diagnostic_Check_Passed
            verdict_line = "\nOutbound network connectivity check passed successfully."
        else:
            connectivity_error = "Error: We found an issue with outbound network connectivity from the cluster.\nIf your cluster is behind an outbound proxy server, please ensure that you have passed proxy parameters during the onboarding of your cluster.\nFor more details visit 'https://docs.microsoft.com/en-us/azure/azure-arc/kubernetes/quickstart-connect-cluster?tabs=azure-cli#connect-using-an-outbound-proxy-server'.\nPlease ensure to meet the following network requirements 'https://docs.microsoft.com/en-us/azure/azure-arc/kubernetes/quickstart-connect-cluster?tabs=azure-cli#meet-network-requirements' \n"
            logger.warning(connectivity_error)
            diagnoser_output.append(connectivity_error)
            status = consts.Diagnostic_Check_Failed
            verdict_line = "\nWe found an issue with Outbound network connectivity from the cluster."

        if storage_space_available:
            outbound_connectivity_check_path = os.path.join(filepath_with_timestamp, consts.Outbound_Network_Connectivity_Check)
            with open(outbound_connectivity_check_path, 'w+') as outbound:
                outbound.write("Response code " + outbound_connectivity_response + verdict_line)
        return status, storage_space_available

    # To handle any exception (storage, OS or otherwise) that may occur during the execution
    except Exception as e:
        if isinstance(e, OSError) and "[Errno 28]" in str(e):
            storage_space_available = False
            telemetry.set_exception(exception=e, fault_type=consts.No_Storage_Space_Available_Fault_Type, summary="No space left on device")
            # Best-effort cleanup: a failing delete must not abort the diagnostics.
            shutil.rmtree(filepath_with_timestamp, ignore_errors=True)
        else:
            failure = "An exception has occured while performing the outbound connectivity check on the cluster. Exception: {}".format(str(e)) + "\n"
            logger.warning(failure)
            telemetry.set_exception(exception=e, fault_type=consts.Outbound_Connectivity_Check_Fault_Type, summary="Error occured while performing outbound connectivity check in the cluster")
            diagnoser_output.append(failure)

    return consts.Diagnostic_Check_Incomplete, storage_space_available


def fetching_cli_output_logs(filepath_with_timestamp, storage_space_available, flag, for_preonboarding_checks=False):
    """Write the findings collected during a diagnoser run to the results file.

    :param filepath_with_timestamp: folder that receives the results file.
    :param storage_space_available: when falsy, nothing is written and the
        call still reports Passed.
    :param flag: ``0`` marks a run interrupted by the user; a truthy value
        marks a run that finished.
    :param for_preonboarding_checks: read the findings from ``precheckutils``
        instead of ``troubleshootutils``.
    :return: ``consts.Diagnostic_Check_Passed`` on success, otherwise
        ``consts.Diagnostic_Check_Failed``.
    """
    if for_preonboarding_checks:
        diagnoser_output = precheckutils.diagnoser_output
    else:
        diagnoser_output = troubleshootutils.diagnoser_output

    try:
        # If storage space is available then only we store the output
        if storage_space_available:
            cli_output_logger_path = os.path.join(filepath_with_timestamp, consts.Diagnoser_Results)
            lines = list(diagnoser_output)
            if lines:
                # An interrupted run is noted after the findings gathered so far.
                if flag == 0:
                    lines.append("Process terminated externally.")
            elif flag:
                lines.append("The diagnoser didn't find any issues on the cluster.")
            else:
                lines.append("Process terminated externally.")

            with open(cli_output_logger_path, 'w+') as cli_output_writer:
                cli_output_writer.writelines(line + "\n" for line in lines)

        return consts.Diagnostic_Check_Passed

    # To handle any exception (storage, OS or otherwise) that may occur during the execution
    except Exception as e:
        if isinstance(e, OSError) and "[Errno 28]" in str(e):
            telemetry.set_exception(exception=e, fault_type=consts.No_Storage_Space_Available_Fault_Type, summary="No space left on device")
            # Best-effort cleanup: a failing delete must not abort the command.
            shutil.rmtree(filepath_with_timestamp, ignore_errors=True)
        else:
            # Other OS errors used to be dropped silently; report them like any other failure.
            logger.warning("An exception has occured while trying to store the diagnoser results. Exception: {}".format(str(e)) + "\n")
            telemetry.set_exception(exception=e, fault_type=consts.Diagnoser_Result_Fault_Type, summary="Error while storing the diagnoser results")

    return consts.Diagnostic_Check_Failed


def create_folder_diagnosticlogs(time_stamp, folder_name):
    """Create a fresh ``~/.azure/<folder_name>/<time_stamp>`` folder for diagnostic logs.

    Missing parent folders are created. A folder left over with the same
    timestamp is deleted and recreated so logs of two runs never mix.

    :param time_stamp: name of the per-run sub-folder.
    :param folder_name: name of the log folder under ``~/.azure``.
    :return: ``(path, True)`` on success, ``("", False)`` when the folder
        could not be created.
    """
    # Bound before the try so the no-space handler can never hit an unbound name.
    filepath_with_timestamp = None
    try:
        # Fetching path to user directory to create the diagnostic folder
        filepath = os.path.join(os.path.expanduser('~'), '.azure', folder_name)
        filepath_with_timestamp = os.path.join(filepath, time_stamp)
        os.makedirs(filepath, exist_ok=True)
        try:
            os.mkdir(filepath_with_timestamp)
        except FileExistsError:
            # Deleting the folder if present with the same timestamp to prevent overriding in the same folder and then creating it again
            shutil.rmtree(filepath_with_timestamp, ignore_errors=True)
            os.mkdir(filepath_with_timestamp)

        return filepath_with_timestamp, True

    # To handle any exception (storage, OS or otherwise) that may occur during the execution
    except Exception as e:
        if isinstance(e, OSError) and "[Errno 28]" in str(e):
            if filepath_with_timestamp:
                shutil.rmtree(filepath_with_timestamp, ignore_errors=True)
            telemetry.set_exception(exception=e, fault_type=consts.No_Storage_Space_Available_Fault_Type, summary="No space left on device")
        else:
            logger.warning("An exception has occured while creating the diagnostic logs folder in your local machine. Exception: {}".format(str(e)) + "\n")
            telemetry.set_exception(exception=e, fault_type=consts.Diagnostics_Folder_Creation_Failed_Fault_Type, summary="Error while trying to create diagnostic logs folder")
        return "", False
+ continue + time_stamp += elements + time_stamp = cluster_name + '-' + time_stamp + + # Generate the diagnostic folder in a given location + filepath_with_timestamp, diagnostic_folder_status = utils.create_folder_diagnosticlogs(time_stamp, consts.Pre_Onboarding_Check_Logs) + + if(diagnostic_folder_status is not True): + storage_space_available = False + + # Performing cluster-diagnostic-checks + diagnostic_checks, storage_space_available = precheckutils.fetch_diagnostic_checks_results(api_instance, batchv1_api_instance, helm_client_location, kubectl_client_location, kube_config, kube_context, location, http_proxy, https_proxy, no_proxy, proxy_cert, azure_cloud, filepath_with_timestamp, storage_space_available) + utils.fetching_cli_output_logs(filepath_with_timestamp, storage_space_available, 1, True) + + except Exception as e: + telemetry.set_exception(exception="An exception has occured while trying to execute pre-onboarding diagnostic checks : {}".format(str(e)), + fault_type=consts.Pre_Onboarding_Diagnostic_Checks_Execution_Failed, summary="An exception has occured while trying to execute pre-onboarding diagnostic checks : {}".format(str(e))) + raise CLIInternalError("An exception has occured while trying to execute pre-onboarding diagnostic checks : {}".format(str(e))) + + # Handling the user manual interrupt + except KeyboardInterrupt: + try: + utils.fetching_cli_output_logs(filepath_with_timestamp, storage_space_available, 0, True) + except Exception as e: + pass + raise ManualInterrupt('Process terminated externally.') + + # If the checks didnt pass then stop the onboarding + if diagnostic_checks != consts.Diagnostic_Check_Passed: + if storage_space_available: + logger.warning("The pre-check result logs logs have been saved at this path:" + filepath_with_timestamp + " .\nThese logs can be attached while filing a support ticket for further assistance.\n") + raise ValidationError("One or more pre-onboarding diagnostic checks failed and hence not proceeding 
with cluster onboarding. Please resolve them and try onboarding again.") + required_node_exists = check_linux_amd64_node(node_api_response) if not required_node_exists: telemetry.set_user_fault() @@ -2235,7 +2285,7 @@ def troubleshoot(cmd, client, resource_group_name, cluster_name, kube_config=Non time_stamp += elements time_stamp = cluster_name + '-' + time_stamp # Generate the diagnostic folder in a given location - filepath_with_timestamp, diagnostic_folder_status = troubleshootutils.create_folder_diagnosticlogs(time_stamp) + filepath_with_timestamp, diagnostic_folder_status = utils.create_folder_diagnosticlogs(time_stamp, consts.Arc_Diagnostic_Logs) if(diagnostic_folder_status is not True): storage_space_available = False @@ -2315,7 +2365,7 @@ def troubleshoot(cmd, client, resource_group_name, cluster_name, kube_config=Non diagnostic_checks[consts.Diagnoser_Check], storage_space_available = troubleshootutils.check_diagnoser_container(corev1_api_instance, batchv1_api_instance, filepath_with_timestamp, storage_space_available, absolute_path, probable_sufficient_resource_for_agents, helm_client_location, kubectl_client_location, release_namespace, diagnostic_checks[consts.KAP_Security_Policy_Check], kube_config, kube_context) # Adding cli output to the logs - diagnostic_checks[consts.Storing_Diagnoser_Results_Logs] = troubleshootutils.fetching_cli_output_logs(filepath_with_timestamp, storage_space_available, 1) + diagnostic_checks[consts.Storing_Diagnoser_Results_Logs] = utils.fetching_cli_output_logs(filepath_with_timestamp, storage_space_available, 1) # If all the checks passed then display no error found all_checks_passed = True @@ -2336,7 +2386,7 @@ def troubleshoot(cmd, client, resource_group_name, cluster_name, kube_config=Non # Handling the user manual interrupt except KeyboardInterrupt: try: - troubleshootutils.fetching_cli_output_logs(filepath_with_timestamp, storage_space_available, 0) + utils.fetching_cli_output_logs(filepath_with_timestamp, 
storage_space_available, 0) except Exception as e: pass raise ManualInterrupt('Process terminated externally.')