From 8463c67cf7d3242de862c9fdaeee63809e175ec5 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Thu, 1 Jul 2021 09:06:48 -0700 Subject: [PATCH 01/14] conf test updates --- test/e2e/conformance.yaml | 15 +++ test/e2e/src/core/Dockerfile | 17 ++- test/e2e/src/core/e2e_tests.sh | 142 +++++++++++++++++++++ test/e2e/src/core/setup_failure_handler.py | 18 +++ 4 files changed, 191 insertions(+), 1 deletion(-) create mode 100644 test/e2e/conformance.yaml create mode 100644 test/e2e/src/core/setup_failure_handler.py diff --git a/test/e2e/conformance.yaml b/test/e2e/conformance.yaml new file mode 100644 index 000000000..3c264b12a --- /dev/null +++ b/test/e2e/conformance.yaml @@ -0,0 +1,15 @@ +sonobuoy-config: + driver: Job + plugin-name: azure-arc-ci-conformance + result-format: junit +spec: + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest02152021 + imagePullPolicy: Always + name: plugin + resources: {} + volumes: + - name: results + emptyDir: {} + volumeMounts: + - mountPath: /tmp/results + name: results \ No newline at end of file diff --git a/test/e2e/src/core/Dockerfile b/test/e2e/src/core/Dockerfile index 9f85bdf4c..cd85aee40 100644 --- a/test/e2e/src/core/Dockerfile +++ b/test/e2e/src/core/Dockerfile @@ -1,11 +1,26 @@ FROM python:3.6 -RUN pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org pytest pytest-xdist filelock requests kubernetes adal msrestazure +RUN pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org pytest pytest-xdist filelock requests kubernetes adal msrestazure RUN curl https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash \ && helm version +RUN apt-get update && apt-get -y upgrade && \ + apt-get -f -y install curl apt-transport-https lsb-release gnupg python3-pip python-pip && \ + curl -sL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > /etc/apt/trusted.gpg.d/microsoft.asc.gpg && \ + CLI_REPO=$(lsb_release -cs) && \ + echo "deb [arch=amd64] https://packages.microsoft.com/repos/azure-cli/ ${CLI_REPO} main" \ + > /etc/apt/sources.list.d/azure-cli.list && \ + apt-get update && \ + apt-get install -y azure-cli && \ + rm -rf /var/lib/apt/lists/* + +RUN python3 -m pip install junit_xml + +COPY --from=lachlanevenson/k8s-kubectl:v1.20.5 /usr/local/bin/kubectl /usr/local/bin/kubectl + COPY ./core/e2e_tests.sh / +COPY ./core/setup_failure_handler.py / COPY ./core/pytest.ini /e2etests/ COPY ./core/conftest.py /e2etests/ COPY ./core/helper.py /e2etests/ diff --git a/test/e2e/src/core/e2e_tests.sh b/test/e2e/src/core/e2e_tests.sh index 3bfafdce9..67edbb026 100644 --- a/test/e2e/src/core/e2e_tests.sh +++ b/test/e2e/src/core/e2e_tests.sh @@ -1,7 +1,110 @@ #!/bin/sh +set -x +set -e results_dir="${RESULTS_DIR:-/tmp/results}" +function waitForResources { + available=false + max_retries=60 + sleep_seconds=10 + RESOURCETYPE=$1 + NAMESPACE=$2 + RESOURCE=$3 + for i in $(seq 1 $max_retries) + do + if [[ ! $(kubectl wait --for=condition=available ${RESOURCETYPE} ${RESOURCE} --all --namespace ${NAMESPACE}) ]]; then + sleep ${sleep_seconds} + else + available=true + break + fi + done + + echo "$available" +} + +function validateCommonParameters { + if [[ -z "${TENANT_ID}" ]]; then + echo "ERROR: parameter TENANT_ID is required." > ${results_dir}/error + python3 setup_failure_handler.py + fi + if [[ -z "${CLIENT_ID}" ]]; then + echo "ERROR: parameter CLIENT_ID is required." > ${results_dir}/error + python3 setup_failure_handler.py + fi + + if [[ -z "${CLIENT_SECRET}" ]]; then + echo "ERROR: parameter CLIENT_SECRET is required." > ${results_dir}/error + python3 setup_failure_handler.py + fi +} + +function validateArcConfTestParameters { + if [[ -z "${SUBSCRIPTION_ID}" ]]; then + echo "ERROR: parameter SUBSCRIPTION_ID is required." > ${results_dir}/error + python3 setup_failure_handler.py + fi + + if [[ -z "${RESOURCE_GROUP}" ]]; then + echo "ERROR: parameter RESOURCE_GROUP is required." > ${results_dir}/error + python3 setup_failure_handler.py + fi + + if [[ -z "${CLUSTER_NAME}" ]]; then + echo "ERROR: parameter CLUSTER_NAME is required." > ${results_dir}/error + python3 setup_failure_handler.py + fi + + if [[ -z "${CI_ARC_RELEASE_TRAIN}" ]]; then + echo "ERROR: parameter CI_ARC_RELEASE_TRAIN is required." > ${results_dir}/error + python3 setup_failure_handler.py + fi + + if [[ -z "${CI_ARC_VERSION}" ]]; then + echo "ERROR: parameter CI_ARC_VERSION is required." > ${results_dir}/error + python3 setup_failure_handler.py + fi + + if [[ -z "${CI_TEST_BRANCH}" ]]; then + echo "ERROR: parameter CI_TEST_BRANCH is required." > ${results_dir}/error + python3 setup_failure_handler.py + fi +} + +function addArcK8sCLIExtension { + az extension add --name k8s-extension 2> ${results_dir}/error || python3 setup_failure_handler.py +} + +function createArcCIExtension { + az k8s-extension create \ + --cluster-name $CLUSTER_NAME \ + --resource-group $RESOURCE_GROUP \ + --cluster-type connectedClusters \ + --extension-type Microsoft.AzureMonitor.Containers \ + --subscription $SUBSCRIPTION_ID \ + --scope cluster \ + --release-train $CI_ARC_RELEASE_TRAIN \ + --name azuremonitor-containers \ + --version $CI_ARC_VERSION 2> ${results_dir}/error || python3 setup_failure_handler.py +} + +function deleteArcCIExtension { + az k8s-extension delete --name azuremonitor-containers \ + --cluster-type connectedClusters \ + --cluster-name $CLUSTER_NAME \ + --resource-group $RESOURCE_GROUP || python3 setup_failure_handler.py +} + +function login_to_azure { + # Login with service principal + az login --service-principal \ + -u ${CLIENT_ID} \ + -p ${CLIENT_SECRET} \ + --tenant ${TENANT_ID} 2> ${results_dir}/error || python3 setup_failure_handler.py +} + + # saveResults prepares the results for handoff to the Sonobuoy worker. # See: https://github.com/vmware-tanzu/sonobuoy/blob/master/docs/plugins.md saveResults() { @@ -17,6 +120,42 @@ saveResults() { # Ensure that we tell the Sonobuoy worker we are done regardless of results. trap saveResults EXIT +# validate common params +validateCommonParameters + +# validate params +validateArcConfTestParameters + +# login to azure +login_to_azure + +# Wait for resources in ARC ns +waitSuccessArc="$(waitForResources deployment azure-arc)" +if [ "${waitSuccessArc}" == false ]; then + echo "deployment is not avilable in namespace - azure-arc" + exit 1 +fi + +# add CLI extension +addArcK8sCLIExtension + +# add ARC K8s container insights extension +createArcCIExtension + +# Wait for deployment resources in kube-system ns +waitSuccessArc="$(waitForResources deployment omsagent-rs kube-system)" +if [ "${waitSuccessArc}" == false ]; then + echo "omsagent-rs deployment is not avilable in namespace - kube-system" + exit 1 +fi + +# Wait for ds resources in kube-system ns +# waitSuccessArc="$(waitForResources ds omsagent kube-system)" +# if [ "${waitSuccessArc}" == false ]; then +# echo "omsagent is not avilable in namespace - kube-system" +# exit 1 +# fi + # The variable 'TEST_LIST' should be provided if we want to run specific tests. If not provided, all tests are run NUM_PROCESS=$(pytest /e2etests/ --collect-only -k "$TEST_NAME_LIST" -m "$TEST_MARKER_LIST" | grep " Date: Sun, 4 Jul 2021 10:07:46 -0700 Subject: [PATCH 02/14] clean up --- test/e2e/e2e-tests.yaml | 19 +++++++------ test/e2e/src/common/constants.py | 2 +- test/e2e/src/core/e2e_tests.sh | 38 ++++++++++++++----------- test/e2e/src/tests/test_rs_workflows.py | 6 ++-- 4 files changed, 36 insertions(+), 29 deletions(-) diff --git a/test/e2e/e2e-tests.yaml b/test/e2e/e2e-tests.yaml index 06dfa1fb0..50ac49d2e 100644 --- a/test/e2e/e2e-tests.yaml +++ b/test/e2e/e2e-tests.yaml @@ -68,7 +68,7 @@ data: containers: [] restartPolicy: Never serviceAccountName: sonobuoy-serviceaccount - nodeSelector: + nodeSelector: kubernetes.io/os: linux tolerations: - effect: NoSchedule @@ -84,8 +84,11 @@ data: result-format: junit spec: env: + # this should be false if the test environment is ARC K8s + - name: IS_NON_ARC_K8S_TEST_ENVIRONMENT + value: "true" # Update values of CLIENT_ID, CLIENT_SECRET of the service principal which has permission to query LA ad Metrics API - # Update value of TENANT_ID corresponding your Azure Service principal + # Update value of TENANT_ID corresponding your Azure Service principal - name: CLIENT_ID value: "SP_CLIENT_ID_VALUE" - name: CLIENT_SECRET @@ -93,14 +96,14 @@ data: - name: TENANT_ID value: "SP_TENANT_ID_VALUE" - name: DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES - value: "10" + value: "10" - name: DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES - value: "10" + value: "10" - name: AGENT_POD_EXPECTED_RESTART_COUNT - value: "0" + value: "0" - name: AZURE_CLOUD - value: "AZURE_PUBLIC_CLOUD" - # image tag should be updated if new tests being added after this image + value: "AZURE_PUBLIC_CLOUD" + # image tag should be updated if new tests being added after this image image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest02152021 imagePullPolicy: IfNotPresent name: plugin @@ -144,7 +147,7 @@ spec: name: output-volume restartPolicy: Never serviceAccountName: sonobuoy-serviceaccount - nodeSelector: + nodeSelector: kubernetes.io/os: linux tolerations: - key: "kubernetes.io/e2e-evict-taint-key" diff --git a/test/e2e/src/common/constants.py b/test/e2e/src/common/constants.py index 770964cb5..076440d55 100644 --- a/test/e2e/src/common/constants.py +++ b/test/e2e/src/common/constants.py @@ -48,7 +48,7 @@ AGENT_DEPLOYMENT_PODS_LABEL_SELECTOR = 'rsName=omsagent-rs' AGENT_DAEMON_SET_PODS_LABEL_SELECTOR = 'component=oms-agent' -AGENT_OMSAGENT_LOG_PATH = '/var/opt/microsoft/omsagent/log/omsagent.log' +AGENT_FLUENTD_LOG_PATH = '/var/opt/microsoft/docker-cimprov/log/fluentd.log' AGENT_REPLICASET_WORKFLOWS = ["kubePodInventoryEmitStreamSuccess", "kubeNodeInventoryEmitStreamSuccess"] # override this through setting enviornment variable if the expected restart count is > 0 for example applying configmap diff --git a/test/e2e/src/core/e2e_tests.sh b/test/e2e/src/core/e2e_tests.sh index 67edbb026..3286aa6b2 100644 --- a/test/e2e/src/core/e2e_tests.sh +++ b/test/e2e/src/core/e2e_tests.sh @@ -123,25 +123,29 @@ trap saveResults EXIT # validate common params validateCommonParameters -# validate params -validateArcConfTestParameters - -# login to azure -login_to_azure - -# Wait for resources in ARC ns -waitSuccessArc="$(waitForResources deployment azure-arc)" -if [ "${waitSuccessArc}" == false ]; then - echo "deployment is not avilable in namespace - azure-arc" - exit 1 +if [ "${IS_NON_ARC_K8S_TEST_ENVIRONMENT}" == "true" ]; then + echo "skipping installing of ARC K8s container insights extension since the test environment is non-arc K8s" +else + # validate params + validateArcConfTestParameters + + # login to azure + login_to_azure + + # Wait for resources in ARC ns + waitSuccessArc="$(waitForResources deployment azure-arc)" + if [ "${waitSuccessArc}" == false ]; then + echo "deployment is not avilable in namespace - azure-arc" + exit 1 + fi + + # add CLI extension + addArcK8sCLIExtension + + # add ARC K8s container insights extension + createArcCIExtension fi -# add CLI extension -addArcK8sCLIExtension - -# add ARC K8s container insights extension -createArcCIExtension - # Wait for deployment resources in kube-system ns waitSuccessArc="$(waitForResources deployment omsagent-rs kube-system)" if [ "${waitSuccessArc}" == false ]; then diff --git a/test/e2e/src/tests/test_rs_workflows.py b/test/e2e/src/tests/test_rs_workflows.py index aef422171..b644dbcc0 100755 --- a/test/e2e/src/tests/test_rs_workflows.py +++ b/test/e2e/src/tests/test_rs_workflows.py @@ -35,9 +35,9 @@ def test_rs_workflows(env_dict): rspodName = pod_list.items[0].metadata.name if not rspodName: pytest.fail("replicaset pod name should not be null or empty") - + logcontent = get_log_file_content( - api_instance, constants.AGENT_RESOURCES_NAMESPACE, rspodName, constants.AGENT_OMSAGENT_LOG_PATH) + api_instance, constants.AGENT_RESOURCES_NAMESPACE, rspodName, constants.AGENT_FLUENTD_LOG_PATH) if not logcontent: pytest.fail("logcontent should not be null or empty for rs pod: {}".format(rspodName)) loglines = logcontent.split("\n") @@ -78,7 +78,7 @@ def test_rs_workflows(env_dict): if IsKubeContainerPerfInventorySuccessful == False: pytest.fail("KubeContainerPerfInventory stream not emitted successfully from pod:" + rspodName) - + if IsKubeServicesInventorySuccessful == False: pytest.fail("KubeServicesInventory stream not emitted successfully from pod:" + rspodName) From 451fa8872a1f9aade6f692935877f524a4eb9972 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 5 Jul 2021 22:33:39 -0700 Subject: [PATCH 03/14] wip --- test/e2e/conformance.yaml | 2 +- test/e2e/src/core/e2e_tests.sh | 160 ++++++++++++++++++++++++--------- 2 files changed, 119 insertions(+), 43 deletions(-) diff --git a/test/e2e/conformance.yaml b/test/e2e/conformance.yaml index 3c264b12a..120db05e6 100644 --- a/test/e2e/conformance.yaml +++ b/test/e2e/conformance.yaml @@ -3,7 +3,7 @@ sonobuoy-config: plugin-name: azure-arc-ci-conformance result-format: junit spec: - image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest02152021 + image: docker.io/ganga1980/ciconftest:07052021-2 imagePullPolicy: Always name: plugin resources: {} diff --git a/test/e2e/src/core/e2e_tests.sh b/test/e2e/src/core/e2e_tests.sh index 3286aa6b2..6c5b1a553 100644 --- a/test/e2e/src/core/e2e_tests.sh +++ b/test/e2e/src/core/e2e_tests.sh @@ -1,19 +1,23 @@ -#!/bin/sh +#!/bin/bash set -x set -e results_dir="${RESULTS_DIR:-/tmp/results}" -function waitForResources { +waitForResources() { available=false max_retries=60 sleep_seconds=10 - RESOURCETYPE=$1 - NAMESPACE=$2 + NAMESPACE=$1 + RESOURCETYPE=$2 RESOURCE=$3 + # if resource not specified, set to --all + if [ -z $RESOURCE ]; then + RESOURCE="--all" + fi for i in $(seq 1 $max_retries) do - if [[ ! $(kubectl wait --for=condition=available ${RESOURCETYPE} ${RESOURCE} --all --namespace ${NAMESPACE}) ]]; then + if [[ ! $(kubectl wait --for=condition=available ${RESOURCETYPE} ${RESOURCE} --namespace ${NAMESPACE}) ]]; then sleep ${sleep_seconds} else available=true @@ -24,84 +28,134 @@ function waitForResources { echo "$available" } -function validateCommonParameters { - if [[ -z "${TENANT_ID}" ]]; then - echo "ERROR: parameter TENANT_ID is required." > ${results_dir}/error - python3 setup_failure_handler.py + +waitForArcK8sClusterCreated() { + connectivityState=false + max_retries=60 + sleep_seconds=10 + for i in $(seq 1 $max_retries) + do + clusterState=$(az connectedk8s show --name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --query connectivityStatus -o json) + clusterState=$(echo $clusterState | tr -d '"' | tr -d '"\r\n') + echo "cluster current state: ${clusterState}" + if [[ ("${clusterState}" == "Connected") || ("${clusterState}" == "Connecting") ]]; then + connectivityState=true + break + else + sleep ${sleep_seconds} + fi + done + echo "Arc K8s cluster connectivityState: $connectivityState" +} + +waitForCIExtensionInstalled() { + installedState=false + max_retries=60 + sleep_seconds=10 + for i in $(seq 1 $max_retries) + do + installState=$(az k8s-extension show --cluster-name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --cluster-type connectedClusters --name azuremonitor-containers --query installState -o json) + installState=$(echo $installState | tr -d '"' | tr -d '"\r\n') + echo "extension install state: ${installState}" + if [ "${installState}" == "Installed" ]; then + installedState=true + break + else + sleep ${sleep_seconds} + fi + done + echo "installedState: $installedState" +} + +validateCommonParameters() { + if [ -z $TENANT_ID ]; then + echo "ERROR: parameter TENANT_ID is required." > ${results_dir}/error + python3 setup_failure_handler.py fi - if [[ -z "${CLIENT_ID}" ]]; then + if [ -z $CLIENT_ID ]; then echo "ERROR: parameter CLIENT_ID is required." > ${results_dir}/error python3 setup_failure_handler.py fi - if [[ -z "${CLIENT_SECRET}" ]]; then + if [ -z $CLIENT_SECRET ]; then echo "ERROR: parameter CLIENT_SECRET is required." > ${results_dir}/error python3 setup_failure_handler.py fi } -function validateArcConfTestParameters { - if [[ -z "${SUBSCRIPTION_ID}" ]]; then - echo "ERROR: parameter SUBSCRIPTION_ID is required." > ${results_dir}/error - python3 setup_failure_handler.py +validateArcConfTestParameters() { + if [ -z $SUBSCRIPTION_ID ]; then + echo "ERROR: parameter SUBSCRIPTION_ID is required." > ${results_dir}/error + python3 setup_failure_handler.py fi - if [[ -z "${RESOURCE_GROUP}" ]]; then + if [ -z $RESOURCE_GROUP ]]; then echo "ERROR: parameter RESOURCE_GROUP is required." > ${results_dir}/error python3 setup_failure_handler.py fi - if [[ -z "${CLUSTER_NAME}" ]]; then + if [ -z $CLUSTER_NAME ]; then echo "ERROR: parameter CLUSTER_NAME is required." > ${results_dir}/error python3 setup_failure_handler.py fi - if [[ -z "${CI_ARC_RELEASE_TRAIN}" ]]; then + if [ -z $CI_ARC_RELEASE_TRAIN ]; then echo "ERROR: parameter CI_ARC_RELEASE_TRAIN is required." > ${results_dir}/error python3 setup_failure_handler.py fi - if [[ -z "${CI_ARC_VERSION}" ]]; then + if [ -z $CI_ARC_VERSION ]; then echo "ERROR: parameter CI_ARC_VERSION is required." > ${results_dir}/error python3 setup_failure_handler.py fi +} - if [[ -z "${CI_TEST_BRANCH}" ]]; then - echo "ERROR: parameter CI_TEST_BRANCH is required." > ${results_dir}/error - python3 setup_failure_handler.py - fi +addArcConnectedK8sExtension() { + echo "adding Arc K8s connectedk8s extension" + az extension add --name connectedk8s 2> ${results_dir}/error || python3 setup_failure_handler.py } -function addArcK8sCLIExtension { +addArcK8sCLIExtension() { + echo "adding Arc K8s k8s-extension extension" az extension add --name k8s-extension 2> ${results_dir}/error || python3 setup_failure_handler.py } -function createArcCIExtension { +createArcCIExtension() { + echo "creating extension type: Microsoft.AzureMonitor.Containers with release train: ${CI_ARC_RELEASE_TRAIN} and version: ${CI_ARC_VERSION}" az k8s-extension create \ --cluster-name $CLUSTER_NAME \ --resource-group $RESOURCE_GROUP \ --cluster-type connectedClusters \ --extension-type Microsoft.AzureMonitor.Containers \ - --subscription $SUBSCRIPTION_ID \ --scope cluster \ --release-train $CI_ARC_RELEASE_TRAIN \ --name azuremonitor-containers \ - --version $CI_ARC_VERSION 2> ${results_dir}/error || python3 setup_failure_handler.py + --version $CI_ARC_VERSION + # 2> ${results_dir}/error || python3 setup_failure_handler.py } -function deleteArcCIExtension { +showArcCIExtension() { + echo "arc ci extension status" + az k8s-extension show --cluster-name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --cluster-type connectedClusters --name azuremonitor-containers +} + +deleteArcCIExtension() { az k8s-extension delete --name azuremonitor-containers \ --cluster-type connectedClusters \ --cluster-name $CLUSTER_NAME \ --resource-group $RESOURCE_GROUP || python3 setup_failure_handler.py } -function login_to_azure { +login_to_azure() { # Login with service principal + echo "login to azure using the SP creds" az login --service-principal \ -u ${CLIENT_ID} \ -p ${CLIENT_SECRET} \ --tenant ${TENANT_ID} 2> ${results_dir}/error || python3 setup_failure_handler.py + + echo "setting subscription: ${SUBSCRIPTION_ID} as default subscription" + az account set -s $SUBSCRIPTION_ID } @@ -123,7 +177,17 @@ trap saveResults EXIT # validate common params validateCommonParameters -if [ "${IS_NON_ARC_K8S_TEST_ENVIRONMENT}" == "true" ]; then +IS_ARC_K8S_ENV="true" +if [ -z $IS_NON_ARC_K8S_TEST_ENVIRONMENT ]; then + echo "arc k8s environment" +else + if [ "$IS_NON_ARC_K8S_TEST_ENVIRONMENT" = "true" ]; then + IS_ARC_K8S_ENV="false" + echo "non arc k8s environment" + fi +fi + +if [ "$IS_ARC_K8S_ENV" = "false" ]; then echo "skipping installing of ARC K8s container insights extension since the test environment is non-arc K8s" else # validate params @@ -133,29 +197,41 @@ else login_to_azure # Wait for resources in ARC ns - waitSuccessArc="$(waitForResources deployment azure-arc)" - if [ "${waitSuccessArc}" == false ]; then - echo "deployment is not avilable in namespace - azure-arc" - exit 1 - fi +# waitSuccessArc="$(waitForResources azure-arc deployment)" +# if [ "${waitSuccessArc}" = "false" ]; then +# echo "deployment is not avilable in namespace - azure-arc" +# exit 1 +# fi + + # add arc k8s connectedk8s extension + addArcConnectedK8sExtension + + # wait for Arc K8s cluster to be created + waitForArcK8sClusterCreated # add CLI extension addArcK8sCLIExtension # add ARC K8s container insights extension createArcCIExtension + + # show the ci extension status + showArcCIExtension + + #wait for extension state to be installed + waitForCIExtensionInstalled fi # Wait for deployment resources in kube-system ns -waitSuccessArc="$(waitForResources deployment omsagent-rs kube-system)" -if [ "${waitSuccessArc}" == false ]; then - echo "omsagent-rs deployment is not avilable in namespace - kube-system" - exit 1 -fi +# waitSuccessArc="$(waitForResources kube-system deployment omsagent-rs)" +# if [ "${waitSuccessArc}" = "false" ]; then +# echo "omsagent-rs deployment is not avilable in namespace - kube-system" +# exit 1 +# fi # Wait for ds resources in kube-system ns -# waitSuccessArc="$(waitForResources ds omsagent kube-system)" -# if [ "${waitSuccessArc}" == false ]; then +# waitSuccessArc="$(waitForResources kube-system ds omsagent)" +# if [ "${waitSuccessArc}" = "false" ]; then # echo "omsagent is not avilable in namespace - kube-system" # exit 1 # fi From 5c1d2ee67c195fffb31901ad011766c6ac766bb3 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 6 Jul 2021 15:26:34 -0700 Subject: [PATCH 04/14] update with mcr cidev image --- test/e2e/conformance.yaml | 2 +- test/e2e/e2e-tests.yaml | 4 +-- test/e2e/src/common/constants.py | 3 +- test/e2e/src/core/conftest.py | 33 +++++++++++----------- test/e2e/src/core/e2e_tests.sh | 31 +++++++------------- test/e2e/src/tests/test_resource_status.py | 13 +++++++-- 6 files changed, 42 insertions(+), 44 deletions(-) diff --git a/test/e2e/conformance.yaml b/test/e2e/conformance.yaml index 120db05e6..2ce721b25 100644 --- a/test/e2e/conformance.yaml +++ b/test/e2e/conformance.yaml @@ -3,7 +3,7 @@ sonobuoy-config: plugin-name: azure-arc-ci-conformance result-format: junit spec: - image: docker.io/ganga1980/ciconftest:07052021-2 + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest07062021 imagePullPolicy: Always name: plugin resources: {} diff --git a/test/e2e/e2e-tests.yaml b/test/e2e/e2e-tests.yaml index 50ac49d2e..e606e7a3b 100644 --- a/test/e2e/e2e-tests.yaml +++ b/test/e2e/e2e-tests.yaml @@ -84,7 +84,7 @@ data: result-format: junit spec: env: - # this should be false if the test environment is ARC K8s + # this should be false if the test environment is non ARC K8s for example AKS - name: IS_NON_ARC_K8S_TEST_ENVIRONMENT value: "true" # Update values of CLIENT_ID, CLIENT_SECRET of the service principal which has permission to query LA ad Metrics API @@ -104,7 +104,7 @@ data: - name: AZURE_CLOUD value: "AZURE_PUBLIC_CLOUD" # image tag should be updated if new tests being added after this image - image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest02152021 + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest07062021 imagePullPolicy: IfNotPresent name: plugin resources: {} diff --git a/test/e2e/src/common/constants.py b/test/e2e/src/common/constants.py index 076440d55..879169572 100644 --- a/test/e2e/src/common/constants.py +++ b/test/e2e/src/common/constants.py @@ -47,7 +47,8 @@ AGENT_WIN_DAEMONSET_NAME = 'omsagent-win' AGENT_DEPLOYMENT_PODS_LABEL_SELECTOR = 'rsName=omsagent-rs' -AGENT_DAEMON_SET_PODS_LABEL_SELECTOR = 'component=oms-agent' +AGENT_DAEMON_SET_PODS_LABEL_SELECTOR = 'dsName=omsagent-ds' +AGENT_DAEMON_SET_PODS_LABEL_SELECTOR_NON_ARC = 'component=oms-agent' AGENT_FLUENTD_LOG_PATH = '/var/opt/microsoft/docker-cimprov/log/fluentd.log' AGENT_REPLICASET_WORKFLOWS = ["kubePodInventoryEmitStreamSuccess", "kubeNodeInventoryEmitStreamSuccess"] diff --git a/test/e2e/src/core/conftest.py b/test/e2e/src/core/conftest.py index e659d5189..6f3e6cf1c 100644 --- a/test/e2e/src/core/conftest.py +++ b/test/e2e/src/core/conftest.py @@ -22,42 +22,43 @@ def env_dict(): create_results_dir('/tmp/results') # Setting some environment variables - env_dict['SETUP_LOG_FILE'] = '/tmp/results/setup' + env_dict['SETUP_LOG_FILE'] = '/tmp/results/setup' env_dict['TEST_AGENT_LOG_FILE'] = '/tmp/results/containerinsights' env_dict['NUM_TESTS_COMPLETED'] = 0 - + print("Starting setup...") append_result_output("Starting setup...\n", env_dict['SETUP_LOG_FILE']) - + # Collecting environment variables env_dict['TENANT_ID'] = os.getenv('TENANT_ID') env_dict['CLIENT_ID'] = os.getenv('CLIENT_ID') env_dict['CLIENT_SECRET'] = os.getenv('CLIENT_SECRET') - + env_dict['IS_NON_ARC_K8S_TEST_ENVIRONMENT'] = os.getenv('IS_NON_ARC_K8S_TEST_ENVIRONMENT') + # get default query time interval for log analytics queries queryTimeInterval = int(os.getenv('DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES')) if os.getenv('DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES') else constants.DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES # add minute suffix since this format required for LA queries env_dict['DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES'] = str(queryTimeInterval) + "m" - + # get default query time interval for metrics queries env_dict['DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES'] = int(os.getenv('DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES')) if os.getenv('DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES') else constants.DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES - - - # expected agent pod restart count + + + # expected agent pod restart count env_dict['AGENT_POD_EXPECTED_RESTART_COUNT'] = int(os.getenv('AGENT_POD_EXPECTED_RESTART_COUNT')) if os.getenv('AGENT_POD_EXPECTED_RESTART_COUNT') else constants.AGENT_POD_EXPECTED_RESTART_COUNT # default to azure public cloud if AZURE_CLOUD not specified env_dict['AZURE_ENDPOINTS'] = constants.AZURE_CLOUD_DICT.get(os.getenv('AZURE_CLOUD')) if os.getenv('AZURE_CLOUD') else constants.AZURE_PUBLIC_CLOUD_ENDPOINTS - + if not env_dict.get('TENANT_ID'): pytest.fail('ERROR: variable TENANT_ID is required.') - + if not env_dict.get('CLIENT_ID'): pytest.fail('ERROR: variable CLIENT_ID is required.') - + if not env_dict.get('CLIENT_SECRET'): pytest.fail('ERROR: variable CLIENT_SECRET is required.') - + print("Setup Complete.") append_result_output("Setup Complete.\n", env_dict['SETUP_LOG_FILE']) @@ -66,22 +67,22 @@ def env_dict(): else: with Path.open(my_file, "rb") as f: env_dict = pickle.load(f) - + yield env_dict - + my_file = Path("env.pkl") with FileLock(str(my_file) + ".lock"): with Path.open(my_file, "rb") as f: env_dict = pickle.load(f) env_dict['NUM_TESTS_COMPLETED'] = 1 + env_dict.get('NUM_TESTS_COMPLETED') - if env_dict['NUM_TESTS_COMPLETED'] == int(os.getenv('NUM_TESTS')): + if env_dict['NUM_TESTS_COMPLETED'] == int(os.getenv('NUM_TESTS')): # Checking if cleanup is required. if os.getenv('SKIP_CLEANUP'): return print('Starting cleanup...') append_result_output("Starting Cleanup...\n", env_dict['SETUP_LOG_FILE']) - + print("Cleanup Complete.") append_result_output("Cleanup Complete.\n", env_dict['SETUP_LOG_FILE']) return diff --git a/test/e2e/src/core/e2e_tests.sh b/test/e2e/src/core/e2e_tests.sh index 6c5b1a553..97937db71 100644 --- a/test/e2e/src/core/e2e_tests.sh +++ b/test/e2e/src/core/e2e_tests.sh @@ -98,16 +98,6 @@ validateArcConfTestParameters() { echo "ERROR: parameter CLUSTER_NAME is required." > ${results_dir}/error python3 setup_failure_handler.py fi - - if [ -z $CI_ARC_RELEASE_TRAIN ]; then - echo "ERROR: parameter CI_ARC_RELEASE_TRAIN is required." > ${results_dir}/error - python3 setup_failure_handler.py - fi - - if [ -z $CI_ARC_VERSION ]; then - echo "ERROR: parameter CI_ARC_VERSION is required." > ${results_dir}/error - python3 setup_failure_handler.py - fi } addArcConnectedK8sExtension() { @@ -121,17 +111,16 @@ addArcK8sCLIExtension() { } createArcCIExtension() { - echo "creating extension type: Microsoft.AzureMonitor.Containers with release train: ${CI_ARC_RELEASE_TRAIN} and version: ${CI_ARC_VERSION}" - az k8s-extension create \ - --cluster-name $CLUSTER_NAME \ - --resource-group $RESOURCE_GROUP \ - --cluster-type connectedClusters \ - --extension-type Microsoft.AzureMonitor.Containers \ - --scope cluster \ - --release-train $CI_ARC_RELEASE_TRAIN \ - --name azuremonitor-containers \ - --version $CI_ARC_VERSION - # 2> ${results_dir}/error || python3 setup_failure_handler.py + echo "creating extension type: Microsoft.AzureMonitor.Containers" + basicparameters="--cluster-name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --cluster-type connectedClusters --extension-type Microsoft.AzureMonitor.Containers --scope cluster --name azuremonitor-containers" + if [ ! -z "$CI_ARC_RELEASE_TRAIN" ]; then + basicparameters="$basicparameters --release-train $CI_ARC_RELEASE_TRAIN" + fi + if [ ! -z "$CI_ARC_VERSION" ]; then + basicparameters="$basicparameters --version $CI_ARC_VERSION" + fi + + az k8s-extension create $basicparameters --configuration-settings omsagent.ISTEST=true 2> ${results_dir}/error || python3 setup_failure_handler.py } showArcCIExtension() { diff --git a/test/e2e/src/tests/test_resource_status.py b/test/e2e/src/tests/test_resource_status.py index bb63dac7c..f2b5569e9 100755 --- a/test/e2e/src/tests/test_resource_status.py +++ b/test/e2e/src/tests/test_resource_status.py @@ -20,7 +20,7 @@ def test_resource_status(env_dict): #config.load_kube_config() except Exception as e: pytest.fail("Error loading the in-cluster config: " + str(e)) - + # checking the deployment status check_kubernetes_deployment_status( constants.AGENT_RESOURCES_NAMESPACE, constants.AGENT_DEPLOYMENT_NAME, env_dict['TEST_AGENT_LOG_FILE']) @@ -29,14 +29,21 @@ def test_resource_status(env_dict): check_kubernetes_daemonset_status( constants.AGENT_RESOURCES_NAMESPACE, constants.AGENT_DAEMONSET_NAME, env_dict['TEST_AGENT_LOG_FILE']) - expectedPodRestartCount = env_dict['AGENT_POD_EXPECTED_RESTART_COUNT'] + expectedPodRestartCount = env_dict['AGENT_POD_EXPECTED_RESTART_COUNT'] # checking deployment pod status check_kubernetes_pods_status(constants.AGENT_RESOURCES_NAMESPACE, constants.AGENT_DEPLOYMENT_PODS_LABEL_SELECTOR, expectedPodRestartCount, env_dict['TEST_AGENT_LOG_FILE']) # checking daemonset pod status - check_kubernetes_pods_status(constants.AGENT_RESOURCES_NAMESPACE, + isNonArcK8Environment = env_dict.get('IS_NON_ARC_K8S_TEST_ENVIRONMENT') + + if not isNonArcK8Environment: + check_kubernetes_pods_status(constants.AGENT_RESOURCES_NAMESPACE, constants.AGENT_DAEMON_SET_PODS_LABEL_SELECTOR, expectedPodRestartCount, env_dict['TEST_AGENT_LOG_FILE']) + else: + check_kubernetes_pods_status(constants.AGENT_RESOURCES_NAMESPACE, + constants.AGENT_DAEMON_SET_PODS_LABEL_SELECTOR_NON_ARC, expectedPodRestartCount, env_dict['TEST_AGENT_LOG_FILE']) + append_result_output("test_resource_status end \n", env_dict['TEST_AGENT_LOG_FILE']) From 9ecfbde3017b371abfa37679f14f3141ea92a5f1 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 6 Jul 2021 17:13:43 -0700 Subject: [PATCH 05/14] handle log path --- test/e2e/conformance.yaml | 2 +- test/e2e/src/common/constants.py | 1 + test/e2e/src/core/conftest.py | 3 + test/e2e/src/tests/test_ds_workflows.py | 22 ++- test/e2e/src/tests/test_e2e_workflows.py | 178 +++++++++++------------ test/e2e/src/tests/test_rs_workflows.py | 7 +- 6 files changed, 116 insertions(+), 97 deletions(-) diff --git a/test/e2e/conformance.yaml b/test/e2e/conformance.yaml index 2ce721b25..49ae8562a 100644 --- a/test/e2e/conformance.yaml +++ b/test/e2e/conformance.yaml @@ -3,7 +3,7 @@ sonobuoy-config: plugin-name: azure-arc-ci-conformance result-format: junit spec: - image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest07062021 + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest07062021-2 imagePullPolicy: Always name: plugin resources: {} diff --git a/test/e2e/src/common/constants.py b/test/e2e/src/common/constants.py index 879169572..bd4c8be88 100644 --- a/test/e2e/src/common/constants.py +++ b/test/e2e/src/common/constants.py @@ -50,6 +50,7 @@ AGENT_DAEMON_SET_PODS_LABEL_SELECTOR = 'dsName=omsagent-ds' AGENT_DAEMON_SET_PODS_LABEL_SELECTOR_NON_ARC = 'component=oms-agent' AGENT_FLUENTD_LOG_PATH = '/var/opt/microsoft/docker-cimprov/log/fluentd.log' +AGENT_OMSAGENT_LOG_PATH = '/var/opt/microsoft/omsagent/log/omsagent.log' AGENT_REPLICASET_WORKFLOWS = ["kubePodInventoryEmitStreamSuccess", "kubeNodeInventoryEmitStreamSuccess"] # override this through setting enviornment variable if the expected restart count is > 0 for example applying configmap diff --git a/test/e2e/src/core/conftest.py b/test/e2e/src/core/conftest.py index 6f3e6cf1c..17d4cce2f 100644 --- a/test/e2e/src/core/conftest.py +++ b/test/e2e/src/core/conftest.py @@ -34,6 +34,9 @@ def env_dict(): env_dict['CLIENT_ID'] = os.getenv('CLIENT_ID') env_dict['CLIENT_SECRET'] = os.getenv('CLIENT_SECRET') env_dict['IS_NON_ARC_K8S_TEST_ENVIRONMENT'] = os.getenv('IS_NON_ARC_K8S_TEST_ENVIRONMENT') + # released agent for Arc K8s still uses omsagent and when we rollout the agent with mdsd + # this shouldnt set after agent rollout with mdsd + env_dict['USING_OMSAGENT_BASE_AGENT'] = os.getenv('USING_OMSAGENT_BASE_AGENT') # get default query time interval for log analytics queries queryTimeInterval = int(os.getenv('DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES')) if os.getenv('DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES') else constants.DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES diff --git a/test/e2e/src/tests/test_ds_workflows.py b/test/e2e/src/tests/test_ds_workflows.py index 81ef08325..1ef251cba 100755 --- a/test/e2e/src/tests/test_ds_workflows.py +++ b/test/e2e/src/tests/test_ds_workflows.py @@ -18,24 +18,34 @@ def test_ds_workflows(env_dict): env_dict['TEST_AGENT_LOG_FILE']) # Loading in-cluster kube-config try: - config.load_incluster_config() + config.load_incluster_config() except Exception as e: pytest.fail("Error loading the in-cluster config: " + str(e)) print("getting daemonset pod list") api_instance = client.CoreV1Api() - pod_list = get_pod_list(api_instance, constants.AGENT_RESOURCES_NAMESPACE, - constants.AGENT_DAEMON_SET_PODS_LABEL_SELECTOR) + + daemonsetPodLabelSelector = constants.AGENT_DAEMON_SET_PODS_LABEL_SELECTOR + isNonArcK8Environment = env_dict.get('IS_NON_ARC_K8S_TEST_ENVIRONMENT') + if isNonArcK8Environment: + daemonsetPodLabelSelector = constants.AGENT_DAEMON_SET_PODS_LABEL_SELECTOR_NON_ARC + + pod_list = get_pod_list(api_instance, constants.AGENT_RESOURCES_NAMESPACE, daemonsetPodLabelSelector) if not pod_list: pytest.fail("daemonset pod_list shouldnt be null or empty") if len(pod_list.items) <= 0: pytest.fail("number of items in daemonset pod list should be greater than 0") - + + isOMSBaseAgent = env_dict.get('USING_OMSAGENT_BASE_AGENT') + agentLogPath = constants.AGENT_FLUENTD_LOG_PATH + if isOMSBaseAgent: + agentLogPath = constants.AGENT_OMSAGENT_LOG_PATH + for podItem in pod_list.items: podName = podItem.metadata.name logcontent = get_log_file_content( - api_instance, constants.AGENT_RESOURCES_NAMESPACE, podName, constants.AGENT_OMSAGENT_LOG_PATH) + api_instance, constants.AGENT_RESOURCES_NAMESPACE, podName, agentLogPath) if not logcontent: pytest.fail("logcontent should not be null or empty for pod: " + podName) loglines = logcontent.split("\n") @@ -53,7 +63,7 @@ def test_ds_workflows(env_dict): if IsContainerPerfEmitStream == False: pytest.fail("ContainerPerf stream not emitted successfully from pod:" + podName) if IsContainerInventoryStream == False: - pytest.fail("ContainerInventory stream not emitted successfully from pod:" + podName) + pytest.fail("ContainerInventory stream not emitted successfully from pod:" + podName) append_result_output("test_ds_workflows end \n", env_dict['TEST_AGENT_LOG_FILE']) diff --git a/test/e2e/src/tests/test_e2e_workflows.py b/test/e2e/src/tests/test_e2e_workflows.py index 11a8e18e3..6f7c53e99 100755 --- a/test/e2e/src/tests/test_e2e_workflows.py +++ b/test/e2e/src/tests/test_e2e_workflows.py @@ -25,31 +25,31 @@ def test_e2e_workflows(env_dict): queryTimeInterval = env_dict['DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES'] if not queryTimeInterval: pytest.fail("DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES should not be null or empty") - + # get the cluster resource id from replicaset pod envvars - api_instance = client.CoreV1Api() + api_instance = client.CoreV1Api() pod_list = get_pod_list(api_instance, constants.AGENT_RESOURCES_NAMESPACE, constants.AGENT_DEPLOYMENT_PODS_LABEL_SELECTOR) - + if not pod_list: pytest.fail("pod_list shouldnt be null or empty") if len(pod_list.items) <= 0: pytest.fail("number of items in pod list should be greater than 0") - + envVars = pod_list.items[0].spec.containers[0].env if not envVars: pytest.fail("environment variables should be defined in the replicaset pod") - + clusterResourceId = '' for env in envVars: if env.name == "AKS_RESOURCE_ID": clusterResourceId = env.value print("cluster resource id: {}".format(clusterResourceId)) - + if not clusterResourceId: pytest.fail("failed to get clusterResourceId from replicaset pod environment variables") - + # fetch AAD token for log analytics resource for the queries tenant_id = env_dict.get('TENANT_ID') authority_uri = env_dict.get('AZURE_ENDPOINTS').get('activeDirectory') + tenant_id @@ -59,102 +59,102 @@ def test_e2e_workflows(env_dict): aad_token = fetch_aad_token(client_id, client_secret, authority_uri, resource) if not aad_token: pytest.fail("failed to fetch AAD token") - + access_token = aad_token.get('accessToken') if not access_token: pytest.fail("access_token shouldnt be null or empty") - - # validate e2e workflows by checking data in log analytics workspace through resource centric queries + + # validate e2e workflows by checking data in log analytics workspace through resource centric queries queryUrl = resource + "/v1" + clusterResourceId + "/query" Headers = { "Authorization": str("Bearer " + access_token), "Content-Type": "application/json" - } - # KubePodInventory + } + # KubePodInventory query = constants.KUBE_POD_INVENTORY_QUERY.format(queryTimeInterval) - params = { 'query': query} + params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('KUBE_POD_INVENTORY')) - + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: pytest.fail("rowCount should be greater than for cluster: {0} and workflow: {1}".format(clusterResourceId, 'KUBE_POD_INVENTORY')) - + # KubeNodeInventory query = constants.KUBE_NODE_INVENTORY_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('KUBE_NODE_INVENTORY')) - + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'KUBE_NODE_INVENTORY')) - + # KubeServices query = constants.KUBE_SERVICES_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('KUBE_SERVICES')) - + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'KUBE_SERVICES')) - + # KubeEvents query = constants.KUBE_EVENTS_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('KUBE_EVENTS')) - + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'KUBE_EVENTS')) - - # Container Node Inventory + + # Container Node Inventory query = constants.CONTAINER_NODE_INVENTORY_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_NODE_INVENTORY')) - + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_NODE_INVENTORY')) - - # Node Perf + + # Node Perf # cpu capacity query = constants.NODE_PERF_CPU_CAPCITY_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_CPU_CAPCITY')) - + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_CPU_CAPCITY')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_CPU_CAPCITY')) - + # memory capacity query = constants.NODE_PERF_MEMORY_CAPCITY_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_MEMORY_CAPCITY')) - + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_MEMORY_CAPCITY')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_MEMORY_CAPCITY')) - - # cpu allocatable + + # cpu allocatable query = constants.NODE_PERF_CPU_ALLOCATABLE_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_CPU_ALLOCATABLE')) - + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_CPU_ALLOCATABLE')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_CPU_ALLOCATABLE')) @@ -164,167 +164,167 @@ def test_e2e_workflows(env_dict): params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_MEMORY_ALLOCATABLE')) - + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_MEMORY_ALLOCATABLE')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_MEMORY_ALLOCATABLE')) - + # cpu usage query = constants.NODE_PERF_CPU_USAGE_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_CPU_USAGE')) - + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_CPU_USAGE')) - + # memory rss usage query = constants.NODE_PERF_MEMORY_RSS_USAGE_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_MEMORY_RSS_USAGE')) - + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_MEMORY_RSS_USAGE')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: - pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_MEMORY_RSS_USAGE')) - + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_MEMORY_RSS_USAGE')) + # memory ws usage query = constants.NODE_PERF_MEMORY_WS_USAGE_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_MEMORY_WS_USAGE')) - + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_MEMORY_WS_USAGE')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: - pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_MEMORY_WS_USAGE')) - + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_MEMORY_WS_USAGE')) + # restartime epoch query = constants.NODE_PERF_RESTART_TIME_EPOCH_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_RESTART_TIME_EPOCH')) - + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_RESTART_TIME_EPOCH')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: - pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_RESTART_TIME_EPOCH')) - + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_RESTART_TIME_EPOCH')) + # Container Perf - # container cpu limits + # container cpu limits query = constants.CONTAINER_PERF_CPU_LIMITS_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_CPU_LIMITS')) - + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_CPU_LIMITS')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_CPU_LIMITS')) - + # container memory limits query = constants.CONTAINER_PERF_MEMORY_LIMITS_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_MEMORY_LIMITS')) - + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_MEMORY_LIMITS')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_MEMORY_LIMITS')) - + # cpu requests query = constants.CONTAINER_PERF_CPU_REQUESTS_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_CPU_REQUESTS')) - + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_CPU_REQUESTS')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: - pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_CPU_REQUESTS')) - + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_CPU_REQUESTS')) + # memory requests query = constants.CONTAINER_PERF_MEMORY_REQUESTS_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_MEMORY_REQUESTS_QUERY')) - + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_MEMORY_REQUESTS_QUERY')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_MEMORY_REQUESTS')) - + # cpu usage query = constants.CONTAINER_PERF_CPU_USAGE_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_CPU_USAGE')) - + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_CPU_USAGE')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: - pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_CPU_USAGE')) - + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_CPU_USAGE')) + # memory rss usage query = constants.CONTAINER_PERF_MEMORY_RSS_USAGE_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_MEMORY_RSS_USAGE')) - + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_MEMORY_RSS_USAGE')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_MEMORY_RSS_USAGE')) - + # memory ws usage query = constants.CONTAINER_PERF_MEMORY_WS_USAGE_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_MEMORY_WS_USAGE')) - + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_MEMORY_WS_USAGE')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: - pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_MEMORY_WS_USAGE')) - + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_MEMORY_WS_USAGE')) + # restart time epoch query = constants.CONTAINER_PERF_RESTART_TIME_EPOCH_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_RESTART_TIME_EPOCH')) - + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_RESTART_TIME_EPOCH')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: - pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_RESTART_TIME_EPOCH')) - - # Container log + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_RESTART_TIME_EPOCH')) + + # Container log query = constants.CONTAINER_LOG_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_LOG')) - + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_LOG')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_LOG')) - + # InsightsMetrics query = constants.INSIGHTS_METRICS_QUERY.format(queryTimeInterval) params = { 'query': query} result = requests.get(queryUrl, params=params, headers=Headers, verify=False) - if not result: - pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('INSIGHTS_METRICS')) - + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('INSIGHTS_METRICS')) + rowCount = result.json()['tables'][0]['rows'][0][0] if not rowCount: - pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'INSIGHTS_METRICS')) - + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'INSIGHTS_METRICS')) + append_result_output("test_e2e_workflows end \n", env_dict['TEST_AGENT_LOG_FILE']) print("Successfully completed e2e workflows test.") diff --git a/test/e2e/src/tests/test_rs_workflows.py b/test/e2e/src/tests/test_rs_workflows.py index b644dbcc0..1670fae1f 100755 --- a/test/e2e/src/tests/test_rs_workflows.py +++ b/test/e2e/src/tests/test_rs_workflows.py @@ -36,8 +36,13 @@ def test_rs_workflows(env_dict): if not rspodName: pytest.fail("replicaset pod name should not be null or empty") + isOMSBaseAgent = env_dict.get('USING_OMSAGENT_BASE_AGENT') + agentLogPath = constants.AGENT_FLUENTD_LOG_PATH + if isOMSBaseAgent: + agentLogPath = constants.AGENT_OMSAGENT_LOG_PATH + logcontent = get_log_file_content( - api_instance, constants.AGENT_RESOURCES_NAMESPACE, rspodName, constants.AGENT_FLUENTD_LOG_PATH) + api_instance, constants.AGENT_RESOURCES_NAMESPACE, rspodName, agentLogPath) if not logcontent: pytest.fail("logcontent should not be null or empty for rs pod: {}".format(rspodName)) loglines = logcontent.split("\n") From 6c2f7bb3efb83c3030b21970a569779739417ea0 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 6 Jul 2021 17:17:53 -0700 Subject: [PATCH 06/14] cleanup --- test/e2e/src/core/e2e_tests.sh | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/test/e2e/src/core/e2e_tests.sh b/test/e2e/src/core/e2e_tests.sh index 97937db71..660401270 100644 --- a/test/e2e/src/core/e2e_tests.sh +++ b/test/e2e/src/core/e2e_tests.sh @@ -185,13 +185,6 @@ else # login to azure login_to_azure - # Wait for resources in ARC ns -# waitSuccessArc="$(waitForResources azure-arc deployment)" -# if [ "${waitSuccessArc}" = "false" ]; then -# echo "deployment is not avilable in namespace - azure-arc" -# exit 1 -# fi - # add arc k8s connectedk8s extension addArcConnectedK8sExtension @@ -211,20 +204,6 @@ else waitForCIExtensionInstalled fi -# Wait for deployment resources in kube-system ns -# waitSuccessArc="$(waitForResources kube-system deployment omsagent-rs)" -# if [ "${waitSuccessArc}" = "false" ]; then -# echo "omsagent-rs deployment is not avilable in namespace - kube-system" -# exit 1 -# fi - -# Wait for ds resources in kube-system ns -# waitSuccessArc="$(waitForResources kube-system ds omsagent)" -# if [ "${waitSuccessArc}" = "false" ]; then -# echo "omsagent is not avilable in namespace - kube-system" -# exit 1 -# fi - # The variable 'TEST_LIST' should be provided if we want to run specific tests. If not provided, all tests are run NUM_PROCESS=$(pytest /e2etests/ --collect-only -k "$TEST_NAME_LIST" -m "$TEST_MARKER_LIST" | grep " Date: Tue, 6 Jul 2021 22:00:19 -0700 Subject: [PATCH 07/14] clean up --- test/e2e/conformance.yaml | 2 +- test/e2e/e2e-tests.yaml | 2 +- test/e2e/src/common/constants.py | 2 + test/e2e/src/core/conftest.py | 2 + test/e2e/src/tests/test_ds_workflows.py | 6 +++ test/e2e/src/tests/test_e2e_workflows.py | 7 +++ .../tests/test_node_metrics_e2e_workflow.py | 52 +++++++++++-------- .../tests/test_pod_metrics_e2e_workflow.py | 12 +++-- test/e2e/src/tests/test_rs_workflows.py | 7 +++ 9 files changed, 64 insertions(+), 28 deletions(-) diff --git a/test/e2e/conformance.yaml b/test/e2e/conformance.yaml index 49ae8562a..45b8f5e64 100644 --- a/test/e2e/conformance.yaml +++ b/test/e2e/conformance.yaml @@ -3,7 +3,7 @@ sonobuoy-config: plugin-name: azure-arc-ci-conformance result-format: junit spec: - image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest07062021-2 + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest07062021-5 imagePullPolicy: Always name: plugin resources: {} diff --git a/test/e2e/e2e-tests.yaml b/test/e2e/e2e-tests.yaml index e606e7a3b..26bcc8ad5 100644 --- a/test/e2e/e2e-tests.yaml +++ b/test/e2e/e2e-tests.yaml @@ -104,7 +104,7 @@ data: - name: AZURE_CLOUD value: "AZURE_PUBLIC_CLOUD" # image tag should be updated if new tests being added after this image - image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest07062021 + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest07062021-4 imagePullPolicy: IfNotPresent name: plugin resources: {} diff --git a/test/e2e/src/common/constants.py b/test/e2e/src/common/constants.py index bd4c8be88..392b10554 100644 --- a/test/e2e/src/common/constants.py +++ b/test/e2e/src/common/constants.py @@ -40,6 +40,8 @@ TIMEOUT = 300 +# WAIT TIME BEFORE READING THE AGENT LOGS +AGENT_WAIT_TIME_SECS = "180" # Azure Monitor for Container Extension related AGENT_RESOURCES_NAMESPACE = 'kube-system' AGENT_DEPLOYMENT_NAME = 'omsagent-rs' diff --git a/test/e2e/src/core/conftest.py b/test/e2e/src/core/conftest.py index 17d4cce2f..02f644a18 100644 --- a/test/e2e/src/core/conftest.py +++ b/test/e2e/src/core/conftest.py @@ -38,6 +38,8 @@ def env_dict(): # this shouldnt set after agent rollout with mdsd env_dict['USING_OMSAGENT_BASE_AGENT'] = os.getenv('USING_OMSAGENT_BASE_AGENT') + waitTimeInterval = int(os.getenv('AGENT_WAIT_TIME_SECS')) if os.getenv('AGENT_WAIT_TIME_SECS') else constants.AGENT_WAIT_TIME_SECS + env_dict['AGENT_WAIT_TIME_SECS'] = waitTimeInterval # get default query time interval for log analytics queries queryTimeInterval = int(os.getenv('DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES')) if os.getenv('DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES') else constants.DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES # add minute suffix since this format required for LA queries diff --git a/test/e2e/src/tests/test_ds_workflows.py b/test/e2e/src/tests/test_ds_workflows.py index 1ef251cba..731957788 100755 --- a/test/e2e/src/tests/test_ds_workflows.py +++ b/test/e2e/src/tests/test_ds_workflows.py @@ -1,5 +1,6 @@ import pytest import constants +import time from kubernetes import client, config from kubernetes_pod_utility import get_pod_list, get_log_file_content @@ -37,6 +38,11 @@ def test_ds_workflows(env_dict): if len(pod_list.items) <= 0: pytest.fail("number of items in daemonset pod list should be greater than 0") + waitTimeSeconds = env_dict['AGENT_WAIT_TIME_SECS'] + print("start: waiting for seconds: {} for agent workflows to get emitted".format(waitTimeSeconds)) + time.sleep(int(waitTimeSeconds)) + print("complete: waiting for seconds: {} for agent workflows to get emitted".format(waitTimeSeconds)) + isOMSBaseAgent = env_dict.get('USING_OMSAGENT_BASE_AGENT') agentLogPath = constants.AGENT_FLUENTD_LOG_PATH if isOMSBaseAgent: diff --git a/test/e2e/src/tests/test_e2e_workflows.py b/test/e2e/src/tests/test_e2e_workflows.py index 6f7c53e99..91353d1ee 100755 --- a/test/e2e/src/tests/test_e2e_workflows.py +++ b/test/e2e/src/tests/test_e2e_workflows.py @@ -1,6 +1,7 @@ import pytest import constants import requests +import time from arm_rest_utility import fetch_aad_token from kubernetes import client, config @@ -70,6 +71,12 @@ def test_e2e_workflows(env_dict): "Authorization": str("Bearer " + access_token), "Content-Type": "application/json" } + + waitTimeSeconds = env_dict['AGENT_WAIT_TIME_SECS'] + print("start: waiting for seconds: {} for agent workflows to get emitted".format(waitTimeSeconds)) + time.sleep(int(waitTimeSeconds)) + print("complete: waiting for seconds: {} for agent workflows to get emitted".format(waitTimeSeconds)) + # KubePodInventory query = constants.KUBE_POD_INVENTORY_QUERY.format(queryTimeInterval) params = { 'query': query} diff --git a/test/e2e/src/tests/test_node_metrics_e2e_workflow.py b/test/e2e/src/tests/test_node_metrics_e2e_workflow.py index 4346f89a8..10b9d7c1f 100755 --- a/test/e2e/src/tests/test_node_metrics_e2e_workflow.py +++ b/test/e2e/src/tests/test_node_metrics_e2e_workflow.py @@ -1,6 +1,7 @@ import pytest import constants import requests +import time from arm_rest_utility import fetch_aad_token from kubernetes import client, config @@ -69,6 +70,11 @@ def test_node_metrics_e2e_workflow(env_dict): if not access_token: pytest.fail("access_token shouldnt be null or empty") + waitTimeSeconds = env_dict['AGENT_WAIT_TIME_SECS'] + print("start: waiting for seconds: {} for agent workflows to get emitted".format(waitTimeSeconds)) + time.sleep(int(waitTimeSeconds)) + print("complete: waiting for seconds: {} for agent workflows to get emitted".format(waitTimeSeconds)) + # validate metrics e2e workflow now = datetime.utcnow() endtime = now.isoformat()[:-3]+'Z' @@ -121,14 +127,14 @@ def test_node_metrics_e2e_workflow(env_dict): for responseVal in responseValues: metricName = responseVal['name']['value'] if metricName != constants.NODE_MEMORY_RSS_METRIC_NAME: - pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(metricName, constants.NODE_MEMORY_RSS_METRIC_NAME)) - timeseries = responseVal['timeseries'] + pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(metricName, constants.NODE_MEMORY_RSS_METRIC_NAME)) + timeseries = responseVal['timeseries'] if not timeseries: pytest.fail("metric series shouldnt be null or empty for metric:{0} in namespace: {1}".format( constants.NODE_MEMORY_RSS_METRIC_NAME, constants.NODE_METRICS_NAMESPACE)) if len(timeseries) <= 0: pytest.fail("length of timeseries should be greater than for 0 for metric: {0} in namespace :{1}".format(constants.NODE_MEMORY_RSS_METRIC_NAME, constants.NODE_METRICS_NAMESPACE)) - + # node metric - memoryRssPercentage custommetricsUrl = '{0}{1}/providers/microsoft.Insights/metrics?timespan={2}/{3}&interval=FULL&metricnames={4}&aggregation={5}&metricNamespace={6}&validatedimensions=false&api-version={7}'.format( resourceManager.rstrip("/"), @@ -170,14 +176,14 @@ def test_node_metrics_e2e_workflow(env_dict): for responseVal in responseValues: metricName = responseVal['name']['value'] if metricName != constants.NODE_MEMORY_RSS_PERCENTAGE_METRIC_NAME: - pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(metricName, constants.NODE_MEMORY_RSS_PERCENTAGE_METRIC_NAME)) - timeseries = responseVal['timeseries'] + pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(metricName, constants.NODE_MEMORY_RSS_PERCENTAGE_METRIC_NAME)) + timeseries = responseVal['timeseries'] if not timeseries: pytest.fail("metric series shouldnt be null or empty for metric:{0} in namespace: {1}".format( constants.NODE_MEMORY_RSS_PERCENTAGE_METRIC_NAME, constants.NODE_METRICS_NAMESPACE)) if len(timeseries) <= 0: pytest.fail("length of timeseries should be greater than for 0 for metric: {0} in namespace :{1}".format(constants.NODE_MEMORY_RSS_PERCENTAGE_METRIC_NAME, constants.NODE_METRICS_NAMESPACE)) - + # node metric - memoryWorkingSetBytes custommetricsUrl = '{0}{1}/providers/microsoft.Insights/metrics?timespan={2}/{3}&interval=FULL&metricnames={4}&aggregation={5}&metricNamespace={6}&validatedimensions=false&api-version={7}'.format( resourceManager.rstrip("/"), @@ -218,14 +224,14 @@ def test_node_metrics_e2e_workflow(env_dict): for responseVal in responseValues: metricName = responseVal['name']['value'] if metricName != constants.NODE_MEMORY_WS_METRIC_NAME: - pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(metricName, constants.NODE_MEMORY_WS_METRIC_NAME)) - timeseries = responseVal['timeseries'] + pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(metricName, constants.NODE_MEMORY_WS_METRIC_NAME)) + timeseries = responseVal['timeseries'] if not timeseries: pytest.fail("metric series shouldnt be null or empty for metric:{0} in namespace: {1}".format( constants.NODE_MEMORY_WS_METRIC_NAME, constants.NODE_METRICS_NAMESPACE)) if len(timeseries) <= 0: pytest.fail("length of timeseries should be greater than for 0 for metric: {0} in namespace :{1}".format(constants.NODE_MEMORYE_WS_METRIC_NAME, constants.NODE_METRICS_NAMESPACE)) - + # node metric - memoryWorkingSetPercentage custommetricsUrl = '{0}{1}/providers/microsoft.Insights/metrics?timespan={2}/{3}&interval=FULL&metricnames={4}&aggregation={5}&metricNamespace={6}&validatedimensions=false&api-version={7}'.format( resourceManager.rstrip("/"), @@ -266,14 +272,14 @@ def test_node_metrics_e2e_workflow(env_dict): for responseVal in responseValues: metricName = responseVal['name']['value'] if metricName != constants.NODE_MEMORY_WS_PERCENTAGE_METRIC_NAME: - pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(metricName, constants.NODE_MEMORY_WS_PERCENTAGE_METRIC_NAME)) - timeseries = responseVal['timeseries'] + pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(metricName, constants.NODE_MEMORY_WS_PERCENTAGE_METRIC_NAME)) + timeseries = responseVal['timeseries'] if not timeseries: pytest.fail("metric series shouldnt be null or empty for metric:{0} in namespace: {1}".format( constants.NODE_MEMORY_WS_PERCENTAGE_METRIC_NAME, constants.NODE_METRICS_NAMESPACE)) if len(timeseries) <= 0: pytest.fail("length of timeseries should be greater than for 0 for metric: {0} in namespace :{1}".format(constants.NODE_MEMORY_WS_PERCENTAGE_METRIC_NAME, constants.NODE_METRICS_NAMESPACE)) - + # node metric - cpuUsageMilliCores custommetricsUrl = '{0}{1}/providers/microsoft.Insights/metrics?timespan={2}/{3}&interval=FULL&metricnames={4}&aggregation={5}&metricNamespace={6}&validatedimensions=false&api-version={7}'.format( resourceManager.rstrip("/"), @@ -313,14 +319,14 @@ def test_node_metrics_e2e_workflow(env_dict): for responseVal in responseValues: metricName = responseVal['name']['value'] if metricName != constants.NODE_CPU_USAGE_MILLI_CORES_METRIC_NAME: - pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(metricName, constants.NODE_CPU_USAGE_MILLI_CORES_METRIC_NAME)) - timeseries = responseVal['timeseries'] + pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(metricName, constants.NODE_CPU_USAGE_MILLI_CORES_METRIC_NAME)) + timeseries = responseVal['timeseries'] if not timeseries: pytest.fail("metric series shouldnt be null or empty for metric:{0} in namespace: {1}".format( constants.NODE_CPU_USAGE_MILLI_CORES_METRIC_NAME, constants.NODE_METRICS_NAMESPACE)) if len(timeseries) <= 0: pytest.fail("length of timeseries should be greater than for 0 for metric: {0} in namespace :{1}".format(constants.NODE_CPU_USAGE_MILLI_CORES_METRIC_NAME, constants.NODE_METRICS_NAMESPACE)) - + # node metric - cpuUsagePercentage custommetricsUrl = '{0}{1}/providers/microsoft.Insights/metrics?timespan={2}/{3}&interval=FULL&metricnames={4}&aggregation={5}&metricNamespace={6}&validatedimensions=false&api-version={7}'.format( resourceManager.rstrip("/"), @@ -360,14 +366,14 @@ def test_node_metrics_e2e_workflow(env_dict): for responseVal in responseValues: metricName = responseVal['name']['value'] if metricName != constants.NODE_CPU_USAGE_PERCENTAGE_METRIC_NAME: - pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(metricName, constants.NODE_CPU_USAGE_PERCENTAGE_METRIC_NAME)) - timeseries = responseVal['timeseries'] + pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(metricName, constants.NODE_CPU_USAGE_PERCENTAGE_METRIC_NAME)) + timeseries = responseVal['timeseries'] if not timeseries: pytest.fail("metric series shouldnt be null or empty for metric:{0} in namespace: {1}".format( constants.NODE_CPU_USAGE_PERCENTAGE_METRIC_NAME, constants.NODE_METRICS_NAMESPACE)) if len(timeseries) <= 0: - pytest.fail("length of timeseries should be greater than for 0 for metric: {0} in namespace :{1}".format(constants.NODE_CPU_USAGE_PERCENTAGE_METRIC_NAME, constants.NODE_METRICS_NAMESPACE)) - + pytest.fail("length of timeseries should be greater than for 0 for metric: {0} in namespace :{1}".format(constants.NODE_CPU_USAGE_PERCENTAGE_METRIC_NAME, constants.NODE_METRICS_NAMESPACE)) + # node metric - nodesCount custommetricsUrl = '{0}{1}/providers/microsoft.Insights/metrics?timespan={2}/{3}&interval=FULL&metricnames={4}&aggregation={5}&metricNamespace={6}&validatedimensions=false&api-version={7}'.format( resourceManager.rstrip("/"), @@ -407,14 +413,14 @@ def test_node_metrics_e2e_workflow(env_dict): for responseVal in responseValues: metricName = responseVal['name']['value'] if metricName != constants.NODE_COUNT_METRIC_NAME: - pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(metricName, constants.NODE_COUNT_METRIC_NAME)) - timeseries = responseVal['timeseries'] + pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(metricName, constants.NODE_COUNT_METRIC_NAME)) + timeseries = responseVal['timeseries'] if not timeseries: pytest.fail("metric series shouldnt be null or empty for metric:{0} in namespace: {1}".format( constants.NODE_COUNT_METRIC_NAME, constants.NODE_METRICS_NAMESPACE)) if len(timeseries) <= 0: - pytest.fail("length of timeseries should be greater than for 0 for metric: {0} in namespace :{1}".format(constants.NODE_COUNT_METRIC_NAME, constants.NODE_METRICS_NAMESPACE)) - + pytest.fail("length of timeseries should be greater than for 0 for metric: {0} in namespace :{1}".format(constants.NODE_COUNT_METRIC_NAME, constants.NODE_METRICS_NAMESPACE)) + append_result_output("test_node_metrics_e2e_workflow end \n", env_dict['TEST_AGENT_LOG_FILE']) print("Successfully completed node metrics e2e workflow test.") diff --git a/test/e2e/src/tests/test_pod_metrics_e2e_workflow.py b/test/e2e/src/tests/test_pod_metrics_e2e_workflow.py index cd4260f76..f0dd3366a 100755 --- a/test/e2e/src/tests/test_pod_metrics_e2e_workflow.py +++ b/test/e2e/src/tests/test_pod_metrics_e2e_workflow.py @@ -1,6 +1,7 @@ import pytest import constants import requests +import time from arm_rest_utility import fetch_aad_token from kubernetes import client, config @@ -69,6 +70,11 @@ def test_pod_metrics_e2e_workflow(env_dict): if not access_token: pytest.fail("access_token shouldnt be null or empty") + waitTimeSeconds = env_dict['AGENT_WAIT_TIME_SECS'] + print("start: waiting for seconds: {} for agent workflows to get emitted".format(waitTimeSeconds)) + time.sleep(int(waitTimeSeconds)) + print("complete: waiting for seconds: {} for agent workflows to get emitted".format(waitTimeSeconds)) + # validate metrics e2e workflow now = datetime.utcnow() endtime = now.isoformat()[:-3]+'Z' @@ -121,14 +127,14 @@ def test_pod_metrics_e2e_workflow(env_dict): for responseVal in responseValues: metricName = responseVal['name']['value'] if metricName != constants.POD_COUNT_METRIC_NAME: - pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(metricName, constants.POD_COUNT_METRIC_NAME)) - timeseries = responseVal['timeseries'] + pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(metricName, constants.POD_COUNT_METRIC_NAME)) + timeseries = responseVal['timeseries'] if not timeseries: pytest.fail("metric series shouldnt be null or empty for metric:{0} in namespace: {1}".format( constants.POD_COUNT_METRIC_NAME, constants.POD_METRICS_NAMESPACE)) if len(timeseries) <= 0: pytest.fail("length of timeseries should be greater than for 0 for metric: {0} in namespace :{1}".format(constants.POD_COUNT_METRIC_NAME, constants.POD_METRICS_NAMESPACE)) - + append_result_output("test_pod_metrics_e2e_workflow end \n", env_dict['TEST_AGENT_LOG_FILE']) print("Successfully completed e2e workflows test.") diff --git a/test/e2e/src/tests/test_rs_workflows.py b/test/e2e/src/tests/test_rs_workflows.py index 1670fae1f..36ec05867 100755 --- a/test/e2e/src/tests/test_rs_workflows.py +++ b/test/e2e/src/tests/test_rs_workflows.py @@ -1,5 +1,6 @@ import pytest import constants +import time from kubernetes import client, config from kubernetes_pod_utility import get_pod_list, get_log_file_content @@ -36,6 +37,12 @@ def test_rs_workflows(env_dict): if not rspodName: pytest.fail("replicaset pod name should not be null or empty") + + waitTimeSeconds = env_dict['AGENT_WAIT_TIME_SECS'] + print("start: waiting for seconds: {} for agent workflows to get emitted".format(waitTimeSeconds)) + time.sleep(int(waitTimeSeconds)) + print("complete: waiting for seconds: {} for agent workflows to get emitted".format(waitTimeSeconds)) + isOMSBaseAgent = env_dict.get('USING_OMSAGENT_BASE_AGENT') agentLogPath = constants.AGENT_FLUENTD_LOG_PATH if isOMSBaseAgent: From 35b6977a4c7f4cfa98195b14a6cef266febdead7 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Wed, 7 Jul 2021 14:09:33 -0700 Subject: [PATCH 08/14] wip --- test/e2e/src/tests/test_e2e_workflows.py | 46 +++++++++---------- .../tests/test_node_metrics_e2e_workflow.py | 14 +++--- .../tests/test_pod_metrics_e2e_workflow.py | 3 +- 3 files changed, 31 insertions(+), 32 deletions(-) diff --git a/test/e2e/src/tests/test_e2e_workflows.py b/test/e2e/src/tests/test_e2e_workflows.py index 91353d1ee..02ad8cf14 100755 --- a/test/e2e/src/tests/test_e2e_workflows.py +++ b/test/e2e/src/tests/test_e2e_workflows.py @@ -80,7 +80,7 @@ def test_e2e_workflows(env_dict): # KubePodInventory query = constants.KUBE_POD_INVENTORY_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('KUBE_POD_INVENTORY')) @@ -91,7 +91,7 @@ def test_e2e_workflows(env_dict): # KubeNodeInventory query = constants.KUBE_NODE_INVENTORY_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('KUBE_NODE_INVENTORY')) @@ -102,7 +102,7 @@ def test_e2e_workflows(env_dict): # KubeServices query = constants.KUBE_SERVICES_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('KUBE_SERVICES')) @@ -113,7 +113,7 @@ def test_e2e_workflows(env_dict): # KubeEvents query = constants.KUBE_EVENTS_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('KUBE_EVENTS')) @@ -124,7 +124,7 @@ def test_e2e_workflows(env_dict): # Container Node Inventory query = constants.CONTAINER_NODE_INVENTORY_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_NODE_INVENTORY')) @@ -136,7 +136,7 @@ def test_e2e_workflows(env_dict): # cpu capacity query = constants.NODE_PERF_CPU_CAPCITY_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_CPU_CAPCITY')) @@ -147,7 +147,7 @@ def test_e2e_workflows(env_dict): # memory capacity query = constants.NODE_PERF_MEMORY_CAPCITY_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_MEMORY_CAPCITY')) @@ -158,7 +158,7 @@ def test_e2e_workflows(env_dict): # cpu allocatable query = constants.NODE_PERF_CPU_ALLOCATABLE_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_CPU_ALLOCATABLE')) @@ -169,7 +169,7 @@ def test_e2e_workflows(env_dict): # memory allocatable query = constants.NODE_PERF_MEMORY_ALLOCATABLE_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_MEMORY_ALLOCATABLE')) @@ -180,7 +180,7 @@ def test_e2e_workflows(env_dict): # cpu usage query = constants.NODE_PERF_CPU_USAGE_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_CPU_USAGE')) @@ -191,7 +191,7 @@ def test_e2e_workflows(env_dict): # memory rss usage query = constants.NODE_PERF_MEMORY_RSS_USAGE_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_MEMORY_RSS_USAGE')) @@ -202,7 +202,7 @@ def test_e2e_workflows(env_dict): # memory ws usage query = constants.NODE_PERF_MEMORY_WS_USAGE_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_MEMORY_WS_USAGE')) @@ -213,7 +213,7 @@ def test_e2e_workflows(env_dict): # restartime epoch query = constants.NODE_PERF_RESTART_TIME_EPOCH_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_RESTART_TIME_EPOCH')) @@ -225,7 +225,7 @@ def test_e2e_workflows(env_dict): # container cpu limits query = constants.CONTAINER_PERF_CPU_LIMITS_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_CPU_LIMITS')) @@ -236,7 +236,7 @@ def test_e2e_workflows(env_dict): # container memory limits query = constants.CONTAINER_PERF_MEMORY_LIMITS_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_MEMORY_LIMITS')) @@ -247,7 +247,7 @@ def test_e2e_workflows(env_dict): # cpu requests query = constants.CONTAINER_PERF_CPU_REQUESTS_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_CPU_REQUESTS')) @@ -258,7 +258,7 @@ def test_e2e_workflows(env_dict): # memory requests query = constants.CONTAINER_PERF_MEMORY_REQUESTS_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_MEMORY_REQUESTS_QUERY')) @@ -269,7 +269,7 @@ def test_e2e_workflows(env_dict): # cpu usage query = constants.CONTAINER_PERF_CPU_USAGE_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_CPU_USAGE')) @@ -280,7 +280,7 @@ def test_e2e_workflows(env_dict): # memory rss usage query = constants.CONTAINER_PERF_MEMORY_RSS_USAGE_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_MEMORY_RSS_USAGE')) @@ -291,7 +291,7 @@ def test_e2e_workflows(env_dict): # memory ws usage query = constants.CONTAINER_PERF_MEMORY_WS_USAGE_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_MEMORY_WS_USAGE')) @@ -302,7 +302,7 @@ def test_e2e_workflows(env_dict): # restart time epoch query = constants.CONTAINER_PERF_RESTART_TIME_EPOCH_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_RESTART_TIME_EPOCH')) @@ -313,7 +313,7 @@ def test_e2e_workflows(env_dict): # Container log query = constants.CONTAINER_LOG_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_LOG')) @@ -324,7 +324,7 @@ def test_e2e_workflows(env_dict): # InsightsMetrics query = constants.INSIGHTS_METRICS_QUERY.format(queryTimeInterval) params = { 'query': query} - result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + result = requests.get(queryUrl, params=params, headers=Headers) if not result: pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('INSIGHTS_METRICS')) diff --git a/test/e2e/src/tests/test_node_metrics_e2e_workflow.py b/test/e2e/src/tests/test_node_metrics_e2e_workflow.py index 10b9d7c1f..dfcc89dde 100755 --- a/test/e2e/src/tests/test_node_metrics_e2e_workflow.py +++ b/test/e2e/src/tests/test_node_metrics_e2e_workflow.py @@ -98,7 +98,7 @@ def test_node_metrics_e2e_workflow(env_dict): constants.METRICS_API_VERSION) response = requests.get(custommetricsUrl, params=params, - headers=Headers, verify=False) + headers=Headers) if not response: pytest.fail( @@ -147,7 +147,7 @@ def test_node_metrics_e2e_workflow(env_dict): constants.METRICS_API_VERSION) response = requests.get(custommetricsUrl, params=params, - headers=Headers, verify=False) + headers=Headers) if not response: pytest.fail( @@ -196,7 +196,7 @@ def test_node_metrics_e2e_workflow(env_dict): constants.METRICS_API_VERSION) response = requests.get(custommetricsUrl, params=params, - headers=Headers, verify=False) + headers=Headers) if not response: pytest.fail("response of the metrics query API shouldnt be null or empty") @@ -244,7 +244,7 @@ def test_node_metrics_e2e_workflow(env_dict): constants.METRICS_API_VERSION) response = requests.get(custommetricsUrl, params=params, - headers=Headers, verify=False) + headers=Headers) if not response: pytest.fail("response of the metrics query API shouldnt be null or empty") @@ -292,7 +292,7 @@ def test_node_metrics_e2e_workflow(env_dict): constants.METRICS_API_VERSION) response = requests.get(custommetricsUrl, params=params, - headers=Headers, verify=False) + headers=Headers) if not response: pytest.fail("response of the metrics query API shouldnt be null or empty") @@ -339,7 +339,7 @@ def test_node_metrics_e2e_workflow(env_dict): constants.METRICS_API_VERSION) response = requests.get(custommetricsUrl, params=params, - headers=Headers, verify=False) + headers=Headers) if not response: pytest.fail("response of the metrics query API shouldnt be null or empty") @@ -386,7 +386,7 @@ def test_node_metrics_e2e_workflow(env_dict): constants.METRICS_API_VERSION) response = requests.get(custommetricsUrl, params=params, - headers=Headers, verify=False) + headers=Headers) if not response: pytest.fail("response of the metrics query API shouldnt be null or empty") diff --git a/test/e2e/src/tests/test_pod_metrics_e2e_workflow.py b/test/e2e/src/tests/test_pod_metrics_e2e_workflow.py index f0dd3366a..81e2b77a9 100755 --- a/test/e2e/src/tests/test_pod_metrics_e2e_workflow.py +++ b/test/e2e/src/tests/test_pod_metrics_e2e_workflow.py @@ -97,8 +97,7 @@ def test_pod_metrics_e2e_workflow(env_dict): constants.POD_METRICS_NAMESPACE, constants.METRICS_API_VERSION) - response = requests.get(custommetricsUrl, params=params, - headers=Headers, verify=False) + response = requests.get(custommetricsUrl, params=params, headers=Headers) if not response: pytest.fail( From 521000cfccc417adfb764b9f7cbf88042c2fc97e Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sun, 1 Aug 2021 12:22:41 -0700 Subject: [PATCH 09/14] working --- test/e2e/conformance.yaml | 2 +- test/e2e/src/core/e2e_tests.sh | 46 ++++++++++++++++++---------------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/test/e2e/conformance.yaml b/test/e2e/conformance.yaml index 45b8f5e64..4354dc481 100644 --- a/test/e2e/conformance.yaml +++ b/test/e2e/conformance.yaml @@ -3,7 +3,7 @@ sonobuoy-config: plugin-name: azure-arc-ci-conformance result-format: junit spec: - image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest07062021-5 + image: docker.io/ganga1980/cidev:ciagenttest08012021-6 imagePullPolicy: Always name: plugin resources: {} diff --git a/test/e2e/src/core/e2e_tests.sh b/test/e2e/src/core/e2e_tests.sh index 660401270..a0e19e1a5 100644 --- a/test/e2e/src/core/e2e_tests.sh +++ b/test/e2e/src/core/e2e_tests.sh @@ -35,15 +35,17 @@ waitForArcK8sClusterCreated() { sleep_seconds=10 for i in $(seq 1 $max_retries) do - clusterState=$(az connectedk8s show --name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --query connectivityStatus -o json) - clusterState=$(echo $clusterState | tr -d '"' | tr -d '"\r\n') - echo "cluster current state: ${clusterState}" - if [[ ("${clusterState}" == "Connected") || ("${clusterState}" == "Connecting") ]]; then - connectivityState=true - break - else - sleep ${sleep_seconds} - fi + echo "iteration: ${i}, clustername: ${CLUSTER_NAME}, resourcegroup: ${RESOURCE_GROUP}" + clusterState=$(az connectedk8s show --name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --query connectivityStatus -o json) + clusterState=$(echo $clusterState | tr -d '"' | tr -d '"\r\n') + echo "cluster current state: ${clusterState}" + if [ ! -z "$clusterState" ]; then + if [[ ("${clusterState}" == "Connected") || ("${clusterState}" == "Connecting") ]]; then + connectivityState=true + break + fi + fi + sleep ${sleep_seconds} done echo "Arc K8s cluster connectivityState: $connectivityState" } @@ -54,15 +56,17 @@ waitForCIExtensionInstalled() { sleep_seconds=10 for i in $(seq 1 $max_retries) do - installState=$(az k8s-extension show --cluster-name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --cluster-type connectedClusters --name azuremonitor-containers --query installState -o json) - installState=$(echo $installState | tr -d '"' | tr -d '"\r\n') - echo "extension install state: ${installState}" - if [ "${installState}" == "Installed" ]; then - installedState=true - break - else - sleep ${sleep_seconds} - fi + echo "iteration: ${i}, clustername: ${CLUSTER_NAME}, resourcegroup: ${RESOURCE_GROUP}" + installState=$(az k8s-extension show --cluster-name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --cluster-type connectedClusters --name azuremonitor-containers --query installState -o json) + installState=$(echo $installState | tr -d '"' | tr -d '"\r\n') + echo "extension install state: ${installState}" + if [ ! -z "$installState" ]; then + if [ "${installState}" == "Installed" ]; then + installedState=true + break + fi + fi + sleep ${sleep_seconds} done echo "installedState: $installedState" } @@ -107,7 +111,7 @@ addArcConnectedK8sExtension() { addArcK8sCLIExtension() { echo "adding Arc K8s k8s-extension extension" - az extension add --name k8s-extension 2> ${results_dir}/error || python3 setup_failure_handler.py + az extension add --name k8s-extension } createArcCIExtension() { @@ -120,7 +124,7 @@ createArcCIExtension() { basicparameters="$basicparameters --version $CI_ARC_VERSION" fi - az k8s-extension create $basicparameters --configuration-settings omsagent.ISTEST=true 2> ${results_dir}/error || python3 setup_failure_handler.py + az k8s-extension create $basicparameters --configuration-settings omsagent.ISTEST=true } showArcCIExtension() { @@ -132,7 +136,7 @@ deleteArcCIExtension() { az k8s-extension delete --name azuremonitor-containers \ --cluster-type connectedClusters \ --cluster-name $CLUSTER_NAME \ - --resource-group $RESOURCE_GROUP || python3 setup_failure_handler.py + --resource-group $RESOURCE_GROUP } login_to_azure() { From aea73aefd8c9a148d0731ce8c82f1f2a4d16a829 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sun, 1 Aug 2021 22:07:13 -0700 Subject: [PATCH 10/14] update for mcr image --- README.md | 2 +- test/e2e/conformance.yaml | 5 +++-- test/e2e/e2e-tests.yaml | 2 +- test/e2e/src/core/e2e_tests.sh | 4 ++-- test/e2e/src/core/setup_failure_handler.py | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 73bf858cd..e3ceedc8e 100644 --- a/README.md +++ b/README.md @@ -326,7 +326,7 @@ For DEV and PROD branches, automatically deployed latest yaml with latest agent docker build -f ./core/Dockerfile -t /: . docker push /: ``` -3. update existing agentest image tag in e2e-tests.yaml with newly built image tag with MCR repo +3. update existing agentest image tag in e2e-tests.yaml & conformance.yaml with newly built image tag with MCR repo # Scenario Tests Clusters are used in release pipeline already has the yamls under test\scenario deployed. Make sure to validate these scenarios. diff --git a/test/e2e/conformance.yaml b/test/e2e/conformance.yaml index 4354dc481..522be2060 100644 --- a/test/e2e/conformance.yaml +++ b/test/e2e/conformance.yaml @@ -3,7 +3,7 @@ sonobuoy-config: plugin-name: azure-arc-ci-conformance result-format: junit spec: - image: docker.io/ganga1980/cidev:ciagenttest08012021-6 + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciconftest08012021 imagePullPolicy: Always name: plugin resources: {} @@ -12,4 +12,5 @@ spec: emptyDir: {} volumeMounts: - mountPath: /tmp/results - name: results \ No newline at end of file + name: results + \ No newline at end of file diff --git a/test/e2e/e2e-tests.yaml b/test/e2e/e2e-tests.yaml index 26bcc8ad5..dda74c818 100644 --- a/test/e2e/e2e-tests.yaml +++ b/test/e2e/e2e-tests.yaml @@ -104,7 +104,7 @@ data: - name: AZURE_CLOUD value: "AZURE_PUBLIC_CLOUD" # image tag should be updated if new tests being added after this image - image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest07062021-4 + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciconftest08012021 imagePullPolicy: IfNotPresent name: plugin resources: {} diff --git a/test/e2e/src/core/e2e_tests.sh b/test/e2e/src/core/e2e_tests.sh index a0e19e1a5..2be23fdc2 100644 --- a/test/e2e/src/core/e2e_tests.sh +++ b/test/e2e/src/core/e2e_tests.sh @@ -136,7 +136,7 @@ deleteArcCIExtension() { az k8s-extension delete --name azuremonitor-containers \ --cluster-type connectedClusters \ --cluster-name $CLUSTER_NAME \ - --resource-group $RESOURCE_GROUP + --resource-group $RESOURCE_GROUP --yes } login_to_azure() { @@ -217,4 +217,4 @@ export NUM_TESTS="$NUM_PROCESS" pytest /e2etests/ --junitxml=/tmp/results/results.xml -d --tx "$NUM_PROCESS"*popen -k "$TEST_NAME_LIST" -m "$TEST_MARKER_LIST" # cleanup extension resource -deleteArcCIExtension \ No newline at end of file +deleteArcCIExtension diff --git a/test/e2e/src/core/setup_failure_handler.py b/test/e2e/src/core/setup_failure_handler.py index a5d74fd4e..c403d7258 100644 --- a/test/e2e/src/core/setup_failure_handler.py +++ b/test/e2e/src/core/setup_failure_handler.py @@ -15,4 +15,4 @@ TestSuite.to_file(f, [test_suite], prettyprint=False) # Exit with non-zero return code -sys.exit(1) \ No newline at end of file +sys.exit(1) From 5544e3ce6dde31ee501a336cd97630847fc3a057 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 2 Aug 2021 07:03:36 -0700 Subject: [PATCH 11/14] minor --- test/e2e/conformance.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/test/e2e/conformance.yaml b/test/e2e/conformance.yaml index 522be2060..6adc4c5a0 100644 --- a/test/e2e/conformance.yaml +++ b/test/e2e/conformance.yaml @@ -13,4 +13,3 @@ spec: volumeMounts: - mountPath: /tmp/results name: results - \ No newline at end of file From a97ad7a6d7be140b6d4d6282048cd306d3979514 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 2 Aug 2021 07:29:42 -0700 Subject: [PATCH 12/14] image update --- test/e2e/conformance.yaml | 2 +- test/e2e/e2e-tests.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/e2e/conformance.yaml b/test/e2e/conformance.yaml index 6adc4c5a0..7e99385dd 100644 --- a/test/e2e/conformance.yaml +++ b/test/e2e/conformance.yaml @@ -3,7 +3,7 @@ sonobuoy-config: plugin-name: azure-arc-ci-conformance result-format: junit spec: - image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciconftest08012021 + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciconftest08022021 imagePullPolicy: Always name: plugin resources: {} diff --git a/test/e2e/e2e-tests.yaml b/test/e2e/e2e-tests.yaml index dda74c818..500e44d43 100644 --- a/test/e2e/e2e-tests.yaml +++ b/test/e2e/e2e-tests.yaml @@ -104,7 +104,7 @@ data: - name: AZURE_CLOUD value: "AZURE_PUBLIC_CLOUD" # image tag should be updated if new tests being added after this image - image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciconftest08012021 + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciconftest08022021 imagePullPolicy: IfNotPresent name: plugin resources: {} From e9e29e50fdcf96ea119b974e987b8de95b124139 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sat, 14 Aug 2021 17:07:34 -0700 Subject: [PATCH 13/14] handle latency of connected cluster resource creation --- test/e2e/src/core/e2e_tests.sh | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/test/e2e/src/core/e2e_tests.sh b/test/e2e/src/core/e2e_tests.sh index 2be23fdc2..dd9d93073 100644 --- a/test/e2e/src/core/e2e_tests.sh +++ b/test/e2e/src/core/e2e_tests.sh @@ -1,31 +1,32 @@ #!/bin/bash set -x -set -e results_dir="${RESULTS_DIR:-/tmp/results}" -waitForResources() { - available=false +waitForResourcesReady() { + ready=false max_retries=60 sleep_seconds=10 NAMESPACE=$1 RESOURCETYPE=$2 - RESOURCE=$3 + RESOURCE=$3 # if resource not specified, set to --all if [ -z $RESOURCE ]; then RESOURCE="--all" fi for i in $(seq 1 $max_retries) do - if [[ ! $(kubectl wait --for=condition=available ${RESOURCETYPE} ${RESOURCE} --namespace ${NAMESPACE}) ]]; then + if [[ ! $(kubectl wait --for=condition=Ready ${RESOURCETYPE} ${RESOURCE} --namespace ${NAMESPACE}) ]]; then + echo "waiting for the resource:${RESOURCE} of the type:${RESOURCETYPE} in namespace:${NAMESPACE} to be ready state, iteration:${i}" sleep ${sleep_seconds} else - available=true + echo "resource:${RESOURCE} of the type:${RESOURCETYPE} in namespace:${NAMESPACE} in ready state" + ready=true break fi done - echo "$available" + echo "waitForResourcesReady state: $ready" } @@ -39,12 +40,12 @@ waitForArcK8sClusterCreated() { clusterState=$(az connectedk8s show --name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --query connectivityStatus -o json) clusterState=$(echo $clusterState | tr -d '"' | tr -d '"\r\n') echo "cluster current state: ${clusterState}" - if [ ! -z "$clusterState" ]; then + if [ ! -z "$clusterState" ]; then if [[ ("${clusterState}" == "Connected") || ("${clusterState}" == "Connecting") ]]; then connectivityState=true break fi - fi + fi sleep ${sleep_seconds} done echo "Arc K8s cluster connectivityState: $connectivityState" @@ -60,15 +61,15 @@ waitForCIExtensionInstalled() { installState=$(az k8s-extension show --cluster-name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --cluster-type connectedClusters --name azuremonitor-containers --query installState -o json) installState=$(echo $installState | tr -d '"' | tr -d '"\r\n') echo "extension install state: ${installState}" - if [ ! -z "$installState" ]; then + if [ ! -z "$installState" ]; then if [ "${installState}" == "Installed" ]; then installedState=true break fi - fi + fi sleep ${sleep_seconds} done - echo "installedState: $installedState" + echo "container insights extension installedState: $installedState" } validateCommonParameters() { @@ -124,7 +125,7 @@ createArcCIExtension() { basicparameters="$basicparameters --version $CI_ARC_VERSION" fi - az k8s-extension create $basicparameters --configuration-settings omsagent.ISTEST=true + az k8s-extension create $basicparameters --configuration-settings omsagent.ISTEST=true } showArcCIExtension() { @@ -192,6 +193,9 @@ else # add arc k8s connectedk8s extension addArcConnectedK8sExtension + # wait for arc k8s pods to be ready state + waitForResourcesReady azure-arc pods + # wait for Arc K8s cluster to be created waitForArcK8sClusterCreated From c043914108b3c9926039c80265dea425015a695f Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sat, 14 Aug 2021 17:32:09 -0700 Subject: [PATCH 14/14] update conftest image --- test/e2e/conformance.yaml | 2 +- test/e2e/e2e-tests.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/e2e/conformance.yaml b/test/e2e/conformance.yaml index 7e99385dd..ff790e690 100644 --- a/test/e2e/conformance.yaml +++ b/test/e2e/conformance.yaml @@ -3,7 +3,7 @@ sonobuoy-config: plugin-name: azure-arc-ci-conformance result-format: junit spec: - image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciconftest08022021 + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciconftest08142021 imagePullPolicy: Always name: plugin resources: {} diff --git a/test/e2e/e2e-tests.yaml b/test/e2e/e2e-tests.yaml index 500e44d43..25817be12 100644 --- a/test/e2e/e2e-tests.yaml +++ b/test/e2e/e2e-tests.yaml @@ -104,7 +104,7 @@ data: - name: AZURE_CLOUD value: "AZURE_PUBLIC_CLOUD" # image tag should be updated if new tests being added after this image - image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciconftest08022021 + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciconftest08142021 imagePullPolicy: IfNotPresent name: plugin resources: {}