diff --git a/README.md b/README.md index 73bf858cd..e3ceedc8e 100644 --- a/README.md +++ b/README.md @@ -326,7 +326,7 @@ For DEV and PROD branches, automatically deployed latest yaml with latest agent docker build -f ./core/Dockerfile -t /: . docker push /: ``` -3. update existing agentest image tag in e2e-tests.yaml with newly built image tag with MCR repo +3. update existing agentest image tag in e2e-tests.yaml & conformance.yaml with newly built image tag with MCR repo # Scenario Tests Clusters are used in release pipeline already has the yamls under test\scenario deployed. Make sure to validate these scenarios. diff --git a/test/e2e/conformance.yaml b/test/e2e/conformance.yaml new file mode 100644 index 000000000..ff790e690 --- /dev/null +++ b/test/e2e/conformance.yaml @@ -0,0 +1,15 @@ +sonobuoy-config: + driver: Job + plugin-name: azure-arc-ci-conformance + result-format: junit +spec: + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciconftest08142021 + imagePullPolicy: Always + name: plugin + resources: {} + volumes: + - name: results + emptyDir: {} + volumeMounts: + - mountPath: /tmp/results + name: results diff --git a/test/e2e/e2e-tests.yaml b/test/e2e/e2e-tests.yaml index 06dfa1fb0..25817be12 100644 --- a/test/e2e/e2e-tests.yaml +++ b/test/e2e/e2e-tests.yaml @@ -68,7 +68,7 @@ data: containers: [] restartPolicy: Never serviceAccountName: sonobuoy-serviceaccount - nodeSelector: + nodeSelector: kubernetes.io/os: linux tolerations: - effect: NoSchedule @@ -84,8 +84,11 @@ data: result-format: junit spec: env: + # this should be false if the test environment is non ARC K8s for example AKS + - name: IS_NON_ARC_K8S_TEST_ENVIRONMENT + value: "true" # Update values of CLIENT_ID, CLIENT_SECRET of the service principal which has permission to query LA ad Metrics API - # Update value of TENANT_ID corresponding your Azure Service principal + # Update value of TENANT_ID corresponding your Azure Service principal - name: CLIENT_ID value: "SP_CLIENT_ID_VALUE" - name: CLIENT_SECRET @@ -93,15 +96,15 @@ data: - name: TENANT_ID value: "SP_TENANT_ID_VALUE" - name: DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES - value: "10" + value: "10" - name: DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES - value: "10" + value: "10" - name: AGENT_POD_EXPECTED_RESTART_COUNT - value: "0" + value: "0" - name: AZURE_CLOUD - value: "AZURE_PUBLIC_CLOUD" - # image tag should be updated if new tests being added after this image - image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest02152021 + value: "AZURE_PUBLIC_CLOUD" + # image tag should be updated if new tests being added after this image + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciconftest08142021 imagePullPolicy: IfNotPresent name: plugin resources: {} @@ -144,7 +147,7 @@ spec: name: output-volume restartPolicy: Never serviceAccountName: sonobuoy-serviceaccount - nodeSelector: + nodeSelector: kubernetes.io/os: linux tolerations: - key: "kubernetes.io/e2e-evict-taint-key" diff --git a/test/e2e/src/common/constants.py b/test/e2e/src/common/constants.py index 770964cb5..392b10554 100644 --- a/test/e2e/src/common/constants.py +++ b/test/e2e/src/common/constants.py @@ -40,6 +40,8 @@ TIMEOUT = 300 +# WAIT TIME BEFORE READING THE AGENT LOGS +AGENT_WAIT_TIME_SECS = "180" # Azure Monitor for Container Extension related AGENT_RESOURCES_NAMESPACE = 'kube-system' AGENT_DEPLOYMENT_NAME = 'omsagent-rs' @@ -47,7 +49,9 @@ AGENT_WIN_DAEMONSET_NAME = 'omsagent-win' AGENT_DEPLOYMENT_PODS_LABEL_SELECTOR = 'rsName=omsagent-rs' -AGENT_DAEMON_SET_PODS_LABEL_SELECTOR = 'component=oms-agent' +AGENT_DAEMON_SET_PODS_LABEL_SELECTOR = 'dsName=omsagent-ds' +AGENT_DAEMON_SET_PODS_LABEL_SELECTOR_NON_ARC = 'component=oms-agent' +AGENT_FLUENTD_LOG_PATH = '/var/opt/microsoft/docker-cimprov/log/fluentd.log' AGENT_OMSAGENT_LOG_PATH = '/var/opt/microsoft/omsagent/log/omsagent.log' AGENT_REPLICASET_WORKFLOWS = ["kubePodInventoryEmitStreamSuccess", "kubeNodeInventoryEmitStreamSuccess"] diff --git a/test/e2e/src/core/Dockerfile b/test/e2e/src/core/Dockerfile index 9f85bdf4c..cd85aee40 100644 --- a/test/e2e/src/core/Dockerfile +++ b/test/e2e/src/core/Dockerfile @@ -1,11 +1,26 @@ FROM python:3.6 -RUN pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org pytest pytest-xdist filelock requests kubernetes adal msrestazure +RUN pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org pytest pytest-xdist filelock requests kubernetes adal msrestazure RUN curl https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash \ && helm version +RUN apt-get update && apt-get -y upgrade && \ + apt-get -f -y install curl apt-transport-https lsb-release gnupg python3-pip python-pip && \ + curl -sL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > /etc/apt/trusted.gpg.d/microsoft.asc.gpg && \ + CLI_REPO=$(lsb_release -cs) && \ + echo "deb [arch=amd64] https://packages.microsoft.com/repos/azure-cli/ ${CLI_REPO} main" \ + > /etc/apt/sources.list.d/azure-cli.list && \ + apt-get update && \ + apt-get install -y azure-cli && \ + rm -rf /var/lib/apt/lists/* + +RUN python3 -m pip install junit_xml + +COPY --from=lachlanevenson/k8s-kubectl:v1.20.5 /usr/local/bin/kubectl /usr/local/bin/kubectl + COPY ./core/e2e_tests.sh / +COPY ./core/setup_failure_handler.py / COPY ./core/pytest.ini /e2etests/ COPY ./core/conftest.py /e2etests/ COPY ./core/helper.py /e2etests/ diff --git a/test/e2e/src/core/conftest.py b/test/e2e/src/core/conftest.py index e659d5189..02f644a18 100644 --- a/test/e2e/src/core/conftest.py +++ b/test/e2e/src/core/conftest.py @@ -22,42 +22,48 @@ def env_dict(): create_results_dir('/tmp/results') # Setting some environment variables - env_dict['SETUP_LOG_FILE'] = '/tmp/results/setup' + env_dict['SETUP_LOG_FILE'] = '/tmp/results/setup' env_dict['TEST_AGENT_LOG_FILE'] = '/tmp/results/containerinsights' env_dict['NUM_TESTS_COMPLETED'] = 0 - + print("Starting setup...") append_result_output("Starting setup...\n", env_dict['SETUP_LOG_FILE']) - + # Collecting environment variables env_dict['TENANT_ID'] = os.getenv('TENANT_ID') env_dict['CLIENT_ID'] = os.getenv('CLIENT_ID') env_dict['CLIENT_SECRET'] = os.getenv('CLIENT_SECRET') - + env_dict['IS_NON_ARC_K8S_TEST_ENVIRONMENT'] = os.getenv('IS_NON_ARC_K8S_TEST_ENVIRONMENT') + # released agent for Arc K8s still uses omsagent and when we rollout the agent with mdsd + # this shouldnt set after agent rollout with mdsd + env_dict['USING_OMSAGENT_BASE_AGENT'] = os.getenv('USING_OMSAGENT_BASE_AGENT') + + waitTimeInterval = int(os.getenv('AGENT_WAIT_TIME_SECS')) if os.getenv('AGENT_WAIT_TIME_SECS') else constants.AGENT_WAIT_TIME_SECS + env_dict['AGENT_WAIT_TIME_SECS'] = waitTimeInterval # get default query time interval for log analytics queries queryTimeInterval = int(os.getenv('DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES')) if os.getenv('DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES') else constants.DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES # add minute suffix since this format required for LA queries env_dict['DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES'] = str(queryTimeInterval) + "m" - + # get default query time interval for metrics queries env_dict['DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES'] = int(os.getenv('DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES')) if os.getenv('DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES') else constants.DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES - - - # expected agent pod restart count + + + # expected agent pod restart count env_dict['AGENT_POD_EXPECTED_RESTART_COUNT'] = int(os.getenv('AGENT_POD_EXPECTED_RESTART_COUNT')) if os.getenv('AGENT_POD_EXPECTED_RESTART_COUNT') else constants.AGENT_POD_EXPECTED_RESTART_COUNT # default to azure public cloud if AZURE_CLOUD not specified env_dict['AZURE_ENDPOINTS'] = constants.AZURE_CLOUD_DICT.get(os.getenv('AZURE_CLOUD')) if os.getenv('AZURE_CLOUD') else constants.AZURE_PUBLIC_CLOUD_ENDPOINTS - + if not env_dict.get('TENANT_ID'): pytest.fail('ERROR: variable TENANT_ID is required.') - + if not env_dict.get('CLIENT_ID'): pytest.fail('ERROR: variable CLIENT_ID is required.') - + if not env_dict.get('CLIENT_SECRET'): pytest.fail('ERROR: variable CLIENT_SECRET is required.') - + print("Setup Complete.") append_result_output("Setup Complete.\n", env_dict['SETUP_LOG_FILE']) @@ -66,22 +72,22 @@ def env_dict(): else: with Path.open(my_file, "rb") as f: env_dict = pickle.load(f) - + yield env_dict - + my_file = Path("env.pkl") with FileLock(str(my_file) + ".lock"): with Path.open(my_file, "rb") as f: env_dict = pickle.load(f) env_dict['NUM_TESTS_COMPLETED'] = 1 + env_dict.get('NUM_TESTS_COMPLETED') - if env_dict['NUM_TESTS_COMPLETED'] == int(os.getenv('NUM_TESTS')): + if env_dict['NUM_TESTS_COMPLETED'] == int(os.getenv('NUM_TESTS')): # Checking if cleanup is required. if os.getenv('SKIP_CLEANUP'): return print('Starting cleanup...') append_result_output("Starting Cleanup...\n", env_dict['SETUP_LOG_FILE']) - + print("Cleanup Complete.") append_result_output("Cleanup Complete.\n", env_dict['SETUP_LOG_FILE']) return diff --git a/test/e2e/src/core/e2e_tests.sh b/test/e2e/src/core/e2e_tests.sh index 3bfafdce9..dd9d93073 100644 --- a/test/e2e/src/core/e2e_tests.sh +++ b/test/e2e/src/core/e2e_tests.sh @@ -1,7 +1,158 @@ -#!/bin/sh +#!/bin/bash +set -x results_dir="${RESULTS_DIR:-/tmp/results}" +waitForResourcesReady() { + ready=false + max_retries=60 + sleep_seconds=10 + NAMESPACE=$1 + RESOURCETYPE=$2 + RESOURCE=$3 + # if resource not specified, set to --all + if [ -z $RESOURCE ]; then + RESOURCE="--all" + fi + for i in $(seq 1 $max_retries) + do + if [[ ! $(kubectl wait --for=condition=Ready ${RESOURCETYPE} ${RESOURCE} --namespace ${NAMESPACE}) ]]; then + echo "waiting for the resource:${RESOURCE} of the type:${RESOURCETYPE} in namespace:${NAMESPACE} to be ready state, iteration:${i}" + sleep ${sleep_seconds} + else + echo "resource:${RESOURCE} of the type:${RESOURCETYPE} in namespace:${NAMESPACE} in ready state" + ready=true + break + fi + done + + echo "waitForResourcesReady state: $ready" +} + + +waitForArcK8sClusterCreated() { + connectivityState=false + max_retries=60 + sleep_seconds=10 + for i in $(seq 1 $max_retries) + do + echo "iteration: ${i}, clustername: ${CLUSTER_NAME}, resourcegroup: ${RESOURCE_GROUP}" + clusterState=$(az connectedk8s show --name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --query connectivityStatus -o json) + clusterState=$(echo $clusterState | tr -d '"' | tr -d '"\r\n') + echo "cluster current state: ${clusterState}" + if [ ! -z "$clusterState" ]; then + if [[ ("${clusterState}" == "Connected") || ("${clusterState}" == "Connecting") ]]; then + connectivityState=true + break + fi + fi + sleep ${sleep_seconds} + done + echo "Arc K8s cluster connectivityState: $connectivityState" +} + +waitForCIExtensionInstalled() { + installedState=false + max_retries=60 + sleep_seconds=10 + for i in $(seq 1 $max_retries) + do + echo "iteration: ${i}, clustername: ${CLUSTER_NAME}, resourcegroup: ${RESOURCE_GROUP}" + installState=$(az k8s-extension show --cluster-name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --cluster-type connectedClusters --name azuremonitor-containers --query installState -o json) + installState=$(echo $installState | tr -d '"' | tr -d '"\r\n') + echo "extension install state: ${installState}" + if [ ! -z "$installState" ]; then + if [ "${installState}" == "Installed" ]; then + installedState=true + break + fi + fi + sleep ${sleep_seconds} + done + echo "container insights extension installedState: $installedState" +} + +validateCommonParameters() { + if [ -z $TENANT_ID ]; then + echo "ERROR: parameter TENANT_ID is required." > ${results_dir}/error + python3 setup_failure_handler.py + fi + if [ -z $CLIENT_ID ]; then + echo "ERROR: parameter CLIENT_ID is required." > ${results_dir}/error + python3 setup_failure_handler.py + fi + + if [ -z $CLIENT_SECRET ]; then + echo "ERROR: parameter CLIENT_SECRET is required." > ${results_dir}/error + python3 setup_failure_handler.py + fi +} + +validateArcConfTestParameters() { + if [ -z $SUBSCRIPTION_ID ]; then + echo "ERROR: parameter SUBSCRIPTION_ID is required." > ${results_dir}/error + python3 setup_failure_handler.py + fi + + if [ -z $RESOURCE_GROUP ]]; then + echo "ERROR: parameter RESOURCE_GROUP is required." > ${results_dir}/error + python3 setup_failure_handler.py + fi + + if [ -z $CLUSTER_NAME ]; then + echo "ERROR: parameter CLUSTER_NAME is required." > ${results_dir}/error + python3 setup_failure_handler.py + fi +} + +addArcConnectedK8sExtension() { + echo "adding Arc K8s connectedk8s extension" + az extension add --name connectedk8s 2> ${results_dir}/error || python3 setup_failure_handler.py +} + +addArcK8sCLIExtension() { + echo "adding Arc K8s k8s-extension extension" + az extension add --name k8s-extension +} + +createArcCIExtension() { + echo "creating extension type: Microsoft.AzureMonitor.Containers" + basicparameters="--cluster-name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --cluster-type connectedClusters --extension-type Microsoft.AzureMonitor.Containers --scope cluster --name azuremonitor-containers" + if [ ! -z "$CI_ARC_RELEASE_TRAIN" ]; then + basicparameters="$basicparameters --release-train $CI_ARC_RELEASE_TRAIN" + fi + if [ ! -z "$CI_ARC_VERSION" ]; then + basicparameters="$basicparameters --version $CI_ARC_VERSION" + fi + + az k8s-extension create $basicparameters --configuration-settings omsagent.ISTEST=true +} + +showArcCIExtension() { + echo "arc ci extension status" + az k8s-extension show --cluster-name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --cluster-type connectedClusters --name azuremonitor-containers +} + +deleteArcCIExtension() { + az k8s-extension delete --name azuremonitor-containers \ + --cluster-type connectedClusters \ + --cluster-name $CLUSTER_NAME \ + --resource-group $RESOURCE_GROUP --yes +} + +login_to_azure() { + # Login with service principal + echo "login to azure using the SP creds" + az login --service-principal \ + -u ${CLIENT_ID} \ + -p ${CLIENT_SECRET} \ + --tenant ${TENANT_ID} 2> ${results_dir}/error || python3 setup_failure_handler.py + + echo "setting subscription: ${SUBSCRIPTION_ID} as default subscription" + az account set -s $SUBSCRIPTION_ID +} + + # saveResults prepares the results for handoff to the Sonobuoy worker. # See: https://github.com/vmware-tanzu/sonobuoy/blob/master/docs/plugins.md saveResults() { @@ -17,6 +168,50 @@ saveResults() { # Ensure that we tell the Sonobuoy worker we are done regardless of results. trap saveResults EXIT +# validate common params +validateCommonParameters + +IS_ARC_K8S_ENV="true" +if [ -z $IS_NON_ARC_K8S_TEST_ENVIRONMENT ]; then + echo "arc k8s environment" +else + if [ "$IS_NON_ARC_K8S_TEST_ENVIRONMENT" = "true" ]; then + IS_ARC_K8S_ENV="false" + echo "non arc k8s environment" + fi +fi + +if [ "$IS_ARC_K8S_ENV" = "false" ]; then + echo "skipping installing of ARC K8s container insights extension since the test environment is non-arc K8s" +else + # validate params + validateArcConfTestParameters + + # login to azure + login_to_azure + + # add arc k8s connectedk8s extension + addArcConnectedK8sExtension + + # wait for arc k8s pods to be ready state + waitForResourcesReady azure-arc pods + + # wait for Arc K8s cluster to be created + waitForArcK8sClusterCreated + + # add CLI extension + addArcK8sCLIExtension + + # add ARC K8s container insights extension + createArcCIExtension + + # show the ci extension status + showArcCIExtension + + #wait for extension state to be installed + waitForCIExtensionInstalled +fi + # The variable 'TEST_LIST' should be provided if we want to run specific tests. If not provided, all tests are run NUM_PROCESS=$(pytest /e2etests/ --collect-only -k "$TEST_NAME_LIST" -m "$TEST_MARKER_LIST" | grep "