From fade8ba9da493e9d946a8af6c2d6ecaf25951dfa Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 16 Feb 2021 16:55:28 -0800 Subject: [PATCH 01/14] add agent e2e fw and tests --- source/plugins/ruby/in_kube_events.rb | 4 + source/plugins/ruby/in_kube_nodes.rb | 21 + source/plugins/ruby/in_kube_podinventory.rb | 12 + source/plugins/ruby/in_kube_pvinventory.rb | 5 +- test/e2e/src/common/arm_rest_utility.py | 25 + test/e2e/src/common/constants.py | 120 +++++ test/e2e/src/common/helm_utility.py | 68 +++ .../common/kubernetes_configmap_utility.py | 8 + test/e2e/src/common/kubernetes_crd_utility.py | 26 ++ .../common/kubernetes_daemonset_utility.py | 36 ++ .../common/kubernetes_deployment_utility.py | 38 ++ .../common/kubernetes_namespace_utility.py | 32 ++ .../e2e/src/common/kubernetes_node_utility.py | 13 + test/e2e/src/common/kubernetes_pod_utility.py | 66 +++ .../src/common/kubernetes_secret_utility.py | 26 ++ .../src/common/kubernetes_service_utility.py | 19 + .../src/common/kubernetes_version_utility.py | 10 + test/e2e/src/common/results_utility.py | 24 + test/e2e/src/core/Dockerfile | 17 + test/e2e/src/core/conftest.py | 90 ++++ test/e2e/src/core/e2e_tests.sh | 26 ++ test/e2e/src/core/helper.py | 429 ++++++++++++++++++ test/e2e/src/core/pytest.ini | 3 + test/e2e/src/core/test_ds_workflows.py | 60 +++ test/e2e/src/core/test_e2e_workflows.py | 330 ++++++++++++++ .../core/test_node_metrics_e2e_workflow.py | 420 +++++++++++++++++ .../src/core/test_pod_metrics_e2e_workflow.py | 134 ++++++ test/e2e/src/core/test_resource_status.py | 43 ++ test/e2e/src/core/test_rs_workflows.py | 93 ++++ test/e2e/src/e2e-tests.yaml | 167 +++++++ 30 files changed, 2364 insertions(+), 1 deletion(-) create mode 100644 test/e2e/src/common/arm_rest_utility.py create mode 100644 test/e2e/src/common/constants.py create mode 100644 test/e2e/src/common/helm_utility.py create mode 100644 test/e2e/src/common/kubernetes_configmap_utility.py create mode 100644 
test/e2e/src/common/kubernetes_crd_utility.py create mode 100644 test/e2e/src/common/kubernetes_daemonset_utility.py create mode 100644 test/e2e/src/common/kubernetes_deployment_utility.py create mode 100644 test/e2e/src/common/kubernetes_namespace_utility.py create mode 100644 test/e2e/src/common/kubernetes_node_utility.py create mode 100644 test/e2e/src/common/kubernetes_pod_utility.py create mode 100644 test/e2e/src/common/kubernetes_secret_utility.py create mode 100644 test/e2e/src/common/kubernetes_service_utility.py create mode 100644 test/e2e/src/common/kubernetes_version_utility.py create mode 100644 test/e2e/src/common/results_utility.py create mode 100644 test/e2e/src/core/Dockerfile create mode 100644 test/e2e/src/core/conftest.py create mode 100644 test/e2e/src/core/e2e_tests.sh create mode 100755 test/e2e/src/core/helper.py create mode 100644 test/e2e/src/core/pytest.ini create mode 100755 test/e2e/src/core/test_ds_workflows.py create mode 100755 test/e2e/src/core/test_e2e_workflows.py create mode 100755 test/e2e/src/core/test_node_metrics_e2e_workflow.py create mode 100755 test/e2e/src/core/test_pod_metrics_e2e_workflow.py create mode 100755 test/e2e/src/core/test_resource_status.py create mode 100755 test/e2e/src/core/test_rs_workflows.py create mode 100644 test/e2e/src/e2e-tests.yaml diff --git a/source/plugins/ruby/in_kube_events.rb b/source/plugins/ruby/in_kube_events.rb index 4f6017cc5..f50019a01 100644 --- a/source/plugins/ruby/in_kube_events.rb +++ b/source/plugins/ruby/in_kube_events.rb @@ -129,6 +129,7 @@ def enumerate def parse_and_emit_records(events, eventQueryState, newEventQueryState, batchTime = Time.utc.iso8601) currentTime = Time.now emitTime = currentTime.to_f + @@istestvar = ENV["ISTEST"] begin eventStream = MultiEventStream.new events["items"].each do |items| @@ -171,6 +172,9 @@ def parse_and_emit_records(events, eventQueryState, newEventQueryState, batchTim @eventsCount += 1 end router.emit_stream(@tag, eventStream) if eventStream 
+ if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) + $log.info("kubeEventsInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end rescue => errorStr $log.debug_backtrace(errorStr.backtrace) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index 0a4727077..c803c0fa2 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -188,6 +188,9 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) $log.info("in_kube_node::parse_and_emit_records: number of container node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream containerNodeInventoryEventStream = MultiEventStream.new + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) + $log.info("containerNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end end # node metrics records @@ -217,6 +220,9 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) $log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream kubePerfEventStream = MultiEventStream.new + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) + $log.info("kubeNodePerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end end # node GPU metrics record @@ -249,6 +255,9 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) $log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream insightsMetricsEventStream = MultiEventStream.new + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) + $log.info("kubeNodeInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end end # Adding telemetry to send node telemetry every 10 minutes timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs @@ -300,23 +309,35 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) router.emit_stream(@tag, eventStream) if eventStream $log.info("in_kube_node::parse_and_emit_records: number of mdm node inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}") router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) + $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end eventStream = nil end if containerNodeInventoryEventStream.count > 0 $log.info("in_kube_node::parse_and_emit_records: number of container node inventory records emitted #{containerNodeInventoryEventStream.count} @ #{Time.now.utc.iso8601}") router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream containerNodeInventoryEventStream = nil + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) + $log.info("containerNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end end if kubePerfEventStream.count > 0 $log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{kubePerfEventStream.count} @ #{Time.now.utc.iso8601}") router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream kubePerfEventStream = nil + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) + $log.info("kubeNodePerfInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end end if insightsMetricsEventStream.count > 0 $log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted #{insightsMetricsEventStream.count} @ #{Time.now.utc.iso8601}") router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream insightsMetricsEventStream = nil + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) + $log.info("kubeNodeInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end end rescue => errorStr $log.warn "Failed to retrieve node inventory: #{errorStr}" diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index 0cff2eefe..5256eb159 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -265,6 +265,9 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && kubePerfEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE $log.info("in_kube_podinventory::parse_and_emit_records: number of container perf records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) + $log.info("kubeContainerPerfEventEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end kubePerfEventStream = MultiEventStream.new end @@ -306,6 +309,9 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc $log.info("in_kube_podinventory::parse_and_emit_records: number of perf records emitted #{kubePerfEventStream.count} @ #{Time.now.utc.iso8601}") router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream kubePerfEventStream = nil + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) + $log.info("kubeContainerPerfEventEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end end if insightsMetricsEventStream.count > 0 @@ -345,6 +351,9 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc $log.info("in_kube_podinventory::parse_and_emit_records: number of service records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") router.emit_stream(@@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream kubeServicesEventStream = MultiEventStream.new + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) + $log.info("kubeServicesEventEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end end end end @@ -352,6 +361,9 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc if kubeServicesEventStream.count > 0 $log.info("in_kube_podinventory::parse_and_emit_records : number of service records emitted #{kubeServicesEventStream.count} @ #{Time.now.utc.iso8601}") router.emit_stream(@@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) + $log.info("kubeServicesEventEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end end kubeServicesEventStream = nil end diff --git a/source/plugins/ruby/in_kube_pvinventory.rb b/source/plugins/ruby/in_kube_pvinventory.rb index 861b3a8e1..4efe86f61 100644 --- a/source/plugins/ruby/in_kube_pvinventory.rb +++ b/source/plugins/ruby/in_kube_pvinventory.rb @@ -106,7 +106,7 @@ def parse_and_emit_records(pvInventory, batchTime = Time.utc.iso8601) currentTime = Time.now emitTime = currentTime.to_f eventStream = MultiEventStream.new - + @@istestvar = ENV["ISTEST"] begin records = [] pvInventory["items"].each do |item| @@ -156,6 +156,9 @@ def parse_and_emit_records(pvInventory, batchTime = Time.utc.iso8601) end router.emit_stream(@tag, eventStream) if eventStream + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) + $log.info("kubePVInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end rescue => errorStr $log.warn "Failed in parse_and_emit_record for in_kube_pvinventory: #{errorStr}" diff --git a/test/e2e/src/common/arm_rest_utility.py b/test/e2e/src/common/arm_rest_utility.py new file mode 100644 index 000000000..604f8b791 --- /dev/null +++ b/test/e2e/src/common/arm_rest_utility.py @@ -0,0 +1,25 @@ +import adal +import pytest + +from msrestazure.azure_active_directory import AADTokenCredentials + + +# Function to fetch aad token from spn id and password +def fetch_aad_token(client_id, client_secret, authority_uri, resource_uri): + """ + Authenticate using service principal w/ key. 
+ """ + try: + context = adal.AuthenticationContext(authority_uri, api_version=None) + return context.acquire_token_with_client_credentials(resource_uri, client_id, client_secret) + except Exception as e: + pytest.fail("Error occured while fetching aad token: " + str(e)) + + +# Function that returns aad token credentials for a given spn +def fetch_aad_token_credentials(client_id, client_secret, authority_uri, resource_uri): + mgmt_token = fetch_aad_token(client_id, client_secret, authority_uri, resource_uri) + try: + return AADTokenCredentials(mgmt_token, client_id) + except Exception as e: + pytest.fail("Error occured while fetching credentials: " + str(e)) diff --git a/test/e2e/src/common/constants.py b/test/e2e/src/common/constants.py new file mode 100644 index 000000000..738b6c7f8 --- /dev/null +++ b/test/e2e/src/common/constants.py @@ -0,0 +1,120 @@ +AZURE_PUBLIC_CLOUD_ENDPOINTS = { + "activeDirectory": "https://login.microsoftonline.com/", + "activeDirectoryDataLakeResourceId": "https://datalake.azure.net/", + "activeDirectoryGraphResourceId": "https://graph.windows.net/", + "activeDirectoryResourceId": "https://management.core.windows.net/", + "appInsights": "https://api.applicationinsights.io", + "appInsightsTelemetryChannel": "https://dc.applicationinsights.azure.com/v2/track", + "batchResourceId": "https://batch.core.windows.net/", + "gallery": "https://gallery.azure.com/", + "logAnalytics": "https://api.loganalytics.io", + "management": "https://management.core.windows.net/", + "mediaResourceId": "https://rest.media.azure.net", + "microsoftGraphResourceId": "https://graph.microsoft.com/", + "ossrdbmsResourceId": "https://ossrdbms-aad.database.windows.net", + "resourceManager": "https://management.azure.com/", + "sqlManagement": "https://management.core.windows.net:8443/", + "vmImageAliasDoc": "https://raw.githubusercontent.com/Azure/azure-rest-api-specs/master/arm-compute/quickstart-templates/aliases.json" +} + +AZURE_DOGFOOD_ENDPOINTS = { + 
"activeDirectory": "https://login.windows-ppe.net/", + "activeDirectoryDataLakeResourceId": None, + "activeDirectoryGraphResourceId": "https://graph.ppe.windows.net/", + "activeDirectoryResourceId": "https://management.core.windows.net/", + "appInsights": None, + "appInsightsTelemetryChannel": None, + "batchResourceId": None, + "gallery": "https://df.gallery.azure-test.net/", + "logAnalytics": None, + "management": "https://management-preview.core.windows-int.net/", + "mediaResourceId": None, + "microsoftGraphResourceId": None, + "ossrdbmsResourceId": None, + "resourceManager": "https://api-dogfood.resources.windows-int.net/", + "sqlManagement": None, + "vmImageAliasDoc": None +} + +AZURE_CLOUD_DICT = {"AZURE_PUBLIC_CLOUD" : AZURE_PUBLIC_CLOUD_ENDPOINTS, "AZURE_DOGFOOD": AZURE_DOGFOOD_ENDPOINTS} + +TIMEOUT = 300 + +# Azure Monitor for Container Extension related +AGENT_RESOURCES_NAMESPACE = 'kube-system' +AGENT_DEPLOYMENT_NAME = 'omsagent-rs' +AGENT_DAEMONSET_NAME = 'omsagent' +AGENT_WIN_DAEMONSET_NAME = 'omsagent-win' + +AGENT_DEPLOYMENT_PODS_LABEL_SELECTOR = 'rsName=omsagent-rs' +AGENT_DAEMON_SET_PODS_LABEL_SELECTOR = 'component=oms-agent' +AGENT_OMSAGENT_LOG_PATH = '/var/opt/microsoft/omsagent/log/omsagent.log' +AGENT_REPLICASET_WORKFLOWS = ["kubePodInventoryEmitStreamSuccess", "kubeNodeInventoryEmitStreamSuccess"] + +# override this through setting environment variable if the expected restart count is > 0 for example applying configmap +AGENT_POD_EXPECTED_RESTART_COUNT = 0 + +# replicaset workflow streams +KUBE_POD_INVENTORY_EMIT_STREAM = "kubePodInventoryEmitStreamSuccess" +KUBE_NODE_INVENTORY_EMIT_STREAM = "kubeNodeInventoryEmitStreamSuccess" +KUBE_DEPLOYMENT_INVENTORY_EMIT_STREAM = "kubestatedeploymentsInsightsMetricsEmitStreamSuccess" +KUBE_CONTAINER_PERF_EMIT_STREAM = "kubeContainerPerfEventEmitStreamSuccess" +KUBE_SERVICES_EMIT_STREAM = "kubeServicesEventEmitStreamSuccess" +KUBE_CONTAINER_NODE_INVENTORY_EMIT_STREAM = 
"containerNodeInventoryEmitStreamSuccess" +KUBE_EVENTS_EMIT_STREAM = "kubeEventsInventoryEmitStreamSuccess" +# daemonset workflow streams +CONTAINER_PERF_EMIT_STREAM = "cAdvisorPerfEmitStreamSuccess" +CONTAINER_INVENTORY_EMIT_STREAM = "containerInventoryEmitStreamSuccess" + +# simple log analytics queries to validate for e2e workflows +DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES = 10 +KUBE_POD_INVENTORY_QUERY = "KubePodInventory | where TimeGenerated > ago({0}) | count" +KUBE_NODE_INVENTORY_QUERY = "KubeNodeInventory | where TimeGenerated > ago({0}) | count" +KUBE_SERVICES_QUERY = "KubeServices | where TimeGenerated > ago({0}) | count" +KUBE_EVENTS_QUERY = "KubeEvents | where TimeGenerated > ago({0}) | count" +CONTAINER_NODE_INVENTORY_QUERY = "ContainerNodeInventory | where TimeGenerated > ago({0}) | count" +CONTAINER_INVENTORY_QUERY = "ContainerInventory | where TimeGenerated > ago({0}) | count" +# node perf +NODE_PERF_CPU_CAPCITY_QUERY = "Perf | where ObjectName == 'K8SNode' | where CounterName == 'cpuCapacityNanoCores' | where TimeGenerated > ago({0}) | count" +NODE_PERF_MEMORY_CAPCITY_QUERY = "Perf | where ObjectName == 'K8SNode' | where CounterName == 'memoryCapacityBytes' | where TimeGenerated > ago({0}) | count" +NODE_PERF_CPU_ALLOCATABLE_QUERY = "Perf | where ObjectName == 'K8SNode' | where CounterName == 'cpuAllocatableNanoCores' | where TimeGenerated > ago({0}) | count" +NODE_PERF_MEMORY_ALLOCATABLE_QUERY = "Perf | where ObjectName == 'K8SNode' | where CounterName == 'memoryAllocatableBytes' | where TimeGenerated > ago({0}) | count" +NODE_PERF_CPU_USAGE_QUERY = "Perf | where ObjectName == 'K8SNode' | where CounterName == 'cpuUsageNanoCores' | where TimeGenerated > ago({0}) | count" +NODE_PERF_MEMORY_RSS_USAGE_QUERY = "Perf | where ObjectName == 'K8SNode' | where CounterName == 'memoryRssBytes' | where TimeGenerated > ago({0}) | count" +NODE_PERF_MEMORY_WS_USAGE_QUERY = "Perf | where ObjectName == 'K8SNode' | where CounterName =='memoryWorkingSetBytes' | where 
TimeGenerated > ago({0}) | count" +NODE_PERF_RESTART_TIME_EPOCH_QUERY = "Perf | where ObjectName == 'K8SNode' | where CounterName == 'restartTimeEpoch' | where TimeGenerated > ago({0}) | count" +# container perf +CONTAINER_PERF_CPU_LIMITS_QUERY = "Perf | where ObjectName == 'K8SContainer' | where CounterName == 'cpuLimitNanoCores' | where TimeGenerated > ago({0}) | count" +CONTAINER_PERF_MEMORY_LIMITS_QUERY = "Perf | where ObjectName == 'K8SContainer' | where CounterName == 'memoryLimitBytes' | where TimeGenerated > ago({0}) | count" +CONTAINER_PERF_CPU_REQUESTS_QUERY = "Perf | where ObjectName == 'K8SContainer' | where CounterName == 'cpuRequestNanoCores' | where TimeGenerated > ago({0}) | count" +CONTAINER_PERF_MEMORY_REQUESTS_QUERY = "Perf | where ObjectName == 'K8SContainer' | where CounterName == 'memoryRequestBytes' | where TimeGenerated > ago({0}) | count" +CONTAINER_PERF_CPU_USAGE_QUERY = "Perf | where ObjectName == 'K8SContainer' | where CounterName == 'cpuUsageNanoCores' | where TimeGenerated > ago({0}) | count" +CONTAINER_PERF_MEMORY_RSS_USAGE_QUERY = "Perf | where ObjectName == 'K8SContainer' | where CounterName == 'memoryRssBytes' | where TimeGenerated > ago({0}) | count" +CONTAINER_PERF_MEMORY_WS_USAGE_QUERY = "Perf | where ObjectName == 'K8SContainer' | where CounterName == 'memoryWorkingSetBytes' | where TimeGenerated > ago({0}) | count" +CONTAINER_PERF_RESTART_TIME_EPOCH_QUERY = "Perf | where ObjectName == 'K8SContainer' | where CounterName == 'restartTimeEpoch' | where TimeGenerated > ago({0}) | count" +# container log +CONTAINER_LOG_QUERY = "ContainerLog | where TimeGenerated > ago({0}) | count" +# insights metrics +INSIGHTS_METRICS_QUERY = "InsightsMetrics | where TimeGenerated > ago({0}) | count" + +# custom metrics +METRICS_API_VERSION = '2019-07-01' +DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES = 10 + +# node metrics +NODE_METRICS_NAMESPACE = 'insights.container/nodes' +NODE_METRIC_METRIC_AGGREGATION = 'average' 
+NODE_CPU_USAGE_MILLI_CORES_METRIC_NAME = 'cpuUsageMilliCores' +NODE_CPU_USAGE_PERCENTAGE_METRIC_NAME = 'cpuUsagePercentage' +NODE_MEMORY_RSS_METRIC_NAME = 'memoryRssBytes' +NODE_MEMORY_RSS_PERCENTAGE_METRIC_NAME = 'memoryRssPercentage' +NODE_MEMORY_WS_METRIC_NAME = 'memoryWorkingSetBytes' +NODE_MEMORY_WS_PERCENTAGE_METRIC_NAME = 'memoryWorkingSetPercentage' +NODE_COUNT_METRIC_NAME = 'nodesCount' +NODE_DISK_USAGE_PERCENTAGE_METRIC_NAME = 'diskUsedPercentage(Preview)' + +# pod metrics +POD_METRICS_NAMESPACE = 'insights.container/pods' +POD_METRIC_METRIC_AGGREGATION = 'average' +POD_COUNT_METRIC_NAME = 'PodCount' + diff --git a/test/e2e/src/common/helm_utility.py b/test/e2e/src/common/helm_utility.py new file mode 100644 index 000000000..6eac1e071 --- /dev/null +++ b/test/e2e/src/common/helm_utility.py @@ -0,0 +1,68 @@ +import os +import pytest +import subprocess + + +# Function to pull helm charts +def pull_helm_chart(registry_path): + os.environ['HELM_EXPERIMENTAL_OCI'] = '1' + cmd_helm_chart_pull = ["helm", "chart", "pull", registry_path] + response_helm_chart_pull = subprocess.Popen(cmd_helm_chart_pull, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output_helm_chart_pull, error_helm_chart_pull = response_helm_chart_pull.communicate() + if response_helm_chart_pull.returncode != 0: + pytest.fail("Unable to pull helm chart from the registry '{}': ".format(registry_path) + error_helm_chart_pull.decode("ascii")) + return output_helm_chart_pull.decode("ascii") + + +# Function to export helm charts +def export_helm_chart(registry_path, destination): + cmd_helm_chart_export = ["helm", "chart", "export", registry_path, "--destination", destination] + response_helm_chart_export = subprocess.Popen(cmd_helm_chart_export, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output_helm_chart_export, error_helm_chart_export = response_helm_chart_export.communicate() + if response_helm_chart_export.returncode != 0: + pytest.fail("Unable to export helm chart from the registry 
'{}': ".format(registry_path) + error_helm_chart_export.decode("ascii")) + return output_helm_chart_export.decode("ascii") + + +# Function to add a helm repository +def add_helm_repo(repo_name, repo_url): + cmd_helm_repo = ["helm", "repo", "add", repo_name, repo_url] + response_helm_repo = subprocess.Popen(cmd_helm_repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output_helm_repo, error_helm_repo = response_helm_repo.communicate() + if response_helm_repo.returncode != 0: + pytest.fail("Unable to add repository {} to helm: ".format(repo_url) + error_helm_repo.decode("ascii")) + return output_helm_repo.decode("ascii") + + +# Function to install helm charts +def install_helm_chart(helm_release_name, helm_release_namespace, helm_chart_path, wait=False, **kwargs): + cmd_helm_install = ["helm", "install", helm_release_name, helm_chart_path, "--namespace", helm_release_namespace] + if wait: + cmd_helm_install.extend(["--wait"]) + for key, value in kwargs.items(): + cmd_helm_install.extend(["--set", "{}={}".format(key, value)]) + response_helm_install = subprocess.Popen(cmd_helm_install, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output_helm_install, error_helm_install = response_helm_install.communicate() + if response_helm_install.returncode != 0: + pytest.fail("Unable to install helm release: " + error_helm_install.decode("ascii")) + return output_helm_install.decode("ascii") + + +# Function to delete helm chart +def delete_helm_release(helm_release_name, helm_release_namespace): + cmd_helm_delete = ["helm", "delete", helm_release_name, "--namespace", helm_release_namespace] + response_helm_delete = subprocess.Popen(cmd_helm_delete, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output_helm_delete, error_helm_delete = response_helm_delete.communicate() + if response_helm_delete.returncode != 0: + pytest.fail("Error occured while deleting the helm release: " + error_helm_delete.decode("ascii")) + return output_helm_delete.decode("ascii") + + +# Function 
to list helm release +def list_helm_release(helm_release_namespace): + cmd_helm_list = ["helm", "list", "--namespace", helm_release_namespace] + response_helm_list = subprocess.Popen(cmd_helm_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output_helm_list, error_helm_list = response_helm_list.communicate() + if response_helm_list.returncode != 0: + pytest.fail("Error occured while fetching the helm release: " + error_helm_list.decode("ascii")) + return output_helm_list.decode("ascii") diff --git a/test/e2e/src/common/kubernetes_configmap_utility.py b/test/e2e/src/common/kubernetes_configmap_utility.py new file mode 100644 index 000000000..caee9628e --- /dev/null +++ b/test/e2e/src/common/kubernetes_configmap_utility.py @@ -0,0 +1,8 @@ +import pytest + + +def get_namespaced_configmap(api_instance, namespace, configmap_name): + try: + return api_instance.read_namespaced_config_map(configmap_name, namespace) + except Exception as e: + pytest.fail("Error occured when retrieving configmap: " + str(e)) diff --git a/test/e2e/src/common/kubernetes_crd_utility.py b/test/e2e/src/common/kubernetes_crd_utility.py new file mode 100644 index 000000000..6b591845c --- /dev/null +++ b/test/e2e/src/common/kubernetes_crd_utility.py @@ -0,0 +1,26 @@ +import pytest + +from kubernetes import watch + + +# Function to get the CRD instance +def get_crd_instance(api_instance, group, version, namespace, plural, crd_name): + try: + return api_instance.get_namespaced_custom_object(group, version, namespace, plural, crd_name) + except Exception as e: + pytest.fail("Error occurred when retrieving crd information: " + str(e)) + + +# Function that watches events corresponding to given CRD instance and passes the events to a callback function +def watch_crd_instance(api_instance, group, version, namespace, plural, crd_name, timeout, callback=None): + if not callback: + return + field_selector = "metadata.name={}".format(crd_name) if crd_name else "" + try: + w = watch.Watch() + for event 
in w.stream(api_instance.list_namespaced_custom_object, group, version, namespace, plural, field_selector=field_selector, timeout_seconds=timeout): + if callback(event): + return + except Exception as e: + pytest.fail("Error occurred when watching crd instance events: " + str(e)) + pytest.fail("The watch on the crd instance events has timed out.") diff --git a/test/e2e/src/common/kubernetes_daemonset_utility.py b/test/e2e/src/common/kubernetes_daemonset_utility.py new file mode 100644 index 000000000..dd76a11d9 --- /dev/null +++ b/test/e2e/src/common/kubernetes_daemonset_utility.py @@ -0,0 +1,36 @@ +import pytest +from kubernetes import watch + +# Returns a list of daemon_sets in a given namespace +def list_daemon_set(api_instance, namespace, field_selector="", label_selector=""): + try: + return api_instance.list_namespaced_daemon_set(namespace, field_selector=field_selector, label_selector=label_selector) + except Exception as e: + pytest.fail("Error occured when retrieving daemon_sets: " + str(e)) + +# Deletes a daemon_set +def delete_daemon_set(api_instance, namespace, daemon_set_name): + try: + return api_instance.delete_namespaced_daemon_set(daemon_set_name, namespace) + except Exception as e: + pytest.fail("Error occured when deleting daemon_set: " + str(e)) + +# Read a daemon_set +def read_daemon_set(api_instance, namespace, daemon_set_name): + try: + return api_instance.read_namespaced_daemon_set(daemon_set_name, namespace) + except Exception as e: + pytest.fail("Error occured when reading daemon_set: " + str(e)) + +# Function that watches events corresponding to daemon_sets in the given namespace and passes the events to a callback function +def watch_daemon_set_status(api_instance, namespace, timeout, callback=None): + if not callback: + return + try: + w = watch.Watch() + for event in w.stream(api_instance.list_namespaced_daemon_set, namespace, timeout_seconds=timeout): + if callback(event): + return + except Exception as e: + print("Error occurred when 
checking daemon_set status: " + str(e)) + print("The watch on the daemon_set status has timed out. Please see the pod logs for more info.") diff --git a/test/e2e/src/common/kubernetes_deployment_utility.py b/test/e2e/src/common/kubernetes_deployment_utility.py new file mode 100644 index 000000000..1be7a6b71 --- /dev/null +++ b/test/e2e/src/common/kubernetes_deployment_utility.py @@ -0,0 +1,38 @@ +import pytest +from kubernetes import watch + +# Returns a list of deployments in a given namespace +def list_deployment(api_instance, namespace, field_selector="", label_selector=""): + try: + return api_instance.list_namespaced_deployment(namespace, field_selector=field_selector, label_selector=label_selector) + except Exception as e: + pytest.fail("Error occured when retrieving deployments: " + str(e)) + +# Deletes a deployment +def delete_deployment(api_instance, namespace, deployment_name): + try: + return api_instance.delete_namespaced_deployment(deployment_name, namespace) + except Exception as e: + pytest.fail("Error occured when deleting deployment: " + str(e)) + + +# Read a deployment +def read_deployment(api_instance, namespace, deployment_name): + try: + return api_instance.read_namespaced_deployment(deployment_name, namespace) + except Exception as e: + pytest.fail("Error occured when reading deployment: " + str(e)) + +# Function that watches events corresponding to deployments in the given namespace and passes the events to a callback function +def watch_deployment_status(api_instance, namespace, timeout, callback=None): + if not callback: + return + try: + w = watch.Watch() + for event in w.stream(api_instance.list_namespaced_deployment, namespace, timeout_seconds=timeout): + if callback(event): + return + except Exception as e: + print("Error occurred when checking deployment status: " + str(e)) + print("The watch on the deployment status has timed out. 
Please see the pod logs for more info.") + \ No newline at end of file diff --git a/test/e2e/src/common/kubernetes_namespace_utility.py b/test/e2e/src/common/kubernetes_namespace_utility.py new file mode 100644 index 000000000..cea5788c5 --- /dev/null +++ b/test/e2e/src/common/kubernetes_namespace_utility.py @@ -0,0 +1,32 @@ +import pytest +from kubernetes import watch + + +# Function that watches events corresponding to kubernetes namespaces and passes the events to a callback function +def watch_namespace(api_instance, timeout, callback=None): + if not callback: + return + try: + w = watch.Watch() + for event in w.stream(api_instance.list_namespace, timeout_seconds=timeout): + if callback(event): + return + except Exception as e: + pytest.fail("Error occurred when checking namespace status: " + str(e)) + pytest.fail("The watch on the namespaces has timed out.") + + +# Function to list all kubernetes namespaces +def list_namespace(api_instance): + try: + return api_instance.list_namespace() + except Exception as e: + pytest.fail("Error occured when retrieving namespaces: " + str(e)) + + +# Function to delete a kubernetes namespaces +def delete_namespace(api_instance, namespace_name): + try: + return api_instance.delete_namespace(namespace_name) + except Exception as e: + pytest.fail("Error occured when deleting namespace: " + str(e)) diff --git a/test/e2e/src/common/kubernetes_node_utility.py b/test/e2e/src/common/kubernetes_node_utility.py new file mode 100644 index 000000000..739af55f6 --- /dev/null +++ b/test/e2e/src/common/kubernetes_node_utility.py @@ -0,0 +1,13 @@ +import pytest + +def get_kubernetes_node_count(api_instance): + node_list = list_kubernetes_nodes(api_instance) + return len(node_list.items) + +def list_kubernetes_nodes(api_instance): + try: + return api_instance.list_node() + except Exception as e: + pytest.fail("Error occured while retrieving node information: " + str(e)) + + diff --git a/test/e2e/src/common/kubernetes_pod_utility.py 
import pytest
import time

from kubernetes import watch
from kubernetes.stream import stream


# Returns a kubernetes pod object in given namespace. Object description at: https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1PodList.md
def get_pod(api_instance, namespace, pod_name):
    """Read and return a single V1Pod."""
    try:
        return api_instance.read_namespaced_pod(pod_name, namespace)
    except Exception as err:
        pytest.fail("Error occured when retrieving pod information: " + str(err))


# Returns a list of kubernetes pod objects in a given namespace. Object description at: https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1PodList.md
def get_pod_list(api_instance, namespace, label_selector=""):
    """Return the V1PodList for `namespace`, optionally filtered by label."""
    try:
        return api_instance.list_namespaced_pod(namespace, label_selector=label_selector)
    except Exception as err:
        pytest.fail("Error occurred when retrieving pod information: " + str(err))


# get the content of the log file in the container via exec
def get_log_file_content(api_instance, namespace, podName, logfilePath):
    """Exec `tar cf - <logfilePath>` inside the pod and return the streamed output."""
    try:
        exec_command = ['tar', 'cf', '-', logfilePath]
        return stream(api_instance.connect_get_namespaced_pod_exec, podName, namespace,
                      command=exec_command, stderr=True, stdin=False, stdout=True, tty=False)
    except Exception as err:
        pytest.fail("Error occurred when retrieving log file content: " + str(err))


# Function that watches events corresponding to pods in the given namespace and passes the events to a callback function
def watch_pod_status(api_instance, namespace, timeout, callback=None):
    """Stream pod events to `callback`; fail the test if the watch times out."""
    if not callback:
        return
    try:
        watcher = watch.Watch()
        for pod_event in watcher.stream(api_instance.list_namespaced_pod, namespace,
                                        timeout_seconds=timeout):
            if callback(pod_event):
                return
    except Exception as err:
        pytest.fail("Error occurred when checking pod status: " + str(err))
    # Reached only when the stream ended without the callback succeeding.
    pytest.fail("The watch on the pods has timed out. Please see the pod logs for more info.")
import sys
import time

import pytest
from kubernetes import watch


# Function that watches events corresponding to pod logs and passes them to a callback function
def watch_pod_logs(api_instance, namespace, pod_name, container_name, timeout_seconds, callback=None):
    """Stream log lines of `container_name` to `callback` until it returns truthy.

    Fixes over the original:
      * the deadline was only checked after a log line arrived, so a container
        that stopped emitting lines blocked this watch forever — the stream now
        also carries `_request_timeout` to bound the underlying HTTP read;
      * a stream that ends without the callback succeeding is now reported as a
        timeout failure instead of returning silently.
    """
    if not callback:
        return
    deadline = time.time() + timeout_seconds
    try:
        w = watch.Watch()
        for event in w.stream(api_instance.read_namespaced_pod_log, pod_name, namespace,
                              container=container_name, _request_timeout=timeout_seconds):
            if callback(event):
                return
            if time.time() > deadline:
                pytest.fail("The watch on the pod logs has timed out.")
    except pytest.fail.Exception:
        raise  # let the deliberate timeout failure above propagate unchanged
    except Exception as e:
        pytest.fail("Error occurred when checking pod logs: " + str(e))
    # The stream ended (or the HTTP read timed out) without a callback match.
    pytest.fail("The watch on the pod logs has timed out.")


# Function that returns the pod logs of a given container.
def get_pod_logs(api_instance, pod_namespace, pod_name, container_name):
    """Return the full log text of `container_name` in the given pod."""
    try:
        return api_instance.read_namespaced_pod_log(pod_name, pod_namespace, container=container_name)
    except Exception as e:
        pytest.fail("Error occurred when fetching pod logs: " + str(e))


# This function returns the kubernetes secret object present in a given namespace
def get_kubernetes_secret(api_instance, namespace, secret_name):
    """Read and return the named V1Secret; exits the process on error."""
    try:
        return api_instance.read_namespaced_secret(secret_name, namespace)
    except Exception as e:
        sys.exit("Error occurred when retrieving secret '{}': ".format(secret_name) + str(e))
import sys

import pytest
from kubernetes import watch


# Function that watches events corresponding to kubernetes secrets and passes the events to a callback function
def watch_kubernetes_secret(api_instance, namespace, secret_name, timeout, callback=None):
    """Stream secret events (optionally narrowed to `secret_name`) to `callback`."""
    if not callback:
        return
    # Narrow the watch to the one secret when a name was supplied.
    field_selector = "metadata.name={}".format(secret_name) if secret_name else ""
    try:
        watcher = watch.Watch()
        for secret_event in watcher.stream(api_instance.list_namespaced_secret, namespace,
                                           field_selector=field_selector, timeout_seconds=timeout):
            if callback(secret_event):
                return
    except Exception as err:
        sys.exit("Error occurred when watching kubernetes secret events: " + str(err))
    # Reached only when the stream ended without the callback succeeding.
    sys.exit("The watch on the kubernetes secret events has timed out. Please see the pod logs for more info.")


# Returns a list of services in a given namespace
def list_service(api_instance, namespace, field_selector="", label_selector=""):
    """Return the V1ServiceList for `namespace`, optionally filtered."""
    try:
        return api_instance.list_namespaced_service(namespace, field_selector=field_selector,
                                                    label_selector=label_selector)
    except Exception as err:
        pytest.fail("Error occured when retrieving services: " + str(err))


# Deletes a service
def delete_service(api_instance, namespace, service_name):
    """Delete the named service and return the API response."""
    try:
        return api_instance.delete_namespaced_service(service_name, namespace)
    except Exception as err:
        pytest.fail("Error occured when deleting service: " + str(err))


def get_kubernetes_server_version(api_instance):
    """Return the cluster's git version string (e.g. 'v1.20.2')."""
    try:
        return api_instance.get_code().git_version
    except Exception as err:
        pytest.fail("Error occured when retrieving kubernetes server version: " + str(err))
import pytest
import shutil
import tarfile

from pathlib import Path


# Function to create the test result directory
def create_results_dir(results_dir):
    """Create `results_dir` (and any missing parents); existing dirs are fine.

    Fix: removed a stray debug `print(results_dir)` left in the original.
    """
    try:
        Path(results_dir).mkdir(parents=True, exist_ok=True)
    except Exception as e:
        pytest.fail("Unable to create the results directory: " + str(e))


# Function to append logs from the test run into a result file
def append_result_output(message, result_file_path):
    """Append `message` verbatim to the result file at `result_file_path`."""
    try:
        with open(result_file_path, "a") as result_file:
            result_file.write(message)
    except Exception as e:
        pytest.fail("Error while appending message '{}' to results file: ".format(message) + str(e))
@pytest.fixture(scope='session', autouse=True)
def env_dict():
    """Session-wide autouse fixture shared across parallel pytest subprocesses.

    The first subprocess to grab the file lock performs the one-time setup
    (results dir, env-var collection, validation) and pickles the dict to
    env.pkl; later subprocesses just load the pickle. After yield, each test
    bumps a completion counter in the pickle; the subprocess that finishes
    last runs the cleanup phase.
    """
    my_file = Path("env.pkl") # File to store the environment variables.
    with FileLock(str(my_file) + ".lock"): # Locking the file since each test will be run in parallel as separate subprocesses and may try to access the file simultaneously.
        env_dict = {}
        if not my_file.is_file():
            # First subprocess in: do the one-time setup.
            # Creating the results directory
            create_results_dir('/tmp/results')

            # Setting some environment variables
            env_dict['SETUP_LOG_FILE'] = '/tmp/results/setup'
            env_dict['TEST_AGENT_LOG_FILE'] = '/tmp/results/containerinsights'
            env_dict['NUM_TESTS_COMPLETED'] = 0

            print("Starting setup...")
            append_result_output("Starting setup...\n", env_dict['SETUP_LOG_FILE'])

            # Collecting environment variables
            env_dict['TENANT_ID'] = os.getenv('TENANT_ID')
            env_dict['CLIENT_ID'] = os.getenv('CLIENT_ID')
            env_dict['CLIENT_SECRET'] = os.getenv('CLIENT_SECRET')

            # get default query time interval for log analytics queries
            queryTimeInterval = int(os.getenv('DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES')) if os.getenv('DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES') else constants.DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES
            # add minute suffix since this format required for LA queries
            env_dict['DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES'] = str(queryTimeInterval) + "m"

            # get default query time interval for metrics queries
            # (kept as a plain int, unlike the LA interval above which gets the "m" suffix)
            env_dict['DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES'] = int(os.getenv('DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES')) if os.getenv('DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES') else constants.DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES

            # expected agent pod restart count
            env_dict['AGENT_POD_EXPECTED_RESTART_COUNT'] = int(os.getenv('AGENT_POD_EXPECTED_RESTART_COUNT')) if os.getenv('AGENT_POD_EXPECTED_RESTART_COUNT') else constants.AGENT_POD_EXPECTED_RESTART_COUNT

            # default to azure public cloud if AZURE_CLOUD not specified
            env_dict['AZURE_ENDPOINTS'] = constants.AZURE_CLOUD_DICT.get(os.getenv('AZURE_CLOUD')) if os.getenv('AZURE_CLOUD') else constants.AZURE_PUBLIC_CLOUD_ENDPOINTS

            if not env_dict.get('TENANT_ID'):
                pytest.fail('ERROR: variable TENANT_ID is required.')

            if not env_dict.get('CLIENT_ID'):
                pytest.fail('ERROR: variable CLIENT_ID is required.')

            if not env_dict.get('CLIENT_SECRET'):
                pytest.fail('ERROR: variable CLIENT_SECRET is required.')

            print("Setup Complete.")
            append_result_output("Setup Complete.\n", env_dict['SETUP_LOG_FILE'])

            # Persist the dict so sibling subprocesses can load it.
            with Path.open(my_file, "wb") as f:
                pickle.dump(env_dict, f, pickle.HIGHEST_PROTOCOL)
        else:
            # Setup already done by another subprocess; just load the pickled dict.
            with Path.open(my_file, "rb") as f:
                env_dict = pickle.load(f)

    yield env_dict

    # Teardown: bump the shared completion counter; the last subprocess cleans up.
    my_file = Path("env.pkl")
    with FileLock(str(my_file) + ".lock"):
        with Path.open(my_file, "rb") as f:
            env_dict = pickle.load(f)

        env_dict['NUM_TESTS_COMPLETED'] = 1 + env_dict.get('NUM_TESTS_COMPLETED')
        # NOTE(review): int(os.getenv('NUM_TESTS')) raises TypeError when NUM_TESTS
        # is unset — confirm the env var is always provided by the test harness.
        if env_dict['NUM_TESTS_COMPLETED'] == int(os.getenv('NUM_TESTS')):
            # Checking if cleanup is required.
            if os.getenv('SKIP_CLEANUP'):
                return
            print('Starting cleanup...')
            append_result_output("Starting Cleanup...\n", env_dict['SETUP_LOG_FILE'])

            print("Cleanup Complete.")
            append_result_output("Cleanup Complete.\n", env_dict['SETUP_LOG_FILE'])
            return

        # Not the last test: persist the updated counter for the next subprocess.
        with Path.open(my_file, "wb") as f:
            pickle.dump(env_dict, f, pickle.HIGHEST_PROTOCOL)
# This function checks the status of kubernetes pods
def check_kubernetes_pods_status(pod_namespace, label_selector, expectedPodRestartCount, outfile=None):
    """Fail the test unless every pod matched by `label_selector` in
    `pod_namespace` is Running with all containers ready, in a running state,
    and restarted no more than `expectedPodRestartCount` times.

    NOTE(review): pytest.fail raises an Exception subclass, so failures raised
    inside this try block are re-caught below and re-reported wrapped in the
    generic "Error occured while checking pods status" message.
    """
    try:
        api_instance = client.CoreV1Api()
        pod_list = get_pod_list(api_instance, pod_namespace, label_selector)
        append_result_output("podlist output {}\n".format(pod_list), outfile)
        if not pod_list:
            pytest.fail("pod_list shouldnt be null or empty")
        pods = pod_list.items
        if not pods:
            pytest.fail("pod items shouldnt be null or empty")
        if len(pods) <= 0:
            pytest.fail("pod count should be greater than 0")
        for pod in pods:
            status = pod.status
            podstatus = status.phase
            if not podstatus:
                pytest.fail("status should not be null or empty")
            if podstatus != "Running":
                pytest.fail("pod status should be in running state")
            containerStatuses = status.container_statuses
            if not containerStatuses:
                pytest.fail("containerStatuses shouldnt be nil or empty")
            if len(containerStatuses) <= 0:
                pytest.fail("length containerStatuses should be greater than 0")
            for containerStatus in containerStatuses:
                containerId = containerStatus.container_id
                if not containerId:
                    pytest.fail("containerId shouldnt be nil or empty")
                image = containerStatus.image
                if not image:
                    pytest.fail("image shouldnt be nil or empty")
                imageId = containerStatus.image_id
                if not imageId:
                    pytest.fail("imageId shouldnt be nil or empty")
                restartCount = containerStatus.restart_count
                if restartCount > expectedPodRestartCount:
                    pytest.fail("restartCount shouldnt be greater than expected pod restart count: {}".format(expectedPodRestartCount))
                ready = containerStatus.ready
                if not ready:
                    pytest.fail("container status should be in ready state")
                containerState = containerStatus.state
                if not containerState.running:
                    pytest.fail("container state should be in running state")
    except Exception as e:
        pytest.fail("Error occured while checking pods status: " + str(e))


def check_namespace_status_using_watch(outfile=None, namespace_list=None, timeout=300):
    """Watch namespace events until every namespace in `namespace_list` reports
    phase 'Active', or the watch times out (which fails the test)."""
    # Track each namespace: 0 = not yet Active, 1 = Active.
    namespace_dict = {}
    for namespace in namespace_list:
        namespace_dict[namespace] = 0
    append_result_output(
        "Namespace dict: {}\n".format(namespace_dict), outfile)
    print("Generated the namespace dictionary.")

    # The callback function to check the namespace status
    def namespace_event_callback(event):
        try:
            append_result_output("{}\n".format(event), outfile)
            namespace_name = event['raw_object'].get('metadata').get('name')
            namespace_status = event['raw_object'].get('status')
            if not namespace_status:
                return False
            # NOTE(review): an Active namespace NOT in namespace_list also gets
            # added to the dict here — it does not affect the all() check below
            # only because its value is 1; confirm this is intended.
            if namespace_status.get('phase') == 'Active':
                namespace_dict[namespace_name] = 1
            # Stop watching once every tracked namespace is Active.
            if all(ele == 1 for ele in list(namespace_dict.values())):
                return True
            return False
        except Exception as e:
            pytest.fail(
                "Error occured while processing the namespace event: " + str(e))

    # Checking the namespace status
    api_instance = client.CoreV1Api()
    watch_namespace(api_instance, timeout, namespace_event_callback)
# This function checks the status of daemonset in a given namespace. The daemonset to be monitored are identified using the pod label list parameter.
def check_kubernetes_daemonset_status_using_watch(daemonset_namespace, outfile=None, daemonset_label_list=None, timeout=300):
    """Watch daemonset events in `daemonset_namespace` and fail the test if a
    daemonset selected by `daemonset_label_list` reports mismatched scheduled /
    ready counts or any misscheduled pods.

    Fixes over the original:
      * the scheduled-count mismatch message wrongly repeated
        "currentNumberScheduled" instead of naming desiredNumberScheduled;
      * pytest.fail() raised inside the callback was swallowed by the generic
        `except Exception` and merely printed — failures now propagate;
      * numberMisscheduled may be absent (None) in a status event; the
        comparison is guarded instead of raising TypeError.
    """
    daemonset_label_dict = {}
    if daemonset_label_list:  # This parameter is a list of label values to identify the daemonsets that we want to monitor in the given namespace
        for daemonset_label in daemonset_label_list:
            daemonset_label_dict[daemonset_label] = 0
        append_result_output("daemonset label dict: {}\n".format(
            daemonset_label_dict), outfile)
        print("Generated the daemonset dictionary.")

    # The callback function that validates each daemonset status event
    def daemonset_event_callback(event):
        try:
            daemonset_status = event['raw_object'].get('status')
            daemonset_metadata = event['raw_object'].get('metadata')
            daemonset_metadata_labels = daemonset_metadata.get('labels')
            if not daemonset_metadata_labels:
                return False

            # Only examine daemonsets carrying one of the monitored label values
            # (if several monitored labels match, the last one wins).
            daemonset_metadata_label_values = daemonset_metadata_labels.values()
            current_label_value = None
            for label_value in daemonset_metadata_label_values:
                if label_value in daemonset_label_dict:
                    current_label_value = label_value
            if not current_label_value:
                return False

            currentNumberScheduled = daemonset_status.get('currentNumberScheduled')
            desiredNumberScheduled = daemonset_status.get('desiredNumberScheduled')
            numberAvailable = daemonset_status.get('numberAvailable')
            numberReady = daemonset_status.get('numberReady')
            numberMisscheduled = daemonset_status.get('numberMisscheduled')

            if currentNumberScheduled != desiredNumberScheduled:
                pytest.fail("currentNumberScheduled doesnt match with desiredNumberScheduled for the daemonset {}.".format(
                    daemonset_metadata.get('name')))

            if numberAvailable != numberReady:
                pytest.fail("numberAvailable doesnt match with expected numberReady for the daemonset {}.".format(
                    daemonset_metadata.get('name')))

            # Guard: the field can be missing from a partial status update.
            if numberMisscheduled and numberMisscheduled > 0:
                pytest.fail("numberMisscheduled is greater than 0 for the daemonset {}.".format(
                    daemonset_metadata.get('name')))

            return True
        except pytest.fail.Exception:
            raise  # do not swallow deliberate test failures
        except Exception as e:
            print("Error occured while processing the pod event: " + str(e))

    # Start the watch only when there is something to monitor.
    if daemonset_label_dict:
        api_instance = client.AppsV1Api()
        watch_daemon_set_status(
            api_instance, daemonset_namespace, timeout, daemonset_event_callback)
# This function checks the status of deployment in a given namespace. The deployment to be monitored are identified using the pod label list parameter.
def check_kubernetes_deployments_status_using_watch(deployment_namespace, outfile=None, deployment_label_list=None, timeout=300):
    """Watch deployment events in `deployment_namespace` and fail the test if a
    deployment selected by `deployment_label_list` reports available/ready
    replica counts that differ from the desired replica count.

    Fix over the original: pytest.fail() raised inside the callback was
    swallowed by the generic `except Exception` and merely printed, so replica
    mismatches never actually failed the test — failures now propagate.
    """
    deployment_label_dict = {}
    if deployment_label_list:  # This parameter is a list of label values to identify the deployments that we want to monitor in the given namespace
        for deployment_label in deployment_label_list:
            deployment_label_dict[deployment_label] = 0
        append_result_output("Deployment label dict: {}\n".format(
            deployment_label_dict), outfile)
        print("Generated the deployment dictionary.")

    # The callback function that validates each deployment status event
    def deployment_event_callback(event):
        try:
            deployment_status = event['raw_object'].get('status')
            deployment_metadata = event['raw_object'].get('metadata')
            deployment_metadata_labels = deployment_metadata.get('labels')
            if not deployment_metadata_labels:
                return False

            # Only examine deployments carrying one of the monitored label values
            # (if several monitored labels match, the last one wins).
            deployment_metadata_label_values = deployment_metadata_labels.values()
            current_label_value = None
            for label_value in deployment_metadata_label_values:
                if label_value in deployment_label_dict:
                    current_label_value = label_value
            if not current_label_value:
                return False

            availableReplicas = deployment_status.get('availableReplicas')
            readyReplicas = deployment_status.get('readyReplicas')
            replicas = deployment_status.get('replicas')

            if replicas != availableReplicas:
                pytest.fail("availableReplicas doesnt match with expected replicas for the deployment {}.".format(
                    deployment_metadata.get('name')))

            if replicas != readyReplicas:
                pytest.fail("readyReplicas doesnt match with expected replicas for the deployment {}.".format(
                    deployment_metadata.get('name')))

            return True
        except pytest.fail.Exception:
            raise  # do not swallow deliberate test failures
        except Exception as e:
            print("Error occured while processing the pod event: " + str(e))

    # Start the watch only when there is something to monitor.
    if deployment_label_dict:
        api_instance = client.AppsV1Api()
        watch_deployment_status(
            api_instance, deployment_namespace, timeout, deployment_event_callback)
# This function checks the status of pods in a given namespace. The pods to be monitored are identified using the pod label list parameter.
def check_kubernetes_pods_status_using_watch(pod_namespace, outfile=None, pod_label_list=None, timeout=300):
    """Watch pod events in `pod_namespace` until every pod selected by
    `pod_label_list` has all containers running with zero restarts."""
    # Track each monitored label value: 0 = pod not (yet) fully running, 1 = running.
    pod_label_dict = {}
    if pod_label_list:  # This parameter is a list of label values to identify the pods that we want to monitor in the given namespace
        for pod_label in pod_label_list:
            pod_label_dict[pod_label] = 0
        append_result_output(
            "Pod label dict: {}\n".format(pod_label_dict), outfile)
        print("Generated the pods dictionary.")

    # The callback function to check if the pod is in running state
    def pod_event_callback(event):
        try:
            # append_result_output("{}\n".format(event), outfile)
            pod_status = event['raw_object'].get('status')
            pod_metadata = event['raw_object'].get('metadata')
            pod_metadata_labels = pod_metadata.get('labels')
            if not pod_metadata_labels:
                return False

            # It contains the list of all label values for the pod whose event was called.
            pod_metadata_label_values = pod_metadata_labels.values()
            # This label value will be common in pod event and label list provided and will be monitored
            # (if several monitored labels match, the last one wins).
            current_label_value = None
            for label_value in pod_metadata_label_values:
                if label_value in pod_label_dict:
                    current_label_value = label_value
            if not current_label_value:
                return False

            if pod_status.get('containerStatuses'):
                for container in pod_status.get('containerStatuses'):
                    # Any restart is treated as a failure.
                    # NOTE(review): pytest.fail raises an Exception subclass, so this
                    # is re-caught below and re-reported with the wrapper message.
                    if container.get('restartCount') > 0:
                        pytest.fail("The pod {} was restarted. Please see the pod logs for more info.".format(
                            container.get('name')))
                    if not container.get('state').get('running'):
                        pod_label_dict[current_label_value] = 0
                        return False
                    else:
                        pod_label_dict[current_label_value] = 1
            # Stop the watch once every monitored label has a running pod.
            if all(ele == 1 for ele in list(pod_label_dict.values())):
                return True
            return False
        except Exception as e:
            pytest.fail(
                "Error occured while processing the pod event: " + str(e))

    # Checking status of all pods
    if pod_label_dict:
        api_instance = client.CoreV1Api()
        watch_pod_status(api_instance, pod_namespace,
                         timeout, pod_event_callback)


# Function to check if the crd instance status has been updated with the status fields mentioned in the 'status_list' parameter
def check_kubernetes_crd_status_using_watch(crd_group, crd_version, crd_namespace, crd_plural, crd_name, status_dict={}, outfile=None, timeout=300):
    """Watch the named CRD instance until its status carries every key/value in `status_dict`.

    NOTE(review): `status_dict={}` is a mutable default argument; it is only
    read here, but a None default resolved inside the body would be safer.
    """
    # The callback function to check if the crd event received has been updated with the status fields
    def crd_event_callback(event):
        try:
            append_result_output("{}\n".format(event), outfile)
            crd_status = event['raw_object'].get('status')
            if not crd_status:
                return False
            for status_field in status_dict:
                # Keep watching until the field appears...
                if not crd_status.get(status_field):
                    return False
                # ...but fail outright if it appears with the wrong value.
                if crd_status.get(status_field) != status_dict.get(status_field):
                    pytest.fail(
                        "The CRD instance status has been updated with incorrect value for '{}' field.".format(status_field))
            return True
        except Exception as e:
            pytest.fail("Error occured while processing crd event: " + str(e))

    # Checking if CRD instance has been updated with status fields
    api_instance = client.CustomObjectsApi()
    watch_crd_instance(api_instance, crd_group, crd_version, crd_namespace,
                       crd_plural, crd_name, timeout, crd_event_callback)
# Function to monitor the pod logs. It will ensure that are logs passed in the 'log_list' parameter are present in the container logs.
def check_kubernetes_pod_logs_using_watch(pod_namespace, pod_name, container_name, logs_list=None, error_logs_list=None, outfile=None, timeout=300):
    """Stream the container's log: fail on any entry from `error_logs_list`,
    succeed once every entry in `logs_list` has been seen."""
    # 0 = expected log line not yet seen, 1 = seen.
    logs_dict = dict.fromkeys(logs_list, 0)
    print("Generated the logs dictionary.")

    # Callback invoked for every streamed log line.
    def pod_log_event_callback(event):
        try:
            append_result_output("{}\n".format(event), outfile)
            for error_log in error_logs_list:
                if error_log in event:
                    pytest.fail("Error log found: " + event)
            for expected in logs_dict:
                if expected in event:
                    logs_dict[expected] = 1
            return all(seen == 1 for seen in list(logs_dict.values()))
        except Exception as err:
            pytest.fail(
                "Error occured while processing pod log event: " + str(err))

    # Checking the pod logs
    watch_pod_logs(client.CoreV1Api(), pod_namespace, pod_name,
                   container_name, timeout, pod_log_event_callback)
# Function to monitor the kubernetes secret. It will determine if the secret has been successfully created.
def check_kubernetes_secret_using_watch(secret_namespace, secret_name, timeout=300):
    """Watch `secret_name` in `secret_namespace` until an event carrying secret data arrives."""
    # Callback: succeed as soon as an event for the secret carries a non-empty data map.
    def secret_event_callback(event):
        try:
            return bool(event['raw_object'].get('data'))
        except Exception as err:
            pytest.fail(
                "Error occured while processing secret event: " + str(err))

    # Checking the kubernetes secret
    core_api = client.CoreV1Api()
    watch_kubernetes_secret(core_api, secret_namespace,
                            secret_name, timeout, secret_event_callback)
# validation of ds agent workflows
def test_ds_workflows(env_dict):
    """Validate daemonset agent workflows: every agent DS pod's log file must
    show both the container-perf and container-inventory emit streams.

    Fix over the original: the ContainerInventory failure message was a
    copy-paste of the ContainerPerf one, so a missing inventory stream was
    reported as a perf-stream failure.
    """
    print("Starting daemonset workflows test.")
    append_result_output("test_ds_workflows start \n",
                         env_dict['TEST_AGENT_LOG_FILE'])
    # Loading in-cluster kube-config
    try:
        config.load_incluster_config()
    except Exception as e:
        pytest.fail("Error loading the in-cluster config: " + str(e))

    print("getting pod list")
    api_instance = client.CoreV1Api()
    pod_list = get_pod_list(api_instance, constants.AGENT_RESOURCES_NAMESPACE,
                            constants.AGENT_DAEMON_SET_PODS_LABEL_SELECTOR)
    if not pod_list:
        pytest.fail("pod_list shouldnt be null or empty")

    if len(pod_list.items) <= 0:
        pytest.fail("number of items in pod list should be greater than 0")

    for podItem in pod_list.items:
        podName = podItem.metadata.name
        # Pull the agent's log file out of the container via exec/tar.
        logcontent = get_log_file_content(
            api_instance, constants.AGENT_RESOURCES_NAMESPACE, podName, constants.AGENT_OMSAGENT_LOG_PATH)
        if not logcontent:
            pytest.fail("logcontent should not be null or empty for pod: " + podName)
        loglines = logcontent.split("\n")
        if len(loglines) <= 0:
            pytest.fail("number of log lines should be greater than 0 for pod :" + podName)

        # Scan once for both expected emit streams.
        IsContainerPerfEmitStream = False
        IsContainerInventoryStream = False
        for line in loglines:
            if line.find(constants.CONTAINER_PERF_EMIT_STREAM) >= 0:
                IsContainerPerfEmitStream = True
            if line.find(constants.CONTAINER_INVENTORY_EMIT_STREAM) >= 0:
                IsContainerInventoryStream = True

        if not IsContainerPerfEmitStream:
            pytest.fail("ContainerPerf stream not emitted successfully from pod:" + podName)
        if not IsContainerInventoryStream:
            pytest.fail("ContainerInventory stream not emitted successfully from pod:" + podName)

    append_result_output("test_ds_workflows end \n",
                         env_dict['TEST_AGENT_LOG_FILE'])
    print("Successfully completed daemonset workflows test.")
test_e2e_workflows(env_dict): + print("Starting e2e workflows test.") + append_result_output("test_e2e_workflows start \n", + env_dict['TEST_AGENT_LOG_FILE']) + # Loading in-cluster kube-config + try: + config.load_incluster_config() + except Exception as e: + pytest.fail("Error loading the in-cluster config: " + str(e)) + + # query time interval for LA queries + queryTimeInterval = env_dict['DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES'] + if not queryTimeInterval: + pytest.fail("DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES should not be null or empty") + + # get the cluster resource id from replicaset pod envvars + api_instance = client.CoreV1Api() + pod_list = get_pod_list(api_instance, constants.AGENT_RESOURCES_NAMESPACE, + constants.AGENT_DEPLOYMENT_PODS_LABEL_SELECTOR) + + if not pod_list: + pytest.fail("pod_list shouldnt be null or empty") + + if len(pod_list.items) <= 0: + pytest.fail("number of items in pod list should be greater than 0") + + envVars = pod_list.items[0].spec.containers[0].env + if not envVars: + pytest.fail("environment variables should be defined in the replicaset pod") + + clusterResourceId = '' + for env in envVars: + if env.name == "AKS_RESOURCE_ID": + clusterResourceId = env.value + print("cluster resource id: {}".format(clusterResourceId)) + + if not clusterResourceId: + pytest.fail("failed to get clusterResourceId from replicaset pod environment variables") + + # fetch AAD token for log analytics resource for the queries + tenant_id = env_dict.get('TENANT_ID') + authority_uri = env_dict.get('AZURE_ENDPOINTS').get('activeDirectory') + tenant_id + client_id = env_dict.get('CLIENT_ID') + client_secret = env_dict.get('CLIENT_SECRET') + resource = env_dict.get('AZURE_ENDPOINTS').get('logAnalytics') + aad_token = fetch_aad_token(client_id, client_secret, authority_uri, resource) + if not aad_token: + pytest.fail("failed to fetch AAD token") + + access_token = aad_token.get('accessToken') + if not access_token: + pytest.fail("access_token shouldnt be 
null or empty") + + # validate e2e workflows by checking data in log analytics workspace through resource centric queries + queryUrl = resource + "/v1" + clusterResourceId + "/query" + Headers = { + "Authorization": str("Bearer " + access_token), + "Content-Type": "application/json" + } + # KubePodInventory + query = constants.KUBE_POD_INVENTORY_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('KUBE_POD_INVENTORY')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} and workflow: {1}".format(clusterResourceId, 'KUBE_POD_INVENTORY')) + + # KubeNodeInventory + query = constants.KUBE_NODE_INVENTORY_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('KUBE_NODE_INVENTORY')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'KUBE_NODE_INVENTORY')) + + # KubeServices + query = constants.KUBE_SERVICES_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('KUBE_SERVICES')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'KUBE_SERVICES')) + + # KubeEvents + query = constants.KUBE_EVENTS_QUERY.format(queryTimeInterval) + 
params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('KUBE_EVENTS')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'KUBE_EVENTS')) + + # Container Node Inventory + query = constants.CONTAINER_NODE_INVENTORY_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_NODE_INVENTORY')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_NODE_INVENTORY')) + + # Node Perf + # cpu capacity + query = constants.NODE_PERF_CPU_CAPCITY_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_CPU_CAPCITY')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_CPU_CAPCITY')) + + # memory capacity + query = constants.NODE_PERF_MEMORY_CAPCITY_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_MEMORY_CAPCITY')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not 
rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_MEMORY_CAPCITY')) + + # cpu allocatable + query = constants.NODE_PERF_CPU_ALLOCATABLE_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_CPU_ALLOCATABLE')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_CPU_ALLOCATABLE')) + + # memory allocatable + query = constants.NODE_PERF_MEMORY_ALLOCATABLE_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_MEMORY_ALLOCATABLE')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_MEMORY_ALLOCATABLE')) + + # cpu usage + query = constants.NODE_PERF_CPU_USAGE_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_CPU_USAGE')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_CPU_USAGE')) + + # memory rss usage + query = constants.NODE_PERF_MEMORY_RSS_USAGE_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = 
requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_MEMORY_RSS_USAGE')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_MEMORY_RSS_USAGE')) + + # memory ws usage + query = constants.NODE_PERF_MEMORY_WS_USAGE_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_MEMORY_WS_USAGE')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_MEMORY_WS_USAGE')) + + # restartime epoch + query = constants.NODE_PERF_RESTART_TIME_EPOCH_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('NODE_PERF_RESTART_TIME_EPOCH')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'NODE_PERF_RESTART_TIME_EPOCH')) + + # Container Perf + # container cpu limits + query = constants.CONTAINER_PERF_CPU_LIMITS_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_CPU_LIMITS')) + + rowCount = 
result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_CPU_LIMITS')) + + # container memory limits + query = constants.CONTAINER_PERF_MEMORY_LIMITS_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_MEMORY_LIMITS')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_MEMORY_LIMITS')) + + # cpu requests + query = constants.CONTAINER_PERF_CPU_REQUESTS_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_CPU_REQUESTS')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_CPU_REQUESTS')) + + # memory requests + query = constants.CONTAINER_PERF_MEMORY_REQUESTS_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_MEMORY_REQUESTS_QUERY')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_MEMORY_REQUESTS')) + + # cpu usage + query = 
constants.CONTAINER_PERF_CPU_USAGE_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_CPU_USAGE')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_CPU_USAGE')) + + # memory rss usage + query = constants.CONTAINER_PERF_MEMORY_RSS_USAGE_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_MEMORY_RSS_USAGE')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_MEMORY_RSS_USAGE')) + + # memory ws usage + query = constants.CONTAINER_PERF_MEMORY_WS_USAGE_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_PERF_MEMORY_WS_USAGE')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_MEMORY_WS_USAGE')) + + # restart time epoch + query = constants.CONTAINER_PERF_RESTART_TIME_EPOCH_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null 
or empty for workflow: {0}".format('CONTAINER_PERF_RESTART_TIME_EPOCH')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_PERF_RESTART_TIME_EPOCH')) + + # Container log + query = constants.CONTAINER_LOG_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('CONTAINER_LOG')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'CONTAINER_LOG')) + + # InsightsMetrics + query = constants.INSIGHTS_METRICS_QUERY.format(queryTimeInterval) + params = { 'query': query} + result = requests.get(queryUrl, params=params, headers=Headers, verify=False) + if not result: + pytest.fail("log analytics query response shouldnt be null or empty for workflow: {0}".format('INSIGHTS_METRICS')) + + rowCount = result.json()['tables'][0]['rows'][0][0] + if not rowCount: + pytest.fail("rowCount should be greater than for cluster: {0} for workflow: {1} ".format(clusterResourceId, 'INSIGHTS_METRICS')) + + append_result_output("test_e2e_workflows end \n", + env_dict['TEST_AGENT_LOG_FILE']) + print("Successfully completed e2e workflows test.") diff --git a/test/e2e/src/core/test_node_metrics_e2e_workflow.py b/test/e2e/src/core/test_node_metrics_e2e_workflow.py new file mode 100755 index 000000000..4346f89a8 --- /dev/null +++ b/test/e2e/src/core/test_node_metrics_e2e_workflow.py @@ -0,0 +1,420 @@ +import pytest +import constants +import requests + +from arm_rest_utility import fetch_aad_token +from kubernetes import client, config +from kubernetes_pod_utility import get_pod_list +from results_utility import 
import pytest
import constants
import requests

from arm_rest_utility import fetch_aad_token
from kubernetes import client, config
from kubernetes_pod_utility import get_pod_list
from results_utility import append_result_output
from datetime import datetime, timedelta

pytestmark = pytest.mark.agentests


def _validate_node_metric(resourceManager, clusterResourceId, starttime, endtime,
                          metricName, Headers):
    """Query one node custom metric through the ARM metrics REST API and fail
    the test unless the response contains that metric with a non-empty
    timeseries.

    resourceManager   -- ARM endpoint base URL (a trailing '/' is tolerated)
    clusterResourceId -- full ARM resource id of the cluster under test
    starttime/endtime -- ISO-8601 UTC timestamps bounding the query timespan
    metricName        -- metric to query in the node metrics namespace
    Headers           -- HTTP headers carrying the AAD bearer token
    """
    custommetricsUrl = '{0}{1}/providers/microsoft.Insights/metrics?timespan={2}/{3}&interval=FULL&metricnames={4}&aggregation={5}&metricNamespace={6}&validatedimensions=false&api-version={7}'.format(
        resourceManager.rstrip("/"),
        clusterResourceId,
        starttime,
        endtime,
        metricName,
        constants.NODE_METRIC_METRIC_AGGREGATION,
        constants.NODE_METRICS_NAMESPACE,
        constants.METRICS_API_VERSION)

    # NOTE(review): verify=False disables TLS certificate verification;
    # presumably intentional for the test environment — confirm.
    response = requests.get(custommetricsUrl, params={},
                            headers=Headers, verify=False)

    if not response:
        pytest.fail(
            "response of the metrics query API shouldnt be null or empty")

    if response.status_code != 200:
        pytest.fail("metrics query API failed with an error code: {}".format(
            response.status_code))

    responseJSON = response.json()
    if not responseJSON:
        pytest.fail("response JSON shouldnt be null or empty")

    namespace = responseJSON['namespace']
    if namespace != constants.NODE_METRICS_NAMESPACE:
        pytest.fail("got the namespace: {0} but expected namespace:{1} in the response".format(
            namespace, constants.NODE_METRICS_NAMESPACE))

    responseValues = responseJSON['value']
    if not responseValues:
        pytest.fail("response JSON shouldnt be null or empty")

    if len(responseValues) <= 0:
        pytest.fail(
            "length of value array in the response should be greater than 0")

    for responseVal in responseValues:
        name = responseVal['name']['value']
        if name != metricName:
            pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(
                name, metricName))
        timeseries = responseVal['timeseries']
        if not timeseries:
            pytest.fail("metric series shouldnt be null or empty for metric:{0} in namespace: {1}".format(
                metricName, constants.NODE_METRICS_NAMESPACE))
        if len(timeseries) <= 0:
            pytest.fail("length of timeseries should be greater than 0 for metric: {0} in namespace :{1}".format(
                metricName, constants.NODE_METRICS_NAMESPACE))


# validation of node metrics e2e workflow
def test_node_metrics_e2e_workflow(env_dict):
    """End-to-end validation that every node-level custom metric emitted by
    the agent is queryable from the Azure Monitor custom metrics store.

    env_dict -- test environment settings (AAD credentials, Azure endpoints,
                query interval, log file path); supplied by the conftest
                fixture. Fails the test via pytest.fail on any configuration,
                cluster-lookup, auth, or metric-validation error.
    """
    print("Starting node metrics e2e workflow test.")
    append_result_output("test_node_metrics_e2e_workflow start \n",
                         env_dict['TEST_AGENT_LOG_FILE'])
    # Loading in-cluster kube-config
    try:
        config.load_incluster_config()
    except Exception as e:
        pytest.fail("Error loading the in-cluster config: " + str(e))

    # query time interval for metric queries
    metricQueryIntervalInMins = env_dict['DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES']
    if not metricQueryIntervalInMins:
        pytest.fail(
            "DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES should not be null or empty or 0")

    # get the cluster resource id from replicaset pod envvars
    api_instance = client.CoreV1Api()
    pod_list = get_pod_list(api_instance, constants.AGENT_RESOURCES_NAMESPACE,
                            constants.AGENT_DEPLOYMENT_PODS_LABEL_SELECTOR)

    if not pod_list:
        pytest.fail("pod_list shouldnt be null or empty")

    if len(pod_list.items) <= 0:
        pytest.fail("number of items in pod list should be greater than 0")

    envVars = pod_list.items[0].spec.containers[0].env
    if not envVars:
        pytest.fail(
            "environment variables should be defined in the replicaset pod")

    clusterResourceId = ''
    for env in envVars:
        if env.name == "AKS_RESOURCE_ID":
            clusterResourceId = env.value
            print("cluster resource id: {}".format(clusterResourceId))

    if not clusterResourceId:
        pytest.fail(
            "failed to get clusterResourceId from replicaset pod environment variables")

    # fetch AAD token for metric queries
    tenant_id = env_dict.get('TENANT_ID')
    authority_uri = env_dict.get('AZURE_ENDPOINTS').get(
        'activeDirectory') + tenant_id
    client_id = env_dict.get('CLIENT_ID')
    client_secret = env_dict.get('CLIENT_SECRET')
    resourceManager = env_dict.get('AZURE_ENDPOINTS').get('resourceManager')
    aad_token = fetch_aad_token(
        client_id, client_secret, authority_uri, resourceManager)
    if not aad_token:
        pytest.fail("failed to fetch AAD token")

    access_token = aad_token.get('accessToken')
    if not access_token:
        pytest.fail("access_token shouldnt be null or empty")

    # validate metrics e2e workflow
    # BUGFIX: use the validated, environment-supplied query interval; the
    # original hard-coded constants.DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES
    # here, silently ignoring the configured value it had just validated.
    now = datetime.utcnow()
    endtime = now.isoformat()[:-3] + 'Z'
    starttime = (now - timedelta(
        minutes=int(metricQueryIntervalInMins))).isoformat()[:-3] + 'Z'
    Headers = {
        "Authorization": str("Bearer " + access_token),
        "Content-Type": "application/json",
        "content-length": "0"
    }

    # Every node-level metric the agent emits; each is validated identically
    # by the helper above. (BUGFIX: the original memoryWorkingSetBytes stanza
    # referenced the non-existent constants.NODE_MEMORYE_WS_METRIC_NAME in its
    # failure path, which would raise AttributeError instead of failing
    # cleanly.)
    nodeMetricNames = [
        constants.NODE_MEMORY_RSS_METRIC_NAME,
        constants.NODE_MEMORY_RSS_PERCENTAGE_METRIC_NAME,
        constants.NODE_MEMORY_WS_METRIC_NAME,
        constants.NODE_MEMORY_WS_PERCENTAGE_METRIC_NAME,
        constants.NODE_CPU_USAGE_MILLI_CORES_METRIC_NAME,
        constants.NODE_CPU_USAGE_PERCENTAGE_METRIC_NAME,
        constants.NODE_COUNT_METRIC_NAME,
    ]
    for metricName in nodeMetricNames:
        _validate_node_metric(resourceManager, clusterResourceId,
                              starttime, endtime, metricName, Headers)

    append_result_output("test_node_metrics_e2e_workflow end \n",
                         env_dict['TEST_AGENT_LOG_FILE'])
    print("Successfully completed node metrics e2e workflow test.")
import pytest
import constants
import requests

from arm_rest_utility import fetch_aad_token
from kubernetes import client, config
from kubernetes_pod_utility import get_pod_list
from results_utility import append_result_output
from datetime import datetime, timedelta

pytestmark = pytest.mark.agentests


# validation of pod metrics e2e workflows
def test_pod_metrics_e2e_workflow(env_dict):
    """End-to-end validation that the pod-level custom metric (podCount) emitted
    by the agent is queryable from the Azure Monitor custom metrics store.

    env_dict -- test environment settings (AAD credentials, Azure endpoints,
                query interval, log file path); supplied by the conftest
                fixture. Fails the test via pytest.fail on any configuration,
                cluster-lookup, auth, or metric-validation error.
    """
    print("Starting pod metrics e2e workflows test.")
    append_result_output("test_pod_metrics_e2e_workflow start \n",
                         env_dict['TEST_AGENT_LOG_FILE'])
    # Loading in-cluster kube-config
    try:
        config.load_incluster_config()
    except Exception as e:
        pytest.fail("Error loading the in-cluster config: " + str(e))

    # query time interval for metrics queries
    metricQueryIntervalInMins = env_dict['DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES']
    if not metricQueryIntervalInMins:
        pytest.fail(
            "DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES should not be null or empty or 0")

    # get the cluster resource id from replicaset pod envvars
    api_instance = client.CoreV1Api()
    pod_list = get_pod_list(api_instance, constants.AGENT_RESOURCES_NAMESPACE,
                            constants.AGENT_DEPLOYMENT_PODS_LABEL_SELECTOR)

    if not pod_list:
        pytest.fail("pod_list shouldnt be null or empty")

    if len(pod_list.items) <= 0:
        pytest.fail("number of items in pod list should be greater than 0")

    envVars = pod_list.items[0].spec.containers[0].env
    if not envVars:
        pytest.fail(
            "environment variables should be defined in the replicaset pod")

    clusterResourceId = ''
    for env in envVars:
        if env.name == "AKS_RESOURCE_ID":
            clusterResourceId = env.value
            print("cluster resource id: {}".format(clusterResourceId))

    if not clusterResourceId:
        pytest.fail(
            "failed to get clusterResourceId from replicaset pod environment variables")

    # fetch AAD token for metrics queries
    tenant_id = env_dict.get('TENANT_ID')
    authority_uri = env_dict.get('AZURE_ENDPOINTS').get(
        'activeDirectory') + tenant_id
    client_id = env_dict.get('CLIENT_ID')
    client_secret = env_dict.get('CLIENT_SECRET')
    resourceManager = env_dict.get('AZURE_ENDPOINTS').get('resourceManager')
    aad_token = fetch_aad_token(
        client_id, client_secret, authority_uri, resourceManager)
    if not aad_token:
        pytest.fail("failed to fetch AAD token")

    access_token = aad_token.get('accessToken')
    if not access_token:
        pytest.fail("access_token shouldnt be null or empty")

    # validate metrics e2e workflow
    # BUGFIX: use the validated, environment-supplied query interval; the
    # original hard-coded constants.DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES
    # here, silently ignoring the configured value it had just validated.
    now = datetime.utcnow()
    endtime = now.isoformat()[:-3] + 'Z'
    starttime = (now - timedelta(
        minutes=int(metricQueryIntervalInMins))).isoformat()[:-3] + 'Z'
    Headers = {
        "Authorization": str("Bearer " + access_token),
        "Content-Type": "application/json",
        "content-length": "0"
    }
    params = {}
    # pod metric - PodCount
    custommetricsUrl = '{0}{1}/providers/microsoft.Insights/metrics?timespan={2}/{3}&interval=FULL&metricnames={4}&aggregation={5}&metricNamespace={6}&validatedimensions=false&api-version={7}'.format(
        resourceManager.rstrip("/"),
        clusterResourceId,
        starttime,
        endtime,
        constants.POD_COUNT_METRIC_NAME,
        constants.POD_METRIC_METRIC_AGGREGATION,
        constants.POD_METRICS_NAMESPACE,
        constants.METRICS_API_VERSION)

    # NOTE(review): verify=False disables TLS certificate verification;
    # presumably intentional for the test environment — confirm.
    response = requests.get(custommetricsUrl, params=params,
                            headers=Headers, verify=False)

    if not response:
        pytest.fail(
            "response of the metrics query API shouldnt be null or empty")

    if response.status_code != 200:
        pytest.fail("metrics query API failed with an error code: {}".format(
            response.status_code))

    responseJSON = response.json()
    if not responseJSON:
        pytest.fail("response JSON shouldnt be null or empty")

    namespace = responseJSON['namespace']
    if namespace != constants.POD_METRICS_NAMESPACE:
        pytest.fail("got the namespace: {0} but expected namespace:{1} in the response".format(
            namespace, constants.POD_METRICS_NAMESPACE))

    responseValues = responseJSON['value']
    if not responseValues:
        pytest.fail("response JSON shouldnt be null or empty")

    if len(responseValues) <= 0:
        pytest.fail(
            "length of value array in the response should be greater than 0")

    for responseVal in responseValues:
        metricName = responseVal['name']['value']
        if metricName != constants.POD_COUNT_METRIC_NAME:
            pytest.fail("got the metricname: {0} but expected metricname:{1} in the response".format(
                metricName, constants.POD_COUNT_METRIC_NAME))
        timeseries = responseVal['timeseries']
        if not timeseries:
            pytest.fail("metric series shouldnt be null or empty for metric:{0} in namespace: {1}".format(
                constants.POD_COUNT_METRIC_NAME, constants.POD_METRICS_NAMESPACE))
        if len(timeseries) <= 0:
            pytest.fail("length of timeseries should be greater than 0 for metric: {0} in namespace :{1}".format(
                constants.POD_COUNT_METRIC_NAME, constants.POD_METRICS_NAMESPACE))

    append_result_output("test_pod_metrics_e2e_workflow end \n",
                         env_dict['TEST_AGENT_LOG_FILE'])
    # BUGFIX: the completion message previously said "Successfully completed
    # e2e workflows test." — a copy-paste from test_e2e_workflows.
    print("Successfully completed pod metrics e2e workflow test.")


# --- test_resource_status.py preamble (preserved from this span) ---
import pytest
import constants

from kubernetes import client, config
from results_utility import append_result_output
from helper import check_kubernetes_deployment_status
from helper import check_kubernetes_daemonset_status
from helper import check_kubernetes_pods_status

pytestmark = pytest.mark.agentests
are up and running +def test_resource_status(env_dict): + print("Starting resource status check.") + append_result_output("test_resource_status start \n", + env_dict['TEST_AGENT_LOG_FILE']) + # Loading in-cluster kube-config + try: + config.load_incluster_config() + #config.load_kube_config() + except Exception as e: + pytest.fail("Error loading the in-cluster config: " + str(e)) + + # checking the deployment status + check_kubernetes_deployment_status( + constants.AGENT_RESOURCES_NAMESPACE, constants.AGENT_DEPLOYMENT_NAME, env_dict['TEST_AGENT_LOG_FILE']) + + # checking the daemonset status + check_kubernetes_daemonset_status( + constants.AGENT_RESOURCES_NAMESPACE, constants.AGENT_DAEMONSET_NAME, env_dict['TEST_AGENT_LOG_FILE']) + + expectedPodRestartCount = env_dict['AGENT_POD_EXPECTED_RESTART_COUNT'] + # checking deployment pod status + check_kubernetes_pods_status(constants.AGENT_RESOURCES_NAMESPACE, + constants.AGENT_DEPLOYMENT_PODS_LABEL_SELECTOR, expectedPodRestartCount, env_dict['TEST_AGENT_LOG_FILE']) + + # checking daemonset pod status + check_kubernetes_pods_status(constants.AGENT_RESOURCES_NAMESPACE, + constants.AGENT_DAEMON_SET_PODS_LABEL_SELECTOR, expectedPodRestartCount, env_dict['TEST_AGENT_LOG_FILE']) + + append_result_output("test_resource_status end \n", + env_dict['TEST_AGENT_LOG_FILE']) + print("Successfully checked resource status check.") diff --git a/test/e2e/src/core/test_rs_workflows.py b/test/e2e/src/core/test_rs_workflows.py new file mode 100755 index 000000000..ed5c21f68 --- /dev/null +++ b/test/e2e/src/core/test_rs_workflows.py @@ -0,0 +1,93 @@ +import pytest +import constants + +from kubernetes import client, config +from kubernetes_pod_utility import get_pod_list, get_log_file_content +from results_utility import append_result_output +from helper import check_kubernetes_deployment_status +from helper import check_kubernetes_daemonset_status +from helper import check_kubernetes_pods_status +from kubernetes.stream import stream + 
+pytestmark = pytest.mark.agentests + +# validation of replicaset agent workflows +def test_rs_workflows(env_dict): + print("Starting replicaset agent workflows test.") + append_result_output("test_rs_workflows start \n", + env_dict['TEST_AGENT_LOG_FILE']) + # Loading in-cluster kube-config + try: + config.load_incluster_config() + except Exception as e: + pytest.fail("Error loading the in-cluster config: " + str(e)) + + print("getting pod list") + api_instance = client.CoreV1Api() + pod_list = get_pod_list(api_instance, constants.AGENT_RESOURCES_NAMESPACE, + constants.AGENT_DEPLOYMENT_PODS_LABEL_SELECTOR) + if not pod_list: + pytest.fail("pod_list shouldnt be null or empty") + + if len(pod_list.items) <= 0: + pytest.fail("number of items in pod list should be greater than 0") + + rspodName = pod_list.items[0].metadata.name + if not rspodName: + pytest.fail("replicaset pod name should not be null or empty") + + logcontent = get_log_file_content( + api_instance, constants.AGENT_RESOURCES_NAMESPACE, rspodName, constants.AGENT_OMSAGENT_LOG_PATH) + if not logcontent: + pytest.fail("logcontent should not be null or empty for rs pod: {}".format(rspodName)) + loglines = logcontent.split("\n") + if len(loglines) <= 0: + pytest.fail("number of log lines should be greater than 0") + + IsKubePodInventorySuccessful = False + IsKubeNodeInventorySuccessful = False + IsKubeDeploymentInventorySuccessful = False + IsKubeContainerPerfInventorySuccessful = False + IsKubeServicesInventorySuccessful = False + IsContainerNodeInventorySuccessful = False + IsKubeEventsSuccessful = False + for line in loglines: + if line.find(constants.KUBE_POD_INVENTORY_EMIT_STREAM) >= 0: + IsKubePodInventorySuccessful = True + if line.find(constants.KUBE_NODE_INVENTORY_EMIT_STREAM) >= 0: + IsKubeNodeInventorySuccessful = True + if line.find(constants.KUBE_DEPLOYMENT_INVENTORY_EMIT_STREAM) >= 0: + IsKubeDeploymentInventorySuccessful = True + if line.find(constants.KUBE_CONTAINER_PERF_EMIT_STREAM) >= 0: + 
IsKubeContainerPerfInventorySuccessful = True + if line.find(constants.KUBE_SERVICES_EMIT_STREAM) >= 0: + IsKubeServicesInventorySuccessful = True + if line.find(constants.KUBE_CONTAINER_NODE_INVENTORY_EMIT_STREAM) >= 0: + IsContainerNodeInventorySuccessful = True + if line.find(constants.KUBE_EVENTS_EMIT_STREAM) >= 0: + IsKubeEventsSuccessful = True + + if IsKubePodInventorySuccessful == False: + pytest.fail("KubePodInventory stream not emitted successfully from pod:" + rspodName) + + if IsKubeNodeInventorySuccessful == False: + pytest.fail("KubePodInventory stream not emitted successfully from pod:" + rspodName) + + if IsKubeDeploymentInventorySuccessful == False: + pytest.fail("KubeDeploymentInventory stream not emitted successfully from pod:" + rspodName) + + if IsKubeContainerPerfInventorySuccessful == False: + pytest.fail("KubeContainerPerfInventory stream not emitted successfully from pod:" + rspodName) + + if IsKubeServicesInventorySuccessful == False: + pytest.fail("KubeServicesInventory stream not emitted successfully from pod:" + rspodName) + + if IsContainerNodeInventorySuccessful == False: + pytest.fail("ContainerNodeInventory stream not emitted successfully from pod:" + rspodName) + + if IsKubeEventsSuccessful == False: + pytest.fail("KubeEventsInventory stream not emitted successfully from rs pod:" + rspodName) + + append_result_output("test_rs_workflows end \n", + env_dict['TEST_AGENT_LOG_FILE']) + print("Successfully completed replicaset workflows test.") diff --git a/test/e2e/src/e2e-tests.yaml b/test/e2e/src/e2e-tests.yaml new file mode 100644 index 000000000..65f55af86 --- /dev/null +++ b/test/e2e/src/e2e-tests.yaml @@ -0,0 +1,167 @@ + +--- +apiVersion: v1 +kind: Namespace +metadata: + name: sonobuoy +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + component: sonobuoy + name: sonobuoy-serviceaccount + namespace: sonobuoy +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + component: 
sonobuoy + namespace: sonobuoy + name: sonobuoy-serviceaccount-sonobuoy +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: sonobuoy-serviceaccount-sonobuoy +subjects: +- kind: ServiceAccount + name: sonobuoy-serviceaccount + namespace: sonobuoy +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + component: sonobuoy + namespace: sonobuoy + name: sonobuoy-serviceaccount-sonobuoy +rules: +- apiGroups: + - '*' + resources: + - '*' + verbs: + - '*' +- nonResourceURLs: + - '/metrics' + - '/logs' + - '/logs/*' + verbs: + - 'get' +--- +apiVersion: v1 +data: + config.json: | + {"Description":"DEFAULT","UUID":"bf5c02ed-1948-48f1-b12d-5a2d74435e46","Version":"v0.20.0","ResultsDir":"/tmp/sonobuoy","Resources":["apiservices","certificatesigningrequests","clusterrolebindings","clusterroles","componentstatuses","configmaps","controllerrevisions","cronjobs","customresourcedefinitions","daemonsets","deployments","endpoints","ingresses","jobs","leases","limitranges","mutatingwebhookconfigurations","namespaces","networkpolicies","nodes","persistentvolumeclaims","persistentvolumes","poddisruptionbudgets","pods","podlogs","podsecuritypolicies","podtemplates","priorityclasses","replicasets","replicationcontrollers","resourcequotas","rolebindings","roles","servergroups","serverversion","serviceaccounts","services","statefulsets","storageclasses","validatingwebhookconfigurations","volumeattachments"],"Filters":{"Namespaces":".*","LabelSelector":""},"Limits":{"PodLogs":{"Namespaces":"","SonobuoyNamespace":true,"FieldSelectors":[],"LabelSelector":"","Previous":false,"SinceSeconds":null,"SinceTime":null,"Timestamps":false,"TailLines":null,"LimitBytes":null,"LimitSize":"","LimitTime":""}},"QPS":30,"Burst":50,"Server":{"bindaddress":"0.0.0.0","bindport":8080,"advertiseaddress":"","timeoutseconds":10800},"Plugins":null,"PluginSearchPath":["./plugins.d","/etc/sonobuoy/plugins.d","~/sonobuoy/plugins.d"],"Namespace":"sonobuoy","WorkerIma
ge":"sonobuoy/sonobuoy:v0.20.0","ImagePullPolicy":"IfNotPresent","ImagePullSecrets":"","ProgressUpdatesPort":"8099"} +kind: ConfigMap +metadata: + labels: + component: sonobuoy + name: sonobuoy-config-cm + namespace: sonobuoy +--- +apiVersion: v1 +data: + plugin-0.yaml: | + podSpec: + containers: [] + restartPolicy: Never + serviceAccountName: sonobuoy-serviceaccount + nodeSelector: + kubernetes.io/os: linux + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/master + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + - key: kubernetes.io/e2e-evict-taint-key + operator: Exists + sonobuoy-config: + driver: Job + plugin-name: agenttests + result-format: junit + spec: + env: + - name: CLIENT_ID + value: 3dd89c8a-c883-4654-a446-52430248ac60 + - name: CLIENT_SECRET + value: 0FWxHGgg.C.OtDuMhzKpIx7Y6vOCxXTx8A + - name: TENANT_ID + value: 72f988bf-86f1-41af-91ab-2d7cd011db47 + image: ganga1980/agentest:v0.5.4 + imagePullPolicy: Always + name: plugin + resources: {} + volumeMounts: + - mountPath: /tmp/results + name: results +kind: ConfigMap +metadata: + labels: + component: sonobuoy + name: sonobuoy-plugins-cm + namespace: sonobuoy +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + component: sonobuoy + run: sonobuoy-master + sonobuoy-component: aggregator + tier: analysis + name: sonobuoy + namespace: sonobuoy +spec: + containers: + - env: + - name: SONOBUOY_ADVERTISE_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: sonobuoy/sonobuoy:v0.20.0 + imagePullPolicy: IfNotPresent + name: kube-sonobuoy + volumeMounts: + - mountPath: /etc/sonobuoy + name: sonobuoy-config-volume + - mountPath: /plugins.d + name: sonobuoy-plugins-volume + - mountPath: /tmp/sonobuoy + name: output-volume + restartPolicy: Never + serviceAccountName: sonobuoy-serviceaccount + nodeSelector: + kubernetes.io/os: linux + tolerations: + - key: "kubernetes.io/e2e-evict-taint-key" + operator: "Exists" + volumes: + - configMap: + name: sonobuoy-config-cm + 
name: sonobuoy-config-volume + - configMap: + name: sonobuoy-plugins-cm + name: sonobuoy-plugins-volume + - emptyDir: {} + name: output-volume +--- +apiVersion: v1 +kind: Service +metadata: + labels: + component: sonobuoy + sonobuoy-component: aggregator + name: sonobuoy-aggregator + namespace: sonobuoy +spec: + ports: + - port: 8080 + protocol: TCP + targetPort: 8080 + selector: + sonobuoy-component: aggregator + type: ClusterIP + From 316511ef03c596f09e7ad5a67adc5983281e29e2 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 16 Feb 2021 18:34:46 -0800 Subject: [PATCH 02/14] doc and script updates --- .pipelines/deploy-and-validate-e2e-tests.sh | 31 ++++++++++++++++ .../update-place-holdres-in-e2e-tests.sh | 35 +++++++++++++++++++ README.md | 27 ++++++++++++++ kubernetes/omsagent.yaml | 6 ++++ test/e2e/{src => }/e2e-tests.yaml | 13 ++++--- 5 files changed, 107 insertions(+), 5 deletions(-) create mode 100644 .pipelines/deploy-and-validate-e2e-tests.sh create mode 100755 .pipelines/update-place-holdres-in-e2e-tests.sh rename test/e2e/{src => }/e2e-tests.yaml (90%) diff --git a/.pipelines/deploy-and-validate-e2e-tests.sh b/.pipelines/deploy-and-validate-e2e-tests.sh new file mode 100644 index 000000000..2176aa279 --- /dev/null +++ b/.pipelines/deploy-and-validate-e2e-tests.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +echo "start: update placeholders of e2e-tests.yaml ..." 
+ +for ARGUMENT in "$@" +do + KEY=$(echo $ARGUMENT | cut -f1 -d=) + VALUE=$(echo $ARGUMENT | cut -f2 -d=) + + case "$KEY" in + CLIENT_ID) CLIENT_ID=$VALUE ;; + CLIENT_SECRET) CLIENT_SECRET=$VALUE ;; + TENANT_ID) TENANT_ID=$VALUE ;; + *) + esac +done + +echo "Service Principal CLIENT_ID:$CLIENT_ID" +echo "replace CLIENT_ID value" +sed -i "s=SP_CLIENT_ID_VALUE=$CLIENT_ID=g" e2e-tests.yaml + +# only uncomment for debug purpose +# echo "Service Principal CLIENT_SECRET:$CLIENT_SECRET" +echo "replace CLIENT_SECRET value" +sed -i "s=SP_CLIENT_SECRET_VALUE=$CLIENT_SECRET=g" e2e-tests.yaml + +echo "Service Principal TENANT_ID:$TENANT_ID" +echo "replace TENANT_ID value" +sed -i "s=SP_TENANT_ID_VALUE=$TENANT_ID=g" e2e-tests.yaml + +echo "end: update placeholders of e2e-tests.yaml." diff --git a/.pipelines/update-place-holdres-in-e2e-tests.sh b/.pipelines/update-place-holdres-in-e2e-tests.sh new file mode 100755 index 000000000..b7a1f8d0d --- /dev/null +++ b/.pipelines/update-place-holdres-in-e2e-tests.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +echo "start: update placeholders of e2e-tests.yaml ..." + +for ARGUMENT in "$@" +do + KEY=$(echo $ARGUMENT | cut -f1 -d=) + VALUE=$(echo $ARGUMENT | cut -f2 -d=) + + case "$KEY" in + TENANT_ID) TENANT_ID=$VALUE ;; + *) + esac +done + +echo "start: read appid and appsecret" +# used the same SP which used for acr +CLIENT_ID=$(cat ~/acrappid) +CLIENT_SECRET=$(cat ~/acrappsecret) +echo "end: read appid and appsecret" + +echo "Service Principal CLIENT_ID:$CLIENT_ID" +echo "replace CLIENT_ID value" +sed -i "s=SP_CLIENT_ID_VALUE=$CLIENT_ID=g" e2e-tests.yaml + +# only uncomment for debug purpose +# echo "Service Principal CLIENT_SECRET:$CLIENT_SECRET" +echo "replace CLIENT_SECRET value" +sed -i "s=SP_CLIENT_SECRET_VALUE=$CLIENT_SECRET=g" e2e-tests.yaml + +echo "Service Principal TENANT_ID:$TENANT_ID" +echo "replace TENANT_ID value" +sed -i "s=SP_TENANT_ID_VALUE=$TENANT_ID=g" e2e-tests.yaml + +echo "start: update placeholders of e2e-tests.yaml ..." 
diff --git a/README.md b/README.md index 3eec1f344..474bb1177 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,7 @@ The general directory structure is: │ │ | ... - plugins in, out and filters code in ruby │ ├── toml-parser/ - code for parsing of toml configuration files ├── test/ - source code for tests +│ ├── e2e/ - e2e tests to validate agent and e2e workflow(s) │ ├── unit-tests/ - unit tests code │ ├── scenario/ - scenario tests code ├── !_README.md - this file @@ -271,6 +272,32 @@ For DEV and PROD branches, automatically deployed latest yaml with latest agent # E2E Tests +## For executing tests + +- Deploy the omsagent.yaml with your agent image. Makesure `ISTEST` environment variable set to `true` if its not set already +- Update the CLIENT_ID, CLIENT_SECRET and TENANT_ID placeholder values and apply e2e-tests.yaml to execute the tests + ``` + cd ~/Docker-Provider/test/e2e # based on your repo path + kubectl apply -f e2e-tests.yaml # this will trigger job in sonobuoy namespace + kubectl get po -n sonobuoy # to check the pods and jobs associated to tests + ``` +- Download (sonobuoy)[https://github.com/vmware-tanzu/sonobuoy/releases] on your dev box to view the results of the test + ``` + results=$(sonobuoy retrieve) # downloads tar file which has logs and test results + sonobuoy results $results # get the summary of the results + tar -xzvf # extract downloaded tar file and look for pod logs if there are any failures + ``` + +## For adding tests +- Add the test file with your test code under `core` directory +- Build the docker image + ``` + cd ~/Docker-Provider/test/e2e/src # based on your repo path + docker build -f ./core/Dockerfile -t /: . + ``` +- update the image tag + +# Scenario Tests Clusters are used in release pipeline already has the yamls under test\scenario deployed. Make sure to validate these scenarios. If you have new interesting scenarios, please add/update them. 
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 67bd9cdde..4369ac75d 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -383,6 +383,9 @@ spec: value: "VALUE_AKS_RESOURCE_ID_VALUE" - name: AKS_REGION value: "VALUE_AKS_RESOURCE_REGION_VALUE" + # this used for e2e test and setting this just emits some additional log statements which used for the e2e tests + - name: ISTEST + value: "true" #Uncomment below two lines for ACS clusters and set the cluster names manually. Also comment out the above two lines for ACS clusters #- name: ACS_RESOURCE_NAME # value: "my_acs_cluster_name" @@ -541,6 +544,9 @@ spec: value: "VALUE_AKS_RESOURCE_ID_VALUE" - name: AKS_REGION value: "VALUE_AKS_RESOURCE_REGION_VALUE" + # this used for e2e test and setting this just emits some additional log statements which used for the e2e tests + - name: ISTEST + value: "true" # Uncomment below two lines for ACS clusters and set the cluster names manually. Also comment out the above two lines for ACS clusters #- name: ACS_RESOURCE_NAME # value: "my_acs_cluster_name" diff --git a/test/e2e/src/e2e-tests.yaml b/test/e2e/e2e-tests.yaml similarity index 90% rename from test/e2e/src/e2e-tests.yaml rename to test/e2e/e2e-tests.yaml index 65f55af86..f0b6a0e81 100644 --- a/test/e2e/src/e2e-tests.yaml +++ b/test/e2e/e2e-tests.yaml @@ -84,14 +84,17 @@ data: result-format: junit spec: env: + # Update values of CLIENT_ID, CLIENT_SECRET of the service principal which has permission to query LA ad Metrics API + # Update value of TENANT_ID corresponding your Azure Service principal - name: CLIENT_ID - value: 3dd89c8a-c883-4654-a446-52430248ac60 + value: "SP_CLIENT_ID_VALUE" - name: CLIENT_SECRET - value: 0FWxHGgg.C.OtDuMhzKpIx7Y6vOCxXTx8A + value: "CLIENT_SECRET_VALUE" - name: TENANT_ID - value: 72f988bf-86f1-41af-91ab-2d7cd011db47 - image: ganga1980/agentest:v0.5.4 - imagePullPolicy: Always + value: "SP_TENANT_ID_VALUE" + # image tag should be updated if new tests being 
added after this image + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest02142021 + imagePullPolicy: IfNotPresent name: plugin resources: {} volumeMounts: From 022eae571e71f7b3de277240ec9149d054d1233c Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 16 Feb 2021 19:55:33 -0800 Subject: [PATCH 03/14] add validation script --- .pipelines/deploy-and-validate-e2e-tests.sh | 31 --------- .pipelines/validate-e2e-tests-results.sh | 71 +++++++++++++++++++++ 2 files changed, 71 insertions(+), 31 deletions(-) delete mode 100644 .pipelines/deploy-and-validate-e2e-tests.sh create mode 100644 .pipelines/validate-e2e-tests-results.sh diff --git a/.pipelines/deploy-and-validate-e2e-tests.sh b/.pipelines/deploy-and-validate-e2e-tests.sh deleted file mode 100644 index 2176aa279..000000000 --- a/.pipelines/deploy-and-validate-e2e-tests.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -echo "start: update placeholders of e2e-tests.yaml ..." - -for ARGUMENT in "$@" -do - KEY=$(echo $ARGUMENT | cut -f1 -d=) - VALUE=$(echo $ARGUMENT | cut -f2 -d=) - - case "$KEY" in - CLIENT_ID) CLIENT_ID=$VALUE ;; - CLIENT_SECRET) CLIENT_SECRET=$VALUE ;; - TENANT_ID) TENANT_ID=$VALUE ;; - *) - esac -done - -echo "Service Principal CLIENT_ID:$CLIENT_ID" -echo "replace CLIENT_ID value" -sed -i "s=SP_CLIENT_ID_VALUE=$CLIENT_ID=g" e2e-tests.yaml - -# only uncomment for debug purpose -# echo "Service Principal CLIENT_SECRET:$CLIENT_SECRET" -echo "replace CLIENT_SECRET value" -sed -i "s=SP_CLIENT_SECRET_VALUE=$CLIENT_SECRET=g" e2e-tests.yaml - -echo "Service Principal TENANT_ID:$TENANT_ID" -echo "replace TENANT_ID value" -sed -i "s=SP_TENANT_ID_VALUE=$TENANT_ID=g" e2e-tests.yaml - -echo "end: update placeholders of e2e-tests.yaml." 
diff --git a/.pipelines/validate-e2e-tests-results.sh b/.pipelines/validate-e2e-tests-results.sh new file mode 100644 index 000000000..c38fa0f50 --- /dev/null +++ b/.pipelines/validate-e2e-tests-results.sh @@ -0,0 +1,71 @@ +#!/bin/bash +echo "start: validating results of e2e-tests ..." +DEFAULT_SONOBUOY_VERSION="0.20.0" +DEFAULT_TIME_OUT_IN_MINS=60 +for ARGUMENT in "$@" +do + KEY=$(echo $ARGUMENT | cut -f1 -d=) + VALUE=$(echo $ARGUMENT | cut -f2 -d=) + + case "$KEY" in + SONOBUOY_VERSION) SONOBUOY_VERSION=$VALUE ;; + *) + esac +done + +if [ -z $SONOBUOY_VERSION ]; then + SONOBUOY_VERSION=$DEFAULT_SONOBUOY_VERSION +fi + +echo "sonobuoy version: ${SONOBUOY_VERSION}" + +echo "start: downloading sonobuoy" +curl -LO https://github.com/vmware-tanzu/sonobuoy/releases/download/v${SONOBUOY_VERSION}/sonobuoy_${SONOBUOY_VERSION}_linux_amd64.tar.gz +echo "end: downloading sonobuoy" + +echo "start: extract sonobuoy tar file" +mkdir -p sonobuoy-install/ +tar -zxf sonobuoy_${SONOBUOY_VERSION}_*.tar.gz -C sonobuoy-install/ +echo "end: extract sonobuoy tar file" + +echo "start: move sonobuoy binaries to /usr/local/bin/" +mv -f sonobuoy-install/sonobuoy /usr/local/bin/ +echo "end: move sonobuoy binaries to /usr/local/bin/" + +rm -rf sonobuoy_${SONOBUOY_VERSION}_*.tar.gz sonobuoy-install/ + +results=$(sonobuoy retrieve) +mins=0 +IsSucceeded=true +while [ $mins -le $DEFAULT_TIME_OUT_IN_MINS ] +do + # check the status + echo "checking test status" + status=$(sonobuoy status) + status=$(echo $status | sed 's/`//g') + if [[ $status == *"completed"* ]]; then + echo "test run completed" + mins=$DEFAULT_TIME_OUT_IN_MINS + if [[ $status == *"failed"* ]]; then + IsSucceeded=false + fi + else + echo "sleep for 1m to check the status again" + sleep 1m + fi + mins=$(( $mins + 1 )) +done +echo "status:${IsSucceeded}" + +results=$(sonobuoy retrieve) +sonobuoy results $results + +if $IsSucceeded == true; then + echo "all test passed" + exit 0 +else + echo "tests are failed. 
please review the results by downloading tar file via sonobuoy retrieve command" + exit 1 +fi + +echo "end: validating results of e2e-tests ..." From f738a2b598f862d15060af3543a3ff518c3e8234 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 16 Feb 2021 20:01:36 -0800 Subject: [PATCH 04/14] doc updates --- README.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 474bb1177..22eb4ac31 100644 --- a/README.md +++ b/README.md @@ -274,28 +274,30 @@ For DEV and PROD branches, automatically deployed latest yaml with latest agent ## For executing tests -- Deploy the omsagent.yaml with your agent image. Makesure `ISTEST` environment variable set to `true` if its not set already -- Update the CLIENT_ID, CLIENT_SECRET and TENANT_ID placeholder values and apply e2e-tests.yaml to execute the tests +1. Deploy the omsagent.yaml with your agent image. Makesure `ISTEST` environment variable set to `true` if its not set already +2. Update the Service Principal CLIENT_ID, CLIENT_SECRET and TENANT_ID placeholder values and apply e2e-tests.yaml to execute the tests + > Note: Service Principal at least requires reader role on log analytics workspace and cluster resource to query LA and metrics ``` cd ~/Docker-Provider/test/e2e # based on your repo path kubectl apply -f e2e-tests.yaml # this will trigger job in sonobuoy namespace kubectl get po -n sonobuoy # to check the pods and jobs associated to tests ``` -- Download (sonobuoy)[https://github.com/vmware-tanzu/sonobuoy/releases] on your dev box to view the results of the test +3. 
Download (sonobuoy)[https://github.com/vmware-tanzu/sonobuoy/releases] on your dev box to view the results of the test ``` results=$(sonobuoy retrieve) # downloads tar file which has logs and test results sonobuoy results $results # get the summary of the results tar -xzvf # extract downloaded tar file and look for pod logs if there are any failures ``` -## For adding tests -- Add the test file with your test code under `core` directory -- Build the docker image +## For adding new tests + +1. Add the test file with your test code under `core` directory +2. Build the docker image, preference to use MCR ``` cd ~/Docker-Provider/test/e2e/src # based on your repo path docker build -f ./core/Dockerfile -t /: . ``` -- update the image tag +3. update the existing agentest image tag in e2e-tests.yaml with latest image tag # Scenario Tests Clusters are used in release pipeline already has the yamls under test\scenario deployed. Make sure to validate these scenarios. From e99c5edea74ce6237333b835c0d880dd2396b363 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 16 Feb 2021 20:12:51 -0800 Subject: [PATCH 05/14] yaml updates --- test/e2e/e2e-tests.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/e2e/e2e-tests.yaml b/test/e2e/e2e-tests.yaml index f0b6a0e81..df7f5b9b7 100644 --- a/test/e2e/e2e-tests.yaml +++ b/test/e2e/e2e-tests.yaml @@ -92,6 +92,14 @@ data: value: "CLIENT_SECRET_VALUE" - name: TENANT_ID value: "SP_TENANT_ID_VALUE" + - name: DEFAULT_QUERY_TIME_INTERVAL_IN_MINUTES + value: "10" + - name: DEFAULT_METRICS_QUERY_TIME_INTERVAL_IN_MINUTES + value: "10" + - name: AGENT_POD_EXPECTED_RESTART_COUNT + value: "0" + - name: AZURE_CLOUD + value: "AZURE_PUBLIC_CLOUD" # image tag should be updated if new tests being added after this image image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest02142021 imagePullPolicy: IfNotPresent From 3d089a65bcb36358d0f7118d56e421c59eb3e250 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem 
Date: Tue, 16 Feb 2021 22:14:43 -0800 Subject: [PATCH 06/14] fix typo --- test/e2e/src/core/test_ds_workflows.py | 2 +- test/e2e/src/core/test_rs_workflows.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/e2e/src/core/test_ds_workflows.py b/test/e2e/src/core/test_ds_workflows.py index 55d9e4be4..a45c3081a 100755 --- a/test/e2e/src/core/test_ds_workflows.py +++ b/test/e2e/src/core/test_ds_workflows.py @@ -53,7 +53,7 @@ def test_ds_workflows(env_dict): if IsContainerPerfEmitStream == False: pytest.fail("ContainerPerf stream not emitted successfully from pod:" + podName) if IsContainerInventoryStream == False: - pytest.fail("ContainerPerf stream not emitted successfully from pod:" + podName) + pytest.fail("ContainerInventory stream not emitted successfully from pod:" + podName) append_result_output("test_ds_workflows end \n", env_dict['TEST_AGENT_LOG_FILE']) diff --git a/test/e2e/src/core/test_rs_workflows.py b/test/e2e/src/core/test_rs_workflows.py index ed5c21f68..aef422171 100755 --- a/test/e2e/src/core/test_rs_workflows.py +++ b/test/e2e/src/core/test_rs_workflows.py @@ -71,7 +71,7 @@ def test_rs_workflows(env_dict): pytest.fail("KubePodInventory stream not emitted successfully from pod:" + rspodName) if IsKubeNodeInventorySuccessful == False: - pytest.fail("KubePodInventory stream not emitted successfully from pod:" + rspodName) + pytest.fail("KubeNodeInventory stream not emitted successfully from pod:" + rspodName) if IsKubeDeploymentInventorySuccessful == False: pytest.fail("KubeDeploymentInventory stream not emitted successfully from pod:" + rspodName) From df83256f0925df30edd9544615fe31cb212e45f5 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 16 Feb 2021 22:38:53 -0800 Subject: [PATCH 07/14] doc updates --- README.md | 16 +++++++++------- test/e2e/src/core/test_ds_workflows.py | 8 ++++---- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 22eb4ac31..df0f459fb 100644 --- 
a/README.md +++ b/README.md @@ -274,30 +274,32 @@ For DEV and PROD branches, automatically deployed latest yaml with latest agent ## For executing tests -1. Deploy the omsagent.yaml with your agent image. Makesure `ISTEST` environment variable set to `true` if its not set already +1. Deploy the omsagent.yaml with your agent image. In the yaml, makesure `ISTEST` environment variable set to `true` if its not set already 2. Update the Service Principal CLIENT_ID, CLIENT_SECRET and TENANT_ID placeholder values and apply e2e-tests.yaml to execute the tests - > Note: Service Principal at least requires reader role on log analytics workspace and cluster resource to query LA and metrics + > Note: Service Principal requires reader role on log analytics workspace and cluster resource to query LA and metrics ``` cd ~/Docker-Provider/test/e2e # based on your repo path - kubectl apply -f e2e-tests.yaml # this will trigger job in sonobuoy namespace + kubectl apply -f e2e-tests.yaml # this will trigger job to run the tests in sonobuoy namespace kubectl get po -n sonobuoy # to check the pods and jobs associated to tests ``` -3. Download (sonobuoy)[https://github.com/vmware-tanzu/sonobuoy/releases] on your dev box to view the results of the test +3. Download (sonobuoy)[https://github.com/vmware-tanzu/sonobuoy/releases] on your dev box to view the results of the tests ``` results=$(sonobuoy retrieve) # downloads tar file which has logs and test results sonobuoy results $results # get the summary of the results - tar -xzvf # extract downloaded tar file and look for pod logs if there are any failures + tar -xzvf # extract downloaded tar file and look for pod logs, results and other k8s resources if there are any failures ``` ## For adding new tests 1. Add the test file with your test code under `core` directory -2. Build the docker image, preference to use MCR +2. 
Build the docker image, prefer to use ACR & MCR ``` cd ~/Docker-Provider/test/e2e/src # based on your repo path + docker login -u -p # login to acr docker build -f ./core/Dockerfile -t /: . + docker push /: ``` -3. update the existing agentest image tag in e2e-tests.yaml with latest image tag +3. update existing agentest image tag in e2e-tests.yaml with newly built image tag # Scenario Tests Clusters are used in release pipeline already has the yamls under test\scenario deployed. Make sure to validate these scenarios. diff --git a/test/e2e/src/core/test_ds_workflows.py b/test/e2e/src/core/test_ds_workflows.py index a45c3081a..81ef08325 100755 --- a/test/e2e/src/core/test_ds_workflows.py +++ b/test/e2e/src/core/test_ds_workflows.py @@ -13,7 +13,7 @@ # validation of ds agent workflows def test_ds_workflows(env_dict): - print("Starting daemonset workflows test.") + print("Starting daemonset agent workflows test.") append_result_output("test_ds_workflows start \n", env_dict['TEST_AGENT_LOG_FILE']) # Loading in-cluster kube-config @@ -22,15 +22,15 @@ def test_ds_workflows(env_dict): except Exception as e: pytest.fail("Error loading the in-cluster config: " + str(e)) - print("getting pod list") + print("getting daemonset pod list") api_instance = client.CoreV1Api() pod_list = get_pod_list(api_instance, constants.AGENT_RESOURCES_NAMESPACE, constants.AGENT_DAEMON_SET_PODS_LABEL_SELECTOR) if not pod_list: - pytest.fail("pod_list shouldnt be null or empty") + pytest.fail("daemonset pod_list shouldnt be null or empty") if len(pod_list.items) <= 0: - pytest.fail("number of items in pod list should be greater than 0") + pytest.fail("number of items in daemonset pod list should be greater than 0") for podItem in pod_list.items: podName = podItem.metadata.name From ad7dc64dc5800e56c7a685802d9f99e65e1e603c Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 16 Feb 2021 22:49:34 -0800 Subject: [PATCH 08/14] more doc updates --- 
.pipelines/update-place-holdres-in-e2e-tests.sh | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.pipelines/update-place-holdres-in-e2e-tests.sh b/.pipelines/update-place-holdres-in-e2e-tests.sh index b7a1f8d0d..5fec73684 100755 --- a/.pipelines/update-place-holdres-in-e2e-tests.sh +++ b/.pipelines/update-place-holdres-in-e2e-tests.sh @@ -32,4 +32,4 @@ echo "Service Principal TENANT_ID:$TENANT_ID" echo "replace TENANT_ID value" sed -i "s=SP_TENANT_ID_VALUE=$TENANT_ID=g" e2e-tests.yaml -echo "start: update placeholders of e2e-tests.yaml ..." +echo "end: update placeholders of e2e-tests.yaml." diff --git a/README.md b/README.md index df0f459fb..2cc0ebb08 100644 --- a/README.md +++ b/README.md @@ -291,7 +291,7 @@ For DEV and PROD branches, automatically deployed latest yaml with latest agent ## For adding new tests -1. Add the test file with your test code under `core` directory +1. Add the test python file with your test code under `core` directory 2. Build the docker image, prefer to use ACR & MCR ``` cd ~/Docker-Provider/test/e2e/src # based on your repo path From 38c6b9454375180ef6bf1d19dad00548381e148b Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Thu, 18 Feb 2021 13:35:03 -0800 Subject: [PATCH 09/14] add ISTEST for helm chart to use arc conf --- .../azuremonitor-containers/templates/omsagent-daemonset.yaml | 2 ++ .../templates/omsagent-deployment.yaml | 4 +++- charts/azuremonitor-containers/values.yaml | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml b/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml index 0272c6263..615cd0485 100644 --- a/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml +++ b/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml @@ -89,6 +89,8 @@ spec: - name: FBIT_TAIL_BUFFER_MAX_SIZE value: {{ .Values.omsagent.logsettings.tailbufmaxsizemegabytes | quote }} {{- end }} + 
- name: ISTEST + value: {{ .Values.omsagent.ISTEST | quote }} securityContext: privileged: true ports: diff --git a/charts/azuremonitor-containers/templates/omsagent-deployment.yaml b/charts/azuremonitor-containers/templates/omsagent-deployment.yaml index ecd0b705b..012dd2720 100644 --- a/charts/azuremonitor-containers/templates/omsagent-deployment.yaml +++ b/charts/azuremonitor-containers/templates/omsagent-deployment.yaml @@ -72,7 +72,9 @@ spec: value: {{ .Values.Azure.Extension.Name | quote }} {{- end }} - name: USER_ASSIGNED_IDENTITY_CLIENT_ID - value: "" + value: "" + - name: ISTEST + value: {{ .Values.omsagent.ISTEST | quote }} securityContext: privileged: true ports: diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml index 5601a5738..de3a8ff74 100644 --- a/charts/azuremonitor-containers/values.yaml +++ b/charts/azuremonitor-containers/values.yaml @@ -33,6 +33,10 @@ omsagent: # chance to build pod for the node and give it to the scheduler) # Should be some number greater than default (0) priority: 10 + + # This used for running agent pods in test mode and setting true makes the agent pods running in test mode + # additional agent workflow logs will be emitted which are used for e2e and conformance testing + ISTEST: false ## To get your workspace id and key do the following ## You can create a Azure Loganalytics workspace from portal.azure.com and get its ID & PRIMARY KEY from 'Advanced Settings' tab in the Ux. 
From 20d4e8e04e8381edf6cd63060c1deb3cddf271a3 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Thu, 18 Feb 2021 22:27:28 -0800 Subject: [PATCH 10/14] refactor test code --- README.md | 6 +++--- test/e2e/e2e-tests.yaml | 2 +- test/e2e/src/core/Dockerfile | 1 + test/e2e/src/{core => tests}/test_ds_workflows.py | 0 test/e2e/src/{core => tests}/test_e2e_workflows.py | 0 .../src/{core => tests}/test_node_metrics_e2e_workflow.py | 0 .../src/{core => tests}/test_pod_metrics_e2e_workflow.py | 0 test/e2e/src/{core => tests}/test_resource_status.py | 0 test/e2e/src/{core => tests}/test_rs_workflows.py | 0 9 files changed, 5 insertions(+), 4 deletions(-) rename test/e2e/src/{core => tests}/test_ds_workflows.py (100%) rename test/e2e/src/{core => tests}/test_e2e_workflows.py (100%) rename test/e2e/src/{core => tests}/test_node_metrics_e2e_workflow.py (100%) rename test/e2e/src/{core => tests}/test_pod_metrics_e2e_workflow.py (100%) rename test/e2e/src/{core => tests}/test_resource_status.py (100%) rename test/e2e/src/{core => tests}/test_rs_workflows.py (100%) diff --git a/README.md b/README.md index 2cc0ebb08..d283ff2fe 100644 --- a/README.md +++ b/README.md @@ -291,15 +291,15 @@ For DEV and PROD branches, automatically deployed latest yaml with latest agent ## For adding new tests -1. Add the test python file with your test code under `core` directory -2. Build the docker image, prefer to use ACR & MCR +1. Add the test python file with your test code under `tests` directory +2. Build the docker image, recommended to use ACR & MCR ``` cd ~/Docker-Provider/test/e2e/src # based on your repo path docker login -u -p # login to acr docker build -f ./core/Dockerfile -t /: . docker push /: ``` -3. update existing agentest image tag in e2e-tests.yaml with newly built image tag +3. 
update existing agentest image tag in e2e-tests.yaml with newly built image tag with MCR repo # Scenario Tests Clusters are used in release pipeline already has the yamls under test\scenario deployed. Make sure to validate these scenarios. diff --git a/test/e2e/e2e-tests.yaml b/test/e2e/e2e-tests.yaml index df7f5b9b7..06dfa1fb0 100644 --- a/test/e2e/e2e-tests.yaml +++ b/test/e2e/e2e-tests.yaml @@ -101,7 +101,7 @@ data: - name: AZURE_CLOUD value: "AZURE_PUBLIC_CLOUD" # image tag should be updated if new tests being added after this image - image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest02142021 + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev:ciagenttest02152021 imagePullPolicy: IfNotPresent name: plugin resources: {} diff --git a/test/e2e/src/core/Dockerfile b/test/e2e/src/core/Dockerfile index 2637a1591..43b17504c 100644 --- a/test/e2e/src/core/Dockerfile +++ b/test/e2e/src/core/Dockerfile @@ -11,6 +11,7 @@ COPY ./core/conftest.py /e2etests/ COPY ./core/helper.py /e2etests/ COPY ./core/ /e2etests/ COPY ./common/ /e2etests/ +COPY ./tests/ /e2etests/ RUN ["chmod", "+x", "/e2e_tests.sh"] ENTRYPOINT ["./e2e_tests.sh"] diff --git a/test/e2e/src/core/test_ds_workflows.py b/test/e2e/src/tests/test_ds_workflows.py similarity index 100% rename from test/e2e/src/core/test_ds_workflows.py rename to test/e2e/src/tests/test_ds_workflows.py diff --git a/test/e2e/src/core/test_e2e_workflows.py b/test/e2e/src/tests/test_e2e_workflows.py similarity index 100% rename from test/e2e/src/core/test_e2e_workflows.py rename to test/e2e/src/tests/test_e2e_workflows.py diff --git a/test/e2e/src/core/test_node_metrics_e2e_workflow.py b/test/e2e/src/tests/test_node_metrics_e2e_workflow.py similarity index 100% rename from test/e2e/src/core/test_node_metrics_e2e_workflow.py rename to test/e2e/src/tests/test_node_metrics_e2e_workflow.py diff --git a/test/e2e/src/core/test_pod_metrics_e2e_workflow.py b/test/e2e/src/tests/test_pod_metrics_e2e_workflow.py 
similarity index 100% rename from test/e2e/src/core/test_pod_metrics_e2e_workflow.py rename to test/e2e/src/tests/test_pod_metrics_e2e_workflow.py diff --git a/test/e2e/src/core/test_resource_status.py b/test/e2e/src/tests/test_resource_status.py similarity index 100% rename from test/e2e/src/core/test_resource_status.py rename to test/e2e/src/tests/test_resource_status.py diff --git a/test/e2e/src/core/test_rs_workflows.py b/test/e2e/src/tests/test_rs_workflows.py similarity index 100% rename from test/e2e/src/core/test_rs_workflows.py rename to test/e2e/src/tests/test_rs_workflows.py From 51b4b3b07a02f540cf0a33fb49b148acab79aba2 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 23 Feb 2021 12:31:15 -0800 Subject: [PATCH 11/14] fix pr feedback --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d283ff2fe..3564345ee 100644 --- a/README.md +++ b/README.md @@ -274,7 +274,7 @@ For DEV and PROD branches, automatically deployed latest yaml with latest agent ## For executing tests -1. Deploy the omsagent.yaml with your agent image. In the yaml, makesure `ISTEST` environment variable set to `true` if its not set already +1. Deploy the omsagent.yaml with your agent image. In the yaml, make sure `ISTEST` environment variable set to `true` if its not set already 2. 
Update the Service Principal CLIENT_ID, CLIENT_SECRET and TENANT_ID placeholder values and apply e2e-tests.yaml to execute the tests > Note: Service Principal requires reader role on log analytics workspace and cluster resource to query LA and metrics ``` From 928ab112e687d99210353eabdcb2d8e9b07a7979 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 23 Feb 2021 12:39:43 -0800 Subject: [PATCH 12/14] fix pr feedback --- charts/azuremonitor-containers/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml index de3a8ff74..ea83bafaa 100644 --- a/charts/azuremonitor-containers/values.yaml +++ b/charts/azuremonitor-containers/values.yaml @@ -34,8 +34,8 @@ omsagent: # Should be some number greater than default (0) priority: 10 - # This used for running agent pods in test mode and setting true makes the agent pods running in test mode - # additional agent workflow logs will be emitted which are used for e2e and conformance testing + # This used for running agent pods in test mode. 
+ # if set to true additional agent workflow logs will be emitted which are used for e2e and arc k8s conformance testing ISTEST: false ## To get your workspace id and key do the following From a367922f1848a557938e4dc49628c31bc80d9e75 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 23 Feb 2021 15:19:01 -0800 Subject: [PATCH 13/14] fix pr feedback --- test/e2e/src/common/kubernetes_crd_utility.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/e2e/src/common/kubernetes_crd_utility.py b/test/e2e/src/common/kubernetes_crd_utility.py index 6b591845c..f84092878 100644 --- a/test/e2e/src/common/kubernetes_crd_utility.py +++ b/test/e2e/src/common/kubernetes_crd_utility.py @@ -14,7 +14,8 @@ def get_crd_instance(api_instance, group, version, namespace, plural, crd_name): # Function that watches events corresponding to given CRD instance and passes the events to a callback function def watch_crd_instance(api_instance, group, version, namespace, plural, crd_name, timeout, callback=None): if not callback: - return + pytest.fail("callback should be specified") + field_selector = "metadata.name={}".format(crd_name) if crd_name else "" try: w = watch.Watch() From 5c5e4a7dc8dd1fa84d7ac1675a4cbae087e5706c Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 23 Feb 2021 15:21:14 -0800 Subject: [PATCH 14/14] fix pr feedback --- test/e2e/src/common/constants.py | 1 - test/e2e/src/common/kubernetes_node_utility.py | 1 - test/e2e/src/common/kubernetes_pod_utility.py | 1 - test/e2e/src/common/kubernetes_version_utility.py | 1 - test/e2e/src/core/Dockerfile | 1 - test/e2e/src/core/e2e_tests.sh | 2 +- test/e2e/src/core/pytest.ini | 3 ++- 7 files changed, 3 insertions(+), 7 deletions(-) diff --git a/test/e2e/src/common/constants.py b/test/e2e/src/common/constants.py index 738b6c7f8..770964cb5 100644 --- a/test/e2e/src/common/constants.py +++ b/test/e2e/src/common/constants.py @@ -117,4 +117,3 @@ POD_METRICS_NAMESPACE = 'insights.container/pods' 
POD_METRIC_METRIC_AGGREGATION = 'average' POD_COUNT_METRIC_NAME = 'PodCount' - diff --git a/test/e2e/src/common/kubernetes_node_utility.py b/test/e2e/src/common/kubernetes_node_utility.py index 739af55f6..050ce8b87 100644 --- a/test/e2e/src/common/kubernetes_node_utility.py +++ b/test/e2e/src/common/kubernetes_node_utility.py @@ -10,4 +10,3 @@ def list_kubernetes_nodes(api_instance): except Exception as e: pytest.fail("Error occured while retrieving node information: " + str(e)) - diff --git a/test/e2e/src/common/kubernetes_pod_utility.py b/test/e2e/src/common/kubernetes_pod_utility.py index 403da48cb..27345fae7 100644 --- a/test/e2e/src/common/kubernetes_pod_utility.py +++ b/test/e2e/src/common/kubernetes_pod_utility.py @@ -63,4 +63,3 @@ def get_pod_logs(api_instance, pod_namespace, pod_name, container_name): return api_instance.read_namespaced_pod_log(pod_name, pod_namespace, container=container_name) except Exception as e: pytest.fail("Error occurred when fetching pod logs: " + str(e)) - diff --git a/test/e2e/src/common/kubernetes_version_utility.py b/test/e2e/src/common/kubernetes_version_utility.py index 8980aa0f2..884d1df2f 100644 --- a/test/e2e/src/common/kubernetes_version_utility.py +++ b/test/e2e/src/common/kubernetes_version_utility.py @@ -7,4 +7,3 @@ def get_kubernetes_server_version(api_instance): return api_response.git_version except Exception as e: pytest.fail("Error occured when retrieving kubernetes server version: " + str(e)) - diff --git a/test/e2e/src/core/Dockerfile b/test/e2e/src/core/Dockerfile index 43b17504c..9f85bdf4c 100644 --- a/test/e2e/src/core/Dockerfile +++ b/test/e2e/src/core/Dockerfile @@ -15,4 +15,3 @@ COPY ./tests/ /e2etests/ RUN ["chmod", "+x", "/e2e_tests.sh"] ENTRYPOINT ["./e2e_tests.sh"] - diff --git a/test/e2e/src/core/e2e_tests.sh b/test/e2e/src/core/e2e_tests.sh index ff34a213f..3bfafdce9 100644 --- a/test/e2e/src/core/e2e_tests.sh +++ b/test/e2e/src/core/e2e_tests.sh @@ -23,4 +23,4 @@ NUM_PROCESS=$(pytest /e2etests/ 
--collect-only -k "$TEST_NAME_LIST" -m "$TEST_M export NUM_TESTS="$NUM_PROCESS" -pytest /e2etests/ --junitxml=/tmp/results/results.xml -d --tx "$NUM_PROCESS"*popen -k "$TEST_NAME_LIST" -m "$TEST_MARKER_LIST" \ No newline at end of file +pytest /e2etests/ --junitxml=/tmp/results/results.xml -d --tx "$NUM_PROCESS"*popen -k "$TEST_NAME_LIST" -m "$TEST_MARKER_LIST" diff --git a/test/e2e/src/core/pytest.ini b/test/e2e/src/core/pytest.ini index 75f0242d1..f4dc462f0 100644 --- a/test/e2e/src/core/pytest.ini +++ b/test/e2e/src/core/pytest.ini @@ -1,3 +1,4 @@ [pytest] markers = - agentests: marks tests are a part of arc agent conformance tests (deselect with '-m "not agentests"') \ No newline at end of file + agentests: marks tests are a part of arc agent conformance tests (deselect with '-m "not agentests"') + \ No newline at end of file