From df136067a764465b9f0ec44dd1d312c870238540 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Thu, 13 May 2021 20:25:17 -0700 Subject: [PATCH 01/18] full switch to mdsd, upgrade to ruby v1 & omsagent removal --- build/linux/installer/conf/container.conf | 275 ++--- build/linux/installer/conf/kube.conf | 361 +++--- build/linux/installer/conf/out_oms.conf | 1 - .../installer/datafiles/base_container.data | 298 +++-- build/linux/installer/datafiles/linux.data | 18 +- .../linux/installer/datafiles/linux_dpkg.data | 2 +- .../linux/installer/datafiles/linux_rpm.data | 2 +- .../linux/installer/scripts/livenessprobe.sh | 18 +- .../scripts/tomlparser-mdm-metrics-config.rb | 2 +- .../tomlparser-metric-collection-config.rb | 2 +- kubernetes/linux/envmdsd | 2 - kubernetes/linux/main.sh | 288 ++--- kubernetes/linux/mdsd.xml | 345 +++++- kubernetes/linux/setup.sh | 54 +- source/plugins/go/src/oms.go | 349 ++++-- source/plugins/go/src/telemetry.go | 4 + source/plugins/go/src/utils.go | 97 +- .../ruby/ApplicationInsightsUtility.rb | 22 +- .../plugins/ruby/CAdvisorMetricsAPIClient.rb | 207 ++-- source/plugins/ruby/DockerApiClient.rb | 2 +- source/plugins/ruby/KubernetesApiClient.rb | 101 +- source/plugins/ruby/MdmMetricsGenerator.rb | 16 +- source/plugins/ruby/filter_cadvisor2mdm.rb | 111 +- .../ruby/filter_cadvisor_health_container.rb | 15 +- .../ruby/filter_cadvisor_health_node.rb | 28 +- source/plugins/ruby/filter_container.rb | 59 - source/plugins/ruby/filter_docker_log.rb | 103 -- .../ruby/filter_health_model_builder.rb | 42 +- source/plugins/ruby/filter_inventory2mdm.rb | 24 +- source/plugins/ruby/filter_telegraf2mdm.rb | 8 +- ...h_container_cpu_memory_record_formatter.rb | 8 +- .../ruby/health/health_monitor_utils.rb | 10 +- source/plugins/ruby/in_cadvisor_perf.rb | 40 +- source/plugins/ruby/in_containerinventory.rb | 28 +- source/plugins/ruby/in_kube_events.rb | 30 +- source/plugins/ruby/in_kube_health.rb | 17 +- source/plugins/ruby/in_kube_nodes.rb | 112 +- 
source/plugins/ruby/in_kube_podinventory.rb | 117 +- source/plugins/ruby/in_kube_pvinventory.rb | 34 +- .../plugins/ruby/in_kubestate_deployments.rb | 35 +- source/plugins/ruby/in_kubestate_hpa.rb | 31 +- source/plugins/ruby/in_win_cadvisor_perf.rb | 28 +- source/plugins/ruby/out_health_forward.rb | 1074 ++++++++++------- source/plugins/ruby/out_mdm.rb | 79 +- source/plugins/ruby/podinventory_to_mdm.rb | 10 +- source/plugins/utils/oms_common.rb | 1020 ++++++++++++++++ source/plugins/utils/oms_configuration.rb | 381 ++++++ source/plugins/utils/omslog.rb | 50 + 48 files changed, 3825 insertions(+), 2135 deletions(-) delete mode 100644 source/plugins/ruby/filter_container.rb delete mode 100644 source/plugins/ruby/filter_docker_log.rb create mode 100644 source/plugins/utils/oms_common.rb create mode 100644 source/plugins/utils/oms_configuration.rb create mode 100644 source/plugins/utils/omslog.rb diff --git a/build/linux/installer/conf/container.conf b/build/linux/installer/conf/container.conf index 958a85eb6..90f923c8d 100644 --- a/build/linux/installer/conf/container.conf +++ b/build/linux/installer/conf/container.conf @@ -1,141 +1,134 @@ -# Fluentd config file for OMS Docker - container components (non kubeAPI) - -# Forward port 25225 for container logs - - type forward - port 25225 - bind 127.0.0.1 - - -# MDM metrics from telegraf - - @type tcp - tag oms.mdm.container.perf.telegraf.* - bind 0.0.0.0 - port 25228 - format json - - -# Container inventory - - type containerinventory - tag oms.containerinsights.containerinventory - run_interval 60 - log_level debug - - -#cadvisor perf - - type cadvisorperf - tag oms.api.cadvisorperf - run_interval 60 - log_level debug - - - - type filter_cadvisor_health_node - log_level debug - - - - type filter_cadvisor_health_container - log_level debug - - -#custom_metrics_mdm filter plugin - - type filter_cadvisor2mdm - metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes,pvUsedBytes - log_level info - - - - 
type filter_telegraf2mdm - log_level debug - - - - type out_oms - log_level debug - num_threads 5 - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_containerinventory*.buffer - buffer_queue_full_action drop_oldest_chunk - buffer_chunk_limit 4m - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m - - - - type out_oms - log_level debug - num_threads 5 - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_cadvisorperf*.buffer - buffer_queue_full_action drop_oldest_chunk - buffer_chunk_limit 4m - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m - - - - - @type health_forward - send_timeout 60s - recover_wait 10s - hard_timeout 60s - heartbeat_type tcp - skip_network_error_at_init true - expire_dns_cache 600s - buffer_queue_full_action drop_oldest_chunk - buffer_type file - buffer_path %STATE_DIR_WS%/out_health_forward*.buffer - buffer_chunk_limit 3m - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m - - - host "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_HOST']}" - port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}" - - - - @type file - path %STATE_DIR_WS%/fluent_forward_failed.buffer - - - - - type out_mdm - log_level debug - num_threads 5 - buffer_type file - buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer - buffer_queue_full_action drop_oldest_chunk - buffer_chunk_limit 4m - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m - retry_mdm_post_wait_minutes 30 - - - - type out_oms - log_level debug - num_threads 5 - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_insightsmetrics*.buffer - buffer_queue_full_action drop_oldest_chunk - buffer_chunk_limit 4m - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m - + # Fluentd config file for OMS Docker - container components (non kubeAPI) + + # Forward port 25225 for container logs + # gangams - not used; get rid of this after confirming it is safe to remove + + @type forward + port 25225 + 
bind 127.0.0.1 + + + # MDM metrics from telegraf + + @type tcp + tag oms.mdm.container.perf.telegraf.* + bind 0.0.0.0 + port 25228 + format json + + + # Container inventory + + @type containerinventory + tag oneagent.containerInsights.CONTAINER_INVENTORY_BLOB + run_interval 60 + @log_level debug + + + #cadvisor perf + + @type cadvisor_perf + tag oneagent.containerInsights.LINUX_PERF_BLOB + run_interval 60 + @log_level debug + + + + @type cadvisor_health_node + @log_level debug + + + + @type cadvisor_health_container + @log_level debug + + + #custom_metrics_mdm filter plugin + + @type cadvisor2mdm + metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes,pvUsedBytes + @log_level info + + + + @type telegraf2mdm + @log_level debug + + + #containerinventory + + @type forward + @log_level debug + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + keepalive true + + + #cadvisorperf + + @type forward + @log_level debug + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + keepalive true + + + + @type health_forward + send_timeout 60s + recover_wait 10s + hard_timeout 60s + transport tcp + ignore_network_errors_at_startup true + expire_dns_cache 600s + + @type file + overflow_action drop_oldest_chunk + path /var/opt/microsoft/docker-cimprov/state/out_health_forward*.buffer + chunk_limit_size 3m + flush_interval 20s + retry_max_times 10 + retry_max_interval 5m + retry_wait 5s + + + host "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_HOST']}" + port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}" + + + @type file + path /var/opt/microsoft/docker-cimprov/state/fluent_forward_failed.buffer + + + + + @type mdm + @log_level debug + + @type file + path /var/opt/microsoft/docker-cimprov/state/out_mdm_cdvisorperf*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + + retry_mdm_post_wait_minutes 30 + + + 
#InsightsMetrics + + @type forward + @log_level debug + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + keepalive true + diff --git a/build/linux/installer/conf/kube.conf b/build/linux/installer/conf/kube.conf index fb566c360..1990e8b6f 100644 --- a/build/linux/installer/conf/kube.conf +++ b/build/linux/installer/conf/kube.conf @@ -1,7 +1,6 @@ -# Fluentd config file for OMS Docker - cluster components (kubeAPI) #fluent forward plugin - type forward + @type forward port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}" bind 0.0.0.0 chunk_size_limit 4m @@ -9,262 +8,226 @@ #Kubernetes pod inventory - type kubepodinventory - tag oms.containerinsights.KubePodInventory + @type kube_podinventory + tag oneagent.containerInsights.KUBE_POD_INVENTORY_BLOB run_interval 60 - log_level debug + @log_level debug #Kubernetes Persistent Volume inventory - type kubepvinventory - tag oms.containerinsights.KubePVInventory + @type kube_pvinventory + tag oneagent.containerInsights.KUBE_PV_INVENTORY_BLOB run_interval 60 - log_level debug + @log_level debug #Kubernetes events - type kubeevents - tag oms.containerinsights.KubeEvents + @type kube_events + tag oneagent.containerInsights.KUBE_EVENTS_BLOB run_interval 60 - log_level debug - + @log_level debug + #Kubernetes Nodes - type kubenodeinventory - tag oms.containerinsights.KubeNodeInventory + @type kube_nodes + tag oneagent.containerInsights.KUBE_NODE_INVENTORY_BLOB run_interval 60 - log_level debug + @log_level debug #Kubernetes health - type kubehealth + @type kube_health tag kubehealth.ReplicaSet run_interval 60 - log_level debug + @log_level debug #cadvisor perf- Windows nodes - type wincadvisorperf - tag oms.api.wincadvisorperf + @type win_cadvisor_perf + tag oneagent.containerInsights.LINUX_PERF_BLOB run_interval 60 - log_level debug + @log_level debug #Kubernetes object state - deployments - - type kubestatedeployments - tag oms.containerinsights.KubeStateDeployments - run_interval 60 - log_level debug - + + 
@type kubestate_deployments + tag oneagent.containerInsights.INSIGHTS_METRICS_BLOB + run_interval 60 + @log_level debug + - #Kubernetes object state - HPA - - type kubestatehpa - tag oms.containerinsights.KubeStateHpa - run_interval 60 - log_level debug - + #Kubernetes object state - HPA + + @type kubestate_hpa + tag oneagent.containerInsights.INSIGHTS_METRICS_BLOB + run_interval 60 + @log_level debug + - type filter_inventory2mdm - log_level info + @type inventory2mdm + @log_level info #custom_metrics_mdm filter plugin for perf data from windows nodes - type filter_cadvisor2mdm + @type cadvisor2mdm metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,pvUsedBytes - log_level info + @log_level info #health model aggregation filter - type filter_health_model_builder + @type health_model_builder - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 4m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m + #kubepodinventory + + @type forward + @log_level debug + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + keepalive true - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 4m - buffer_type file - buffer_path %STATE_DIR_WS%/state/out_oms_kubepv*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m + #kubepvinventory + + @type forward + @log_level debug + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + keepalive true + + + #InsightsMetrics + #kubestate + + @type forward + @log_level debug + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + keepalive true - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 4m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer - buffer_queue_limit 20 - 
buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m + #kubeevents + + @type forward + @log_level debug + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + keepalive true - - - type out_oms - log_level debug - num_threads 2 - buffer_chunk_limit 4m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m - - - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 4m - buffer_type file - buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m + + #kubeservices + + @type forward + @log_level debug + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + keepalive true + + + #kubenodeinventory + + @type forward + @log_level debug + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + keepalive true - - type out_oms - log_level debug - num_threads 3 - buffer_chunk_limit 4m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer - buffer_queue_limit 20 - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m + #containernodeinventory + + @type forward + @log_level debug + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + keepalive true - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 4m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m + #containerinventory for windows containers + + @type forward + @log_level debug + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + keepalive true + + + #perf + + @type 
forward + @log_level debug + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + keepalive true - type out_mdm - log_level debug - num_threads 5 - buffer_chunk_limit 4m - buffer_type file - buffer_path %STATE_DIR_WS%/out_mdm_*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m + @type mdm + @log_level debug + + @type file + path /var/opt/microsoft/docker-cimprov/state/out_mdm_*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + retry_mdm_post_wait_minutes 30 - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 4m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_api_wincadvisorperf*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m - - - type out_mdm - log_level debug - num_threads 5 - buffer_chunk_limit 4m - buffer_type file - buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m + @type mdm + @log_level debug + + @type file + path /var/opt/microsoft/docker-cimprov/state/out_mdm_cdvisorperf*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + retry_mdm_post_wait_minutes 30 - - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 4m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubehealth*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m + + #kubehealth + + @type forward + @log_level debug + + host 0.0.0.0 + port 
"#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + keepalive true - - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 4m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_insightsmetrics*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m - \ No newline at end of file diff --git a/build/linux/installer/conf/out_oms.conf b/build/linux/installer/conf/out_oms.conf index 74ba3195e..2f286e947 100644 --- a/build/linux/installer/conf/out_oms.conf +++ b/build/linux/installer/conf/out_oms.conf @@ -1,4 +1,3 @@ -omsadmin_conf_path=/etc/opt/microsoft/omsagent/conf/omsadmin.conf omsproxy_secret_path=/etc/omsagent-secret/PROXY adx_cluster_uri_path=/etc/config/settings/adx/ADXCLUSTERURI adx_client_id_path=/etc/config/settings/adx/ADXCLIENTID diff --git a/build/linux/installer/datafiles/base_container.data b/build/linux/installer/datafiles/base_container.data index df8fbc3da..788e7bbcf 100644 --- a/build/linux/installer/datafiles/base_container.data +++ b/build/linux/installer/datafiles/base_container.data @@ -18,89 +18,8 @@ MAINTAINER: 'Microsoft Corporation' /etc/opt/microsoft/docker-cimprov/conf/installinfo.txt; build/linux/installer/conf/installinfo.txt; 644; root; root; conffile -/opt/microsoft/omsagent/plugin/filter_docker_log.rb; source/plugins/ruby/filter_docker_log.rb; 644; root; root -/opt/microsoft/omsagent/plugin/filter_container.rb; source/plugins/ruby/filter_container.rb; 644; root; root - -/opt/microsoft/omsagent/plugin/in_kube_podinventory.rb; source/plugins/ruby/in_kube_podinventory.rb; 644; root; root -/opt/microsoft/omsagent/plugin/in_kube_pvinventory.rb; source/plugins/ruby/in_kube_pvinventory.rb; 644; root; root -/opt/microsoft/omsagent/plugin/in_kube_events.rb; source/plugins/ruby/in_kube_events.rb; 644; root; root -/opt/microsoft/omsagent/plugin/KubernetesApiClient.rb; source/plugins/ruby/KubernetesApiClient.rb; 644; root; root - 
/etc/opt/microsoft/docker-cimprov/container.conf; build/linux/installer/conf/container.conf; 644; root; root -/opt/microsoft/omsagent/plugin/CAdvisorMetricsAPIClient.rb; source/plugins/ruby/CAdvisorMetricsAPIClient.rb; 644; root; root -/opt/microsoft/omsagent/plugin/in_cadvisor_perf.rb; source/plugins/ruby/in_cadvisor_perf.rb; 644; root; root -/opt/microsoft/omsagent/plugin/in_win_cadvisor_perf.rb; source/plugins/ruby/in_win_cadvisor_perf.rb; 644; root; root -/opt/microsoft/omsagent/plugin/in_kube_nodes.rb; source/plugins/ruby/in_kube_nodes.rb; 644; root; root -/opt/microsoft/omsagent/plugin/in_kubestate_deployments.rb; source/plugins/ruby/in_kubestate_deployments.rb; 644; root; root -/opt/microsoft/omsagent/plugin/in_kubestate_hpa.rb; source/plugins/ruby/in_kubestate_hpa.rb; 644; root; root -/opt/microsoft/omsagent/plugin/filter_inventory2mdm.rb; source/plugins/ruby/filter_inventory2mdm.rb; 644; root; root -/opt/microsoft/omsagent/plugin/podinventory_to_mdm.rb; source/plugins/ruby/podinventory_to_mdm.rb; 644; root; root -/opt/microsoft/omsagent/plugin/kubelet_utils.rb; source/plugins/ruby/kubelet_utils.rb; 644; root; root -/opt/microsoft/omsagent/plugin/CustomMetricsUtils.rb; source/plugins/ruby/CustomMetricsUtils.rb; 644; root; root -/opt/microsoft/omsagent/plugin/constants.rb; source/plugins/ruby/constants.rb; 644; root; root -/opt/microsoft/omsagent/plugin/MdmAlertTemplates.rb; source/plugins/ruby/MdmAlertTemplates.rb; 644; root; root -/opt/microsoft/omsagent/plugin/MdmMetricsGenerator.rb; source/plugins/ruby/MdmMetricsGenerator.rb; 644; root; root - - -/opt/microsoft/omsagent/plugin/ApplicationInsightsUtility.rb; source/plugins/ruby/ApplicationInsightsUtility.rb; 644; root; root -/opt/microsoft/omsagent/plugin/ContainerInventoryState.rb; source/plugins/ruby/ContainerInventoryState.rb; 644; root; root -/opt/microsoft/omsagent/plugin/DockerApiClient.rb; source/plugins/ruby/DockerApiClient.rb; 644; root; root 
-/opt/microsoft/omsagent/plugin/DockerApiRestHelper.rb; source/plugins/ruby/DockerApiRestHelper.rb; 644; root; root -/opt/microsoft/omsagent/plugin/in_containerinventory.rb; source/plugins/ruby/in_containerinventory.rb; 644; root; root -/opt/microsoft/omsagent/plugin/kubernetes_container_inventory.rb; source/plugins/ruby/kubernetes_container_inventory.rb; 644; root; root -/opt/microsoft/omsagent/plugin/proxy_utils.rb; source/plugins/ruby/proxy_utils.rb; 644; root; root - -/opt/microsoft/omsagent/plugin/arc_k8s_cluster_identity.rb; source/plugins/ruby/arc_k8s_cluster_identity.rb; 644; root; root -/opt/microsoft/omsagent/plugin/out_mdm.rb; source/plugins/ruby/out_mdm.rb; 644; root; root -/opt/microsoft/omsagent/plugin/filter_cadvisor2mdm.rb; source/plugins/ruby/filter_cadvisor2mdm.rb; 644; root; root -/opt/microsoft/omsagent/plugin/filter_telegraf2mdm.rb; source/plugins/ruby/filter_telegraf2mdm.rb; 644; root; root - -/opt/microsoft/omsagent/plugin/lib/application_insights/version.rb; source/plugins/ruby/lib/application_insights/version.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/rack/track_request.rb; source/plugins/ruby/lib/application_insights/rack/track_request.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/unhandled_exception.rb; source/plugins/ruby/lib/application_insights/unhandled_exception.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/telemetry_client.rb; source/plugins/ruby/lib/application_insights/telemetry_client.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/queue_base.rb; source/plugins/ruby/lib/application_insights/channel/queue_base.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/asynchronous_queue.rb; source/plugins/ruby/lib/application_insights/channel/asynchronous_queue.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/synchronous_sender.rb; 
source/plugins/ruby/lib/application_insights/channel/synchronous_sender.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/data_point_type.rb; source/plugins/ruby/lib/application_insights/channel/contracts/data_point_type.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/data_point.rb; source/plugins/ruby/lib/application_insights/channel/contracts/data_point.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/stack_frame.rb; source/plugins/ruby/lib/application_insights/channel/contracts/stack_frame.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/request_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/request_data.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/session.rb; source/plugins/ruby/lib/application_insights/channel/contracts/session.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/page_view_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/page_view_data.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/remote_dependency_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/remote_dependency_data.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/exception_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/exception_data.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/location.rb; source/plugins/ruby/lib/application_insights/channel/contracts/location.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/operation.rb; source/plugins/ruby/lib/application_insights/channel/contracts/operation.rb; 644; root; root 
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/data.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/event_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/event_data.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/metric_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/metric_data.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/device.rb; source/plugins/ruby/lib/application_insights/channel/contracts/device.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/message_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/message_data.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/dependency_source_type.rb; source/plugins/ruby/lib/application_insights/channel/contracts/dependency_source_type.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/user.rb; source/plugins/ruby/lib/application_insights/channel/contracts/user.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/severity_level.rb; source/plugins/ruby/lib/application_insights/channel/contracts/severity_level.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/application.rb; source/plugins/ruby/lib/application_insights/channel/contracts/application.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/dependency_kind.rb; source/plugins/ruby/lib/application_insights/channel/contracts/dependency_kind.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/cloud.rb; 
source/plugins/ruby/lib/application_insights/channel/contracts/cloud.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/envelope.rb; source/plugins/ruby/lib/application_insights/channel/contracts/envelope.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/json_serializable.rb; source/plugins/ruby/lib/application_insights/channel/contracts/json_serializable.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/domain.rb; source/plugins/ruby/lib/application_insights/channel/contracts/domain.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/base.rb; source/plugins/ruby/lib/application_insights/channel/contracts/base.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/reopenings.rb; source/plugins/ruby/lib/application_insights/channel/contracts/reopenings.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/page_view_perf_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/page_view_perf_data.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/internal.rb; source/plugins/ruby/lib/application_insights/channel/contracts/internal.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/availability_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/availability_data.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/exception_details.rb; source/plugins/ruby/lib/application_insights/channel/contracts/exception_details.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/synchronous_queue.rb; source/plugins/ruby/lib/application_insights/channel/synchronous_queue.rb; 644; root; root 
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/sender_base.rb; source/plugins/ruby/lib/application_insights/channel/sender_base.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/telemetry_context.rb; source/plugins/ruby/lib/application_insights/channel/telemetry_context.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/asynchronous_sender.rb; source/plugins/ruby/lib/application_insights/channel/asynchronous_sender.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/telemetry_channel.rb; source/plugins/ruby/lib/application_insights/channel/telemetry_channel.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/event.rb; source/plugins/ruby/lib/application_insights/channel/event.rb; 644; root; root -/opt/microsoft/omsagent/plugin/lib/application_insights.rb; source/plugins/ruby/lib/application_insights.rb; 644; root; root - /opt/tomlrb.rb; source/toml-parser/tomlrb.rb; 644; root; root /opt/tomlrb/generated_parser.rb; source/toml-parser/tomlrb/generated_parser.rb; 644; root; root /opt/tomlrb/handler.rb; source/toml-parser/tomlrb/handler.rb; 644; root; root @@ -126,6 +45,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/tomlparser-mdm-metrics-config.rb; build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb; 755; root; root /opt/tomlparser-metric-collection-config.rb; build/linux/installer/scripts/tomlparser-metric-collection-config.rb; 755; root; root + /opt/tomlparser-agent-config.rb; build/linux/installer/scripts/tomlparser-agent-config.rb; 755; root; root /opt/tomlparser.rb; build/common/installer/scripts/tomlparser.rb; 755; root; root /opt/td-agent-bit-conf-customizer.rb; build/common/installer/scripts/td-agent-bit-conf-customizer.rb; 755; root; root @@ -134,43 +54,128 @@ MAINTAINER: 'Microsoft Corporation' /opt/tomlparser-osm-config.rb; build/linux/installer/scripts/tomlparser-osm-config.rb; 755; root; root 
-/opt/microsoft/omsagent/plugin/filter_cadvisor_health_container.rb; source/plugins/ruby/filter_cadvisor_health_container.rb; 644; root; root -/opt/microsoft/omsagent/plugin/filter_cadvisor_health_node.rb; source/plugins/ruby/filter_cadvisor_health_node.rb; 644; root; root -/opt/microsoft/omsagent/plugin/filter_health_model_builder.rb; source/plugins/ruby/filter_health_model_builder.rb; 644; root; root -/opt/microsoft/omsagent/plugin/in_kube_health.rb; source/plugins/ruby/in_kube_health.rb; 644; root; root -/opt/microsoft/omsagent/plugin/out_health_forward.rb; source/plugins/ruby/out_health_forward.rb; 644; root; root /etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json; build/linux/installer/conf/healthmonitorconfig.json; 644; root; root /etc/opt/microsoft/docker-cimprov/health/health_model_definition.json; build/linux/installer/conf/health_model_definition.json; 644; root; root -/opt/microsoft/omsagent/plugin/health/aggregate_monitor.rb; source/plugins/ruby/health/aggregate_monitor.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/agg_monitor_id_labels.rb; source/plugins/ruby/health/agg_monitor_id_labels.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/aggregate_monitor_state_finalizer.rb; source/plugins/ruby/health/aggregate_monitor_state_finalizer.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/cluster_health_state.rb; source/plugins/ruby/health/cluster_health_state.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_container_cpu_memory_aggregator.rb; source/plugins/ruby/health/health_container_cpu_memory_aggregator.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_container_cpu_memory_record_formatter.rb; source/plugins/ruby/health/health_container_cpu_memory_record_formatter.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_hierarchy_builder.rb; source/plugins/ruby/health/health_hierarchy_builder.rb; 644; root; root 
-/opt/microsoft/omsagent/plugin/health/health_kubernetes_resources.rb; source/plugins/ruby/health/health_kubernetes_resources.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_kube_api_down_handler.rb; source/plugins/ruby/health/health_kube_api_down_handler.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_missing_signal_generator.rb; source/plugins/ruby/health/health_missing_signal_generator.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_model_buffer.rb; source/plugins/ruby/health/health_model_buffer.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_model_builder.rb; source/plugins/ruby/health/health_model_builder.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_model_constants.rb; source/plugins/ruby/health/health_model_constants.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/parent_monitor_provider.rb; source/plugins/ruby/health/parent_monitor_provider.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_model_definition_parser.rb; source/plugins/ruby/health/health_model_definition_parser.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_monitor_helpers.rb; source/plugins/ruby/health/health_monitor_helpers.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_monitor_optimizer.rb; source/plugins/ruby/health/health_monitor_optimizer.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_monitor_provider.rb; source/plugins/ruby/health/health_monitor_provider.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_monitor_record.rb; source/plugins/ruby/health/health_monitor_record.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_monitor_state.rb; source/plugins/ruby/health/health_monitor_state.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_monitor_telemetry.rb; source/plugins/ruby/health/health_monitor_telemetry.rb; 644; root; root 
-/opt/microsoft/omsagent/plugin/health/health_monitor_utils.rb; source/plugins/ruby/health/health_monitor_utils.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_signal_reducer.rb; source/plugins/ruby/health/health_signal_reducer.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/monitor_factory.rb; source/plugins/ruby/health/monitor_factory.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/monitor_set.rb; source/plugins/ruby/health/monitor_set.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/unit_monitor.rb; source/plugins/ruby/health/unit_monitor.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/version.rb; source/plugins/ruby/lib/application_insights/version.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/rack/track_request.rb; source/plugins/ruby/lib/application_insights/rack/track_request.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/unhandled_exception.rb; source/plugins/ruby/lib/application_insights/unhandled_exception.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/telemetry_client.rb; source/plugins/ruby/lib/application_insights/telemetry_client.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/queue_base.rb; source/plugins/ruby/lib/application_insights/channel/queue_base.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/asynchronous_queue.rb; source/plugins/ruby/lib/application_insights/channel/asynchronous_queue.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/synchronous_sender.rb; source/plugins/ruby/lib/application_insights/channel/synchronous_sender.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/data_point_type.rb; source/plugins/ruby/lib/application_insights/channel/contracts/data_point_type.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/data_point.rb; 
source/plugins/ruby/lib/application_insights/channel/contracts/data_point.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/stack_frame.rb; source/plugins/ruby/lib/application_insights/channel/contracts/stack_frame.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/request_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/request_data.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/session.rb; source/plugins/ruby/lib/application_insights/channel/contracts/session.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/page_view_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/page_view_data.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/remote_dependency_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/remote_dependency_data.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/exception_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/exception_data.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/location.rb; source/plugins/ruby/lib/application_insights/channel/contracts/location.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/operation.rb; source/plugins/ruby/lib/application_insights/channel/contracts/operation.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/data.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/event_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/event_data.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/metric_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/metric_data.rb; 644; 
root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/device.rb; source/plugins/ruby/lib/application_insights/channel/contracts/device.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/message_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/message_data.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/dependency_source_type.rb; source/plugins/ruby/lib/application_insights/channel/contracts/dependency_source_type.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/user.rb; source/plugins/ruby/lib/application_insights/channel/contracts/user.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/severity_level.rb; source/plugins/ruby/lib/application_insights/channel/contracts/severity_level.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/application.rb; source/plugins/ruby/lib/application_insights/channel/contracts/application.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/dependency_kind.rb; source/plugins/ruby/lib/application_insights/channel/contracts/dependency_kind.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/cloud.rb; source/plugins/ruby/lib/application_insights/channel/contracts/cloud.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/envelope.rb; source/plugins/ruby/lib/application_insights/channel/contracts/envelope.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/json_serializable.rb; source/plugins/ruby/lib/application_insights/channel/contracts/json_serializable.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/domain.rb; source/plugins/ruby/lib/application_insights/channel/contracts/domain.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/base.rb; 
source/plugins/ruby/lib/application_insights/channel/contracts/base.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/reopenings.rb; source/plugins/ruby/lib/application_insights/channel/contracts/reopenings.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/page_view_perf_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/page_view_perf_data.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/internal.rb; source/plugins/ruby/lib/application_insights/channel/contracts/internal.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/availability_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/availability_data.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/contracts/exception_details.rb; source/plugins/ruby/lib/application_insights/channel/contracts/exception_details.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/synchronous_queue.rb; source/plugins/ruby/lib/application_insights/channel/synchronous_queue.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/sender_base.rb; source/plugins/ruby/lib/application_insights/channel/sender_base.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/telemetry_context.rb; source/plugins/ruby/lib/application_insights/channel/telemetry_context.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/asynchronous_sender.rb; source/plugins/ruby/lib/application_insights/channel/asynchronous_sender.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/telemetry_channel.rb; source/plugins/ruby/lib/application_insights/channel/telemetry_channel.rb; 644; root; root +/etc/fluent/plugin/lib/application_insights/channel/event.rb; source/plugins/ruby/lib/application_insights/channel/event.rb; 644; root; root 
+/etc/fluent/plugin/lib/application_insights.rb; source/plugins/ruby/lib/application_insights.rb; 644; root; root + +/etc/fluent/plugin/health/aggregate_monitor.rb; source/plugins/ruby/health/aggregate_monitor.rb; 644; root; root +/etc/fluent/plugin/health/agg_monitor_id_labels.rb; source/plugins/ruby/health/agg_monitor_id_labels.rb; 644; root; root +/etc/fluent/plugin/health/aggregate_monitor_state_finalizer.rb; source/plugins/ruby/health/aggregate_monitor_state_finalizer.rb; 644; root; root +/etc/fluent/plugin/health/cluster_health_state.rb; source/plugins/ruby/health/cluster_health_state.rb; 644; root; root +/etc/fluent/plugin/health/health_container_cpu_memory_aggregator.rb; source/plugins/ruby/health/health_container_cpu_memory_aggregator.rb; 644; root; root +/etc/fluent/plugin/health/health_container_cpu_memory_record_formatter.rb; source/plugins/ruby/health/health_container_cpu_memory_record_formatter.rb; 644; root; root +/etc/fluent/plugin/health/health_hierarchy_builder.rb; source/plugins/ruby/health/health_hierarchy_builder.rb; 644; root; root +/etc/fluent/plugin/health/health_kubernetes_resources.rb; source/plugins/ruby/health/health_kubernetes_resources.rb; 644; root; root +/etc/fluent/plugin/health/health_kube_api_down_handler.rb; source/plugins/ruby/health/health_kube_api_down_handler.rb; 644; root; root +/etc/fluent/plugin/health/health_missing_signal_generator.rb; source/plugins/ruby/health/health_missing_signal_generator.rb; 644; root; root +/etc/fluent/plugin/health/health_model_buffer.rb; source/plugins/ruby/health/health_model_buffer.rb; 644; root; root +/etc/fluent/plugin/health/health_model_builder.rb; source/plugins/ruby/health/health_model_builder.rb; 644; root; root +/etc/fluent/plugin/health/health_model_constants.rb; source/plugins/ruby/health/health_model_constants.rb; 644; root; root +/etc/fluent/plugin/health/parent_monitor_provider.rb; source/plugins/ruby/health/parent_monitor_provider.rb; 644; root; root 
+/etc/fluent/plugin/health/health_model_definition_parser.rb; source/plugins/ruby/health/health_model_definition_parser.rb; 644; root; root +/etc/fluent/plugin/health/health_monitor_helpers.rb; source/plugins/ruby/health/health_monitor_helpers.rb; 644; root; root +/etc/fluent/plugin/health/health_monitor_optimizer.rb; source/plugins/ruby/health/health_monitor_optimizer.rb; 644; root; root +/etc/fluent/plugin/health/health_monitor_provider.rb; source/plugins/ruby/health/health_monitor_provider.rb; 644; root; root +/etc/fluent/plugin/health/health_monitor_record.rb; source/plugins/ruby/health/health_monitor_record.rb; 644; root; root +/etc/fluent/plugin/health/health_monitor_state.rb; source/plugins/ruby/health/health_monitor_state.rb; 644; root; root +/etc/fluent/plugin/health/health_monitor_telemetry.rb; source/plugins/ruby/health/health_monitor_telemetry.rb; 644; root; root +/etc/fluent/plugin/health/health_monitor_utils.rb; source/plugins/ruby/health/health_monitor_utils.rb; 644; root; root +/etc/fluent/plugin/health/health_signal_reducer.rb; source/plugins/ruby/health/health_signal_reducer.rb; 644; root; root +/etc/fluent/plugin/health/monitor_factory.rb; source/plugins/ruby/health/monitor_factory.rb; 644; root; root +/etc/fluent/plugin/health/monitor_set.rb; source/plugins/ruby/health/monitor_set.rb; 644; root; root +/etc/fluent/plugin/health/unit_monitor.rb; source/plugins/ruby/health/unit_monitor.rb; 644; root; root + +/etc/fluent/plugin/ApplicationInsightsUtility.rb; source/plugins/ruby/ApplicationInsightsUtility.rb; 644; root; root +/etc/fluent/plugin/arc_k8s_cluster_identity.rb; source/plugins/ruby/arc_k8s_cluster_identity.rb; 644; root; root +/etc/fluent/plugin/CAdvisorMetricsAPIClient.rb; source/plugins/ruby/CAdvisorMetricsAPIClient.rb; 644; root; root +/etc/fluent/plugin/constants.rb; source/plugins/ruby/constants.rb; 644; root; root +/etc/fluent/plugin/ContainerInventoryState.rb; source/plugins/ruby/ContainerInventoryState.rb; 644; root; root 
+/etc/fluent/plugin/CustomMetricsUtils.rb; source/plugins/ruby/CustomMetricsUtils.rb; 644; root; root +/etc/fluent/plugin/DockerApiClient.rb; source/plugins/ruby/DockerApiClient.rb; 644; root; root +/etc/fluent/plugin/DockerApiRestHelper.rb; source/plugins/ruby/DockerApiRestHelper.rb; 644; root; root +/etc/fluent/plugin/kubelet_utils.rb; source/plugins/ruby/kubelet_utils.rb; 644; root; root +/etc/fluent/plugin/proxy_utils.rb; source/plugins/ruby/proxy_utils.rb; 644; root; root +/etc/fluent/plugin/kubernetes_container_inventory.rb; source/plugins/ruby/kubernetes_container_inventory.rb; 644; root; root +/etc/fluent/plugin/podinventory_to_mdm.rb; source/plugins/ruby/podinventory_to_mdm.rb; 644; root; root +/etc/fluent/plugin/MdmMetricsGenerator.rb; source/plugins/ruby/MdmMetricsGenerator.rb; 644; root; root +/etc/fluent/plugin/MdmAlertTemplates.rb; source/plugins/ruby/MdmAlertTemplates.rb; 644; root; root + +/etc/fluent/plugin/omslog.rb; source/plugins/utils/omslog.rb; 644; root; root +/etc/fluent/plugin/oms_common.rb; source/plugins/utils/oms_common.rb; 644; root; root +/etc/fluent/plugin/oms_configuration.rb; source/plugins/utils/oms_configuration.rb; 644; root; root + +/etc/fluent/kube.conf; build/linux/installer/conf/kube.conf; 644; root; root +/etc/fluent/container.conf; build/linux/installer/conf/container.conf; 644; root; root + +/etc/fluent/plugin/in_cadvisor_perf.rb; source/plugins/ruby/in_cadvisor_perf.rb; 644; root; root +/etc/fluent/plugin/in_win_cadvisor_perf.rb; source/plugins/ruby/in_win_cadvisor_perf.rb; 644; root; root +/etc/fluent/plugin/in_containerinventory.rb; source/plugins/ruby/in_containerinventory.rb; 644; root; root +/etc/fluent/plugin/in_kube_nodes.rb; source/plugins/ruby/in_kube_nodes.rb; 644; root; root +/etc/fluent/plugin/in_kube_podinventory.rb; source/plugins/ruby/in_kube_podinventory.rb; 644; root; root +/etc/fluent/plugin/KubernetesApiClient.rb; source/plugins/ruby/KubernetesApiClient.rb; 644; root; root 
+/etc/fluent/plugin/in_kube_events.rb; source/plugins/ruby/in_kube_events.rb; 644; root; root +/etc/fluent/plugin/in_kube_health.rb; source/plugins/ruby/in_kube_health.rb; 644; root; root +/etc/fluent/plugin/in_kube_pvinventory.rb; source/plugins/ruby/in_kube_pvinventory.rb; 644; root; root +/etc/fluent/plugin/in_kubestate_deployments.rb; source/plugins/ruby/in_kubestate_deployments.rb; 644; root; root +/etc/fluent/plugin/in_kubestate_hpa.rb; source/plugins/ruby/in_kubestate_hpa.rb; 644; root; root + +/etc/fluent/plugin/filter_cadvisor_health_container.rb; source/plugins/ruby/filter_cadvisor_health_container.rb; 644; root; root +/etc/fluent/plugin/filter_cadvisor_health_node.rb; source/plugins/ruby/filter_cadvisor_health_node.rb; 644; root; root +/etc/fluent/plugin/filter_cadvisor2mdm.rb; source/plugins/ruby/filter_cadvisor2mdm.rb; 644; root; root +/etc/fluent/plugin/filter_health_model_builder.rb; source/plugins/ruby/filter_health_model_builder.rb; 644; root; root +/etc/fluent/plugin/filter_inventory2mdm.rb; source/plugins/ruby/filter_inventory2mdm.rb; 644; root; root +/etc/fluent/plugin/filter_telegraf2mdm.rb; source/plugins/ruby/filter_telegraf2mdm.rb; 644; root; root + +/etc/fluent/plugin/out_health_forward.rb; source/plugins/ruby/out_health_forward.rb; 644; root; root +/etc/fluent/plugin/out_mdm.rb; source/plugins/ruby/out_mdm.rb; 644; root; root + + %Links -/opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root %Directories /etc; 755; root; root; sysdir @@ -179,27 +184,18 @@ MAINTAINER: 'Microsoft Corporation' /var; 755; root; root; sysdir /var/opt; 755; root; root; sysdir +/opt/fluent; 755; root; root; sysdir + /etc/opt/microsoft; 755; root; root; sysdir /etc/opt/microsoft/docker-cimprov; 755; root; root /etc/opt/microsoft/docker-cimprov/conf; 755; root; root /etc/opt/microsoft/docker-cimprov/health; 755; root; root -/etc/opt/omi; 755; root; root; sysdir -/etc/opt/omi/conf; 755; root; root; 
sysdir -/etc/opt/omi/conf/omiregister; 755; root; root; sysdir -/etc/opt/omi/conf/omiregister/root-cimv2; 755; root; root - /opt/microsoft; 755; root; root; sysdir /opt/microsoft/docker-cimprov; 755; root; root /opt/microsoft/docker-cimprov/bin; 755; root; root /opt/microsoft/docker-cimprov/lib; 755; root; root -/opt/microsoft/omsagent; 755; root; root; sysdir -/opt/microsoft/omsagent/plugin; 755; root; root; sysdir -/opt/microsoft/omsagent/plugin/health; 755; root; root; sysdir - -/opt/omi; 755; root; root; sysdir -/opt/omi/lib; 755; root; root; sysdir /var/opt/microsoft; 755; root; root; sysdir /var/opt/microsoft/docker-cimprov; 755; root; root @@ -213,11 +209,14 @@ MAINTAINER: 'Microsoft Corporation' /opt/td-agent-bit/bin; 755; root; root;sysdir /etc/telegraf; 755; root; root;sysdir -/opt/microsoft/omsagent/plugin/lib; 755; root; root; sysdir -/opt/microsoft/omsagent/plugin/lib/application_insights; 755; root; root; sysdir -/opt/microsoft/omsagent/plugin/lib/application_insights/channel; 755; root; root; sysdir -/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts; 755; root; root; sysdir -/opt/microsoft/omsagent/plugin/lib/application_insights/rack; 755; root; root; sysdir +/etc/fluent; 755; root; root; sysdir +/etc/fluent/plugin; 755; root; root; sysdir +/etc/fluent/plugin/health; 755; root; root; sysdir +/etc/fluent/plugin/lib; 755; root; root; sysdir +/etc/fluent/plugin/lib/application_insights; 755; root; root; sysdir +/etc/fluent/plugin/lib/application_insights/channel; 755; root; root; sysdir +/etc/fluent/plugin/lib/application_insights/channel/contracts; 755; root; root; sysdir +/etc/fluent/plugin/lib/application_insights/rack; 755; root; root; sysdir /opt/tomlrb; 755; root; root; sysdir @@ -230,64 +229,61 @@ WriteInstallInfo() { } WriteInstallInfo -#Make omsagent owner for ContainerInventory directory. 
This is needed for ruby plugin to have access -chown omsagent:omsagent /var/opt/microsoft/docker-cimprov/state/ContainerInventory # Get the state file in place with proper permissions touch /var/opt/microsoft/docker-cimprov/state/LastEventQueryTime.txt chmod 644 /var/opt/microsoft/docker-cimprov/state/LastEventQueryTime.txt -chown omsagent:omsagent /var/opt/microsoft/docker-cimprov/state/LastEventQueryTime.txt touch /var/opt/microsoft/docker-cimprov/state/KubeEventQueryState.yaml chmod 644 /var/opt/microsoft/docker-cimprov/state/KubeEventQueryState.yaml -chown omsagent:omsagent /var/opt/microsoft/docker-cimprov/state/KubeEventQueryState.yaml touch /var/opt/microsoft/docker-cimprov/state/KubeLogQueryState.yaml chmod 644 /var/opt/microsoft/docker-cimprov/state/KubeLogQueryState.yaml -chown omsagent:omsagent /var/opt/microsoft/docker-cimprov/state/KubeLogQueryState.yaml + touch /var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt chmod 666 /var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt -chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt + touch /var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt chmod 666 /var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt -chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt + touch /var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log chmod 666 /var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log -chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log + touch /var/opt/microsoft/docker-cimprov/log/filter_telegraf2mdm.log chmod 666 /var/opt/microsoft/docker-cimprov/log/filter_telegraf2mdm.log -chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/filter_telegraf2mdm.log + touch /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log chmod 666 /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log -chown omsagent:omiusers 
/var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log + touch /var/opt/microsoft/docker-cimprov/log/mdm_metrics_generator.log chmod 666 /var/opt/microsoft/docker-cimprov/log/mdm_metrics_generator.log -chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/mdm_metrics_generator.log + touch /var/opt/microsoft/docker-cimprov/log/health_monitors.log chmod 666 /var/opt/microsoft/docker-cimprov/log/health_monitors.log -chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/health_monitors.log + touch /var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log chmod 666 /var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log -chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log + touch /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log chmod 666 /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log -chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log + touch /var/opt/microsoft/docker-cimprov/log/arc_k8s_cluster_identity.log chmod 666 /var/opt/microsoft/docker-cimprov/log/arc_k8s_cluster_identity.log -chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/arc_k8s_cluster_identity.log -mv /etc/opt/microsoft/docker-cimprov/container.conf /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf -chown omsagent:omsagent /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf + +touch /var/opt/microsoft/docker-cimprov/log/fluentd.log +chmod 666 /var/opt/microsoft/docker-cimprov/log/fluentd.log + %Postuninstall_10 # If we're an upgrade, skip all of this cleanup @@ -299,7 +295,6 @@ if ${{PERFORMING_UPGRADE_NOT}}; then rm -f /var/opt/microsoft/docker-cimprov/state/KubeLogQueryState.yaml rm -f /var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt rm -f /var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt - rm -f /etc/opt/microsoft/omsagent/conf/omsagent.d/container.conf rmdir 
/var/opt/microsoft/docker-cimprov/log 2> /dev/null rmdir /var/opt/microsoft/docker-cimprov/state/ContainerInventory 2> /dev/null rmdir /var/opt/microsoft/docker-cimprov/state/ImageInventory 2> /dev/null @@ -308,14 +303,7 @@ if ${{PERFORMING_UPGRADE_NOT}}; then rmdir /etc/opt/microsoft/docker-cimprov/conf 2> /dev/null rmdir /etc/opt/microsoft/docker-cimprov 2> /dev/null rmdir /etc/opt/microsoft 2> /dev/null - rmdir /etc/opt 2> /dev/null - #Remove sudoers file edit - if [ -s /etc/sudoers.d/omsagent ] - then - chmod +w /etc/sudoers.d/omsagent - sed -i '/docker\-provider/,+1 d' /etc/sudoers.d/omsagent - chmod 440 /etc/sudoers.d/omsagent - fi + rmdir /etc/opt 2> /dev/null fi %Preinstall_0 diff --git a/build/linux/installer/datafiles/linux.data b/build/linux/installer/datafiles/linux.data index 604394d80..48af63a73 100644 --- a/build/linux/installer/datafiles/linux.data +++ b/build/linux/installer/datafiles/linux.data @@ -1,16 +1,11 @@ %Variables PF: 'Linux' -OMI_SERVICE: '/opt/omi/bin/service_control' -OMS_SERVICE: '/opt/microsoft/omsagent/bin/service_control' + %Postinstall_2000 -# Reload the OMI server -${{OMI_SERVICE}} reload -${{OMS_SERVICE}} reload -if ${{PERFORMING_UPGRADE_NOT}}; then - /opt/omi/bin/omicli ei root/cimv2 Container_HostInventory -fi + + %Postuninstall_1000 # Calling sequence for RPM pre/post scripts, during upgrade, is as follows: @@ -35,10 +30,5 @@ if ${{PERFORMING_UPGRADE_NOT}}; then fi %Postuninstall_1100 -# If we're called for upgrade, don't do anything -if ${{PERFORMING_UPGRADE_NOT}}; then - # Reload the OMI server - ${{OMI_SERVICE}} reload - ${{OMS_SERVICE}} reload -fi + diff --git a/build/linux/installer/datafiles/linux_dpkg.data b/build/linux/installer/datafiles/linux_dpkg.data index a7821642d..bdf9f2354 100644 --- a/build/linux/installer/datafiles/linux_dpkg.data +++ b/build/linux/installer/datafiles/linux_dpkg.data @@ -3,5 +3,5 @@ PERFORMING_UPGRADE_NOT: '[ "$1" != "upgrade" ]' PACKAGE_TYPE: 'DPKG' %Dependencies -omi (>= 1.0.8.6) + diff 
--git a/build/linux/installer/datafiles/linux_rpm.data b/build/linux/installer/datafiles/linux_rpm.data index 1b9ba009b..d537b444d 100644 --- a/build/linux/installer/datafiles/linux_rpm.data +++ b/build/linux/installer/datafiles/linux_rpm.data @@ -3,5 +3,5 @@ PERFORMING_UPGRADE_NOT: '[ "$1" -ne 1 ]' PACKAGE_TYPE: 'RPM' %Dependencies -omi >= 1.0.8-6 + diff --git a/build/linux/installer/scripts/livenessprobe.sh b/build/linux/installer/scripts/livenessprobe.sh index 198b4e87f..0f6dd0e85 100644 --- a/build/linux/installer/scripts/livenessprobe.sh +++ b/build/linux/installer/scripts/livenessprobe.sh @@ -1,19 +1,21 @@ #!/bin/bash -#test to exit non zero value if omsagent is not running -(ps -ef | grep omsagent- | grep -v "grep") +#test to exit non zero value if mdsd is not running +(ps -ef | grep "mdsd" | grep -v "grep") if [ $? -ne 0 ] then - echo " omsagent is not running" > /dev/termination-log - exit 1 + echo "mdsd is not running" > /dev/termination-log + exit 1 fi -#optionally test to exit non zero value if oneagent is not running -if [ -e "/opt/AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2" ]; then - (ps -ef | grep "mdsd" | grep -v "grep") + +#optionally test to exit non zero value if fluentd is not running +#fluentd not used in sidecar container +if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then + (ps -ef | grep "fluentd" | grep -v "grep") if [ $? 
-ne 0 ] then - echo "oneagent is not running" > /dev/termination-log + echo "fluentd is not running" > /dev/termination-log exit 1 fi fi diff --git a/build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb b/build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb index 5ce5d79d2..dcf179bf2 100644 --- a/build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb +++ b/build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb @@ -3,7 +3,7 @@ require_relative "tomlrb" require_relative "ConfigParseErrorLogger" -require_relative "microsoft/omsagent/plugin/constants" +require_relative "/etc/fluent/plugin/constants" @configMapMountPath = "/etc/config/settings/alertable-metrics-configuration-settings" @configVersion = "" diff --git a/build/linux/installer/scripts/tomlparser-metric-collection-config.rb b/build/linux/installer/scripts/tomlparser-metric-collection-config.rb index 40d87b7f1..cee41312b 100644 --- a/build/linux/installer/scripts/tomlparser-metric-collection-config.rb +++ b/build/linux/installer/scripts/tomlparser-metric-collection-config.rb @@ -3,7 +3,7 @@ require_relative "tomlrb" require_relative "ConfigParseErrorLogger" -require_relative "microsoft/omsagent/plugin/constants" +require_relative "/etc/fluent/plugin/constants" @configMapMountPath = "/etc/config/settings/metric_collection_settings" @configVersion = "" diff --git a/kubernetes/linux/envmdsd b/kubernetes/linux/envmdsd index 3f834bfb8..5a939fc3e 100644 --- a/kubernetes/linux/envmdsd +++ b/kubernetes/linux/envmdsd @@ -2,8 +2,6 @@ export MDSD_ROLE_PREFIX="/var/run/mdsd/default" #export MDSD_OPTIONS="-d -A -r ${MDSD_ROLE_PREFIX}" export MDSD_LOG="/var/opt/microsoft/linuxmonagent/log" export MDSD_SPOOL_DIRECTORY="/var/opt/microsoft/linuxmonagent" -export OMS_CERT_PATH="/etc/opt/microsoft/omsagent/certs/oms.crt" -export OMS_CERT_KEY_PATH="/etc/opt/microsoft/omsagent/certs/oms.key" #export CIWORKSPACE_id="" #export CIWORKSPACE_key="" export MDSD_OPTIONS="-A -c /etc/mdsd.d/mdsd.xml -r 
${MDSD_ROLE_PREFIX} -S ${MDSD_SPOOL_DIRECTORY}/eh -e ${MDSD_LOG}/mdsd.err -w ${MDSD_LOG}/mdsd.warn -o ${MDSD_LOG}/mdsd.info -q ${MDSD_LOG}/mdsd.qos" diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh index f03318ad1..ac3974a59 100644 --- a/kubernetes/linux/main.sh +++ b/kubernetes/linux/main.sh @@ -1,40 +1,8 @@ #!/bin/bash -if [ -e "/etc/config/kube.conf" ]; then - cat /etc/config/kube.conf > /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf -elif [ "${CONTAINER_TYPE}" == "PrometheusSidecar" ]; then - echo "setting omsagent conf file for prometheus sidecar" - cat /etc/opt/microsoft/docker-cimprov/prometheus-side-car.conf > /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf - # omsadmin.sh replaces %MONITOR_AGENT_PORT% and %SYSLOG_PORT% in the monitor.conf and syslog.conf with default ports 25324 and 25224. - # Since we are running 2 omsagents in the same pod, we need to use a different port for the sidecar, - # else we will see the Address already in use - bind(2) for 0.0.0.0:253(2)24 error. - # Look into omsadmin.sh scripts's configure_monitor_agent()/configure_syslog() and find_available_port() methods for more info. 
- sed -i -e 's/port %MONITOR_AGENT_PORT%/port 25326/g' /etc/opt/microsoft/omsagent/sysconf/omsagent.d/monitor.conf - sed -i -e 's/port %SYSLOG_PORT%/port 25226/g' /etc/opt/microsoft/omsagent/sysconf/omsagent.d/syslog.conf -else - echo "setting omsagent conf file for daemonset" - sed -i -e 's/bind 127.0.0.1/bind 0.0.0.0/g' /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf -fi -sed -i -e 's/bind 127.0.0.1/bind 0.0.0.0/g' /etc/opt/microsoft/omsagent/sysconf/omsagent.d/syslog.conf -sed -i -e 's/^exit 101$/exit 0/g' /usr/sbin/policy-rc.d - -#Using the get_hostname for hostname instead of the host field in syslog messages -sed -i.bak "s/record\[\"Host\"\] = hostname/record\[\"Host\"\] = OMS::Common.get_hostname/" /opt/microsoft/omsagent/plugin/filter_syslog.rb - #using /var/opt/microsoft/docker-cimprov/state instead of /var/opt/microsoft/omsagent/state since the latter gets deleted during onboarding mkdir -p /var/opt/microsoft/docker-cimprov/state -#if [ ! -e "/etc/config/kube.conf" ]; then - # add permissions for omsagent user to access docker.sock - #sudo setfacl -m user:omsagent:rw /var/run/host/docker.sock -#fi - -# add permissions for omsagent user to access azure.json. -sudo setfacl -m user:omsagent:r /etc/kubernetes/host/azure.json - -# add permission for omsagent user to log folder. We also need 'x', else log rotation is failing. TODO: Investigate why. -sudo setfacl -m user:omsagent:rwx /var/opt/microsoft/docker-cimprov/log - #Run inotify as a daemon to track changes to the mounted configmap. 
inotifywait /etc/config/settings --daemon --recursive --outfile "/opt/inotifyoutput.txt" --event create,delete --format '%e : %T' --timefmt '+%s' @@ -52,6 +20,11 @@ else echo "export customResourceId=$AKS_RESOURCE_ID" >> ~/.bashrc source ~/.bashrc echo "customResourceId:$customResourceId" + + export customRegion=$AKS_REGION + echo "export customRegion=$AKS_REGION" >> ~/.bashrc + source ~/.bashrc + echo "customRegion:$customRegion" fi #set agent config schema version @@ -103,7 +76,6 @@ if [[ ( ( ! -e "/etc/config/kube.conf" ) && ( "${CONTAINER_TYPE}" == "Prometheus fi export PROXY_ENDPOINT="" - # Check for internet connectivity or workspace deletion if [ -e "/etc/omsagent-secret/WSID" ]; then workspaceId=$(cat /etc/omsagent-secret/WSID) @@ -184,6 +156,7 @@ else echo "LA Onboarding:Workspace Id not mounted, skipping the telemetry check" fi + # Set environment variable for if public cloud by checking the workspace domain. if [ -z $domain ]; then ClOUD_ENVIRONMENT="unknown" @@ -195,6 +168,12 @@ fi export CLOUD_ENVIRONMENT=$CLOUD_ENVIRONMENT echo "export CLOUD_ENVIRONMENT=$CLOUD_ENVIRONMENT" >> ~/.bashrc +#consistent naming conventions with the windows +export DOMAIN=$domain +echo "export DOMAIN=$DOMAIN" >> ~/.bashrc +export WSID=$workspaceId +echo "export WSID=$WSID" >> ~/.bashrc + # Check if the instrumentation key needs to be fetched from a storage account (as in airgapped clouds) if [ ${#APPLICATIONINSIGHTS_AUTH_URL} -ge 1 ]; then # (check if APPLICATIONINSIGHTS_AUTH_URL has length >=1) for BACKOFF in {1..4}; do @@ -229,7 +208,7 @@ source ~/.bashrc if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then #Parse the configmap to set the right environment variables. - /opt/microsoft/omsagent/ruby/bin/ruby tomlparser.rb + /usr/bin/ruby2.6 tomlparser.rb cat config_env_var | while read line; do echo $line >> ~/.bashrc @@ -240,7 +219,7 @@ fi #Parse the configmap to set the right environment variables for agent config.
#Note > tomlparser-agent-config.rb has to be parsed first before td-agent-bit-conf-customizer.rb for fbit agent settings if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then - /opt/microsoft/omsagent/ruby/bin/ruby tomlparser-agent-config.rb + /usr/bin/ruby2.6 tomlparser-agent-config.rb cat agent_config_env_var | while read line; do #echo $line @@ -249,7 +228,7 @@ if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then source agent_config_env_var #Parse the configmap to set the right environment variables for network policy manager (npm) integration. - /opt/microsoft/omsagent/ruby/bin/ruby tomlparser-npm-config.rb + /usr/bin/ruby2.6 tomlparser-npm-config.rb cat integration_npm_config_env_var | while read line; do #echo $line @@ -260,11 +239,11 @@ fi #Replace the placeholders in td-agent-bit.conf file for fluentbit with custom/default values in daemonset if [ ! -e "/etc/config/kube.conf" ] && [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then - /opt/microsoft/omsagent/ruby/bin/ruby td-agent-bit-conf-customizer.rb + /usr/bin/ruby2.6 td-agent-bit-conf-customizer.rb fi #Parse the prometheus configmap to create a file with new custom settings. -/opt/microsoft/omsagent/ruby/bin/ruby tomlparser-prom-customconfig.rb +/usr/bin/ruby2.6 tomlparser-prom-customconfig.rb #Setting default environment variables to be used in any case of failure in the above steps if [ ! -e "/etc/config/kube.conf" ]; then @@ -297,7 +276,7 @@ fi #Parse the configmap to set the right environment variables for MDM metrics configuration for Alerting. 
if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then - /opt/microsoft/omsagent/ruby/bin/ruby tomlparser-mdm-metrics-config.rb + /usr/bin/ruby2.6 tomlparser-mdm-metrics-config.rb cat config_mdm_metrics_env_var | while read line; do echo $line >> ~/.bashrc @@ -305,7 +284,7 @@ if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then source config_mdm_metrics_env_var #Parse the configmap to set the right environment variables for metric collection settings - /opt/microsoft/omsagent/ruby/bin/ruby tomlparser-metric-collection-config.rb + /usr/bin/ruby2.6 tomlparser-metric-collection-config.rb cat config_metric_collection_env_var | while read line; do echo $line >> ~/.bashrc @@ -316,7 +295,7 @@ fi # OSM scraping to be done in replicaset if sidecar car scraping is disabled and always do the scraping from the sidecar (It will always be either one of the two) if [[ ( ( ! -e "/etc/config/kube.conf" ) && ( "${CONTAINER_TYPE}" == "PrometheusSidecar" ) ) || ( ( -e "/etc/config/kube.conf" ) && ( "${SIDECAR_SCRAPING_ENABLED}" == "false" ) ) ]]; then - /opt/microsoft/omsagent/ruby/bin/ruby tomlparser-osm-config.rb + /usr/bin/ruby2.6 tomlparser-osm-config.rb if [ -e "integration_osm_config_env_var" ]; then cat integration_osm_config_env_var | while read line; do @@ -394,26 +373,11 @@ export KUBELET_RUNTIME_OPERATIONS_ERRORS_METRIC="kubelet_docker_operations_error if [ "$CONTAINER_RUNTIME" != "docker" ]; then # these metrics are avialble only on k8s versions <1.18 and will get deprecated from 1.18 export KUBELET_RUNTIME_OPERATIONS_METRIC="kubelet_runtime_operations" - export KUBELET_RUNTIME_OPERATIONS_ERRORS_METRIC="kubelet_runtime_operations_errors" -else - #if container run time is docker then add omsagent user to local docker group to get access to docker.sock - # docker.sock only use for the telemetry to get the docker version - DOCKER_SOCKET=/var/run/host/docker.sock - DOCKER_GROUP=docker - REGULAR_USER=omsagent - if [ -S ${DOCKER_SOCKET} ]; then - echo "getting gid for 
docker.sock" - DOCKER_GID=$(stat -c '%g' ${DOCKER_SOCKET}) - echo "creating a local docker group" - groupadd -for -g ${DOCKER_GID} ${DOCKER_GROUP} - echo "adding omsagent user to local docker group" - usermod -aG ${DOCKER_GROUP} ${REGULAR_USER} - fi + export KUBELET_RUNTIME_OPERATIONS_ERRORS_METRIC="kubelet_runtime_operations_errors" fi echo "set caps for ruby process to read container env from proc" -sudo setcap cap_sys_ptrace,cap_dac_read_search+ep /opt/microsoft/omsagent/ruby/bin/ruby - +sudo setcap cap_sys_ptrace,cap_dac_read_search+ep /usr/bin/ruby2.6 echo "export KUBELET_RUNTIME_OPERATIONS_METRIC="$KUBELET_RUNTIME_OPERATIONS_METRIC >> ~/.bashrc echo "export KUBELET_RUNTIME_OPERATIONS_ERRORS_METRIC="$KUBELET_RUNTIME_OPERATIONS_ERRORS_METRIC >> ~/.bashrc @@ -423,171 +387,70 @@ echo $NODE_NAME > /var/opt/microsoft/docker-cimprov/state/containerhostname #check if file was written successfully. cat /var/opt/microsoft/docker-cimprov/state/containerhostname - -#Commenting it for test. We do this in the installer now -#Setup sudo permission for containerlogtailfilereader -#chmod +w /etc/sudoers.d/omsagent -#echo "#run containerlogtailfilereader.rb for docker-provider" >> /etc/sudoers.d/omsagent -#echo "omsagent ALL=(ALL) NOPASSWD: /opt/microsoft/omsagent/ruby/bin/ruby /opt/microsoft/omsagent/plugin/containerlogtailfilereader.rb *" >> /etc/sudoers.d/omsagent -#chmod 440 /etc/sudoers.d/omsagent - -#Disable dsc -#/opt/microsoft/omsconfig/Scripts/OMS_MetaConfigHelper.py --disable -rm -f /etc/opt/microsoft/omsagent/conf/omsagent.d/omsconfig.consistencyinvoker.conf - -CIWORKSPACE_id="" -CIWORKSPACE_key="" - -if [ -z $INT ]; then - if [ -a /etc/omsagent-secret/PROXY ]; then - if [ -a /etc/omsagent-secret/DOMAIN ]; then - /opt/microsoft/omsagent/bin/omsadmin.sh -w `cat /etc/omsagent-secret/WSID` -s `cat /etc/omsagent-secret/KEY` -d `cat /etc/omsagent-secret/DOMAIN` -p `cat /etc/omsagent-secret/PROXY` - else - /opt/microsoft/omsagent/bin/omsadmin.sh -w `cat 
/etc/omsagent-secret/WSID` -s `cat /etc/omsagent-secret/KEY` -p `cat /etc/omsagent-secret/PROXY` - fi - CIWORKSPACE_id="$(cat /etc/omsagent-secret/WSID)" - CIWORKSPACE_key="$(cat /etc/omsagent-secret/KEY)" - elif [ -a /etc/omsagent-secret/DOMAIN ]; then - /opt/microsoft/omsagent/bin/omsadmin.sh -w `cat /etc/omsagent-secret/WSID` -s `cat /etc/omsagent-secret/KEY` -d `cat /etc/omsagent-secret/DOMAIN` - CIWORKSPACE_id="$(cat /etc/omsagent-secret/WSID)" - CIWORKSPACE_key="$(cat /etc/omsagent-secret/KEY)" - elif [ -a /etc/omsagent-secret/WSID ]; then - /opt/microsoft/omsagent/bin/omsadmin.sh -w `cat /etc/omsagent-secret/WSID` -s `cat /etc/omsagent-secret/KEY` - CIWORKSPACE_id="$(cat /etc/omsagent-secret/WSID)" - CIWORKSPACE_key="$(cat /etc/omsagent-secret/KEY)" - elif [ -a /run/secrets/DOMAIN ]; then - /opt/microsoft/omsagent/bin/omsadmin.sh -w `cat /run/secrets/WSID` -s `cat /run/secrets/KEY` -d `cat /run/secrets/DOMAIN` - CIWORKSPACE_id="$(cat /run/secrets/WSID)" - CIWORKSPACE_key="$(cat /run/secrets/KEY)" - elif [ -a /run/secrets/WSID ]; then - /opt/microsoft/omsagent/bin/omsadmin.sh -w `cat /run/secrets/WSID` -s `cat /run/secrets/KEY` - CIWORKSPACE_id="$(cat /run/secrets/WSID)" - CIWORKSPACE_key="$(cat /run/secrets/KEY)" - elif [ -z $DOMAIN ]; then - /opt/microsoft/omsagent/bin/omsadmin.sh -w $WSID -s $KEY - CIWORKSPACE_id="$(cat /etc/omsagent-secret/WSID)" - CIWORKSPACE_key="$(cat /etc/omsagent-secret/KEY)" - else - /opt/microsoft/omsagent/bin/omsadmin.sh -w $WSID -s $KEY -d $DOMAIN - CIWORKSPACE_id="$WSID" - CIWORKSPACE_key="$KEY" - fi -else -#To onboard to INT workspace - workspace-id (WSID-not base64 encoded), workspace-key (KEY-not base64 encoded), Domain(DOMAIN-int2.microsoftatlanta-int.com) -#need to be added to omsagent.yaml. 
- echo WORKSPACE_ID=$WSID > /etc/omsagent-onboard.conf - echo SHARED_KEY=$KEY >> /etc/omsagent-onboard.conf - echo URL_TLD=$DOMAIN >> /etc/omsagent-onboard.conf - /opt/microsoft/omsagent/bin/omsadmin.sh - CIWORKSPACE_id="$WSID" - CIWORKSPACE_key="$KEY" -fi - #start cron daemon for logrotate service cron start +#get docker-provider versions -#check if agent onboarded successfully -/opt/microsoft/omsagent/bin/omsadmin.sh -l - -#get omsagent and docker-provider versions -dpkg -l | grep omsagent | awk '{print $2 " " $3}' dpkg -l | grep docker-cimprov | awk '{print $2 " " $3}' DOCKER_CIMPROV_VERSION=$(dpkg -l | grep docker-cimprov | awk '{print $3}') echo "DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION" export DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION echo "export DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION" >> ~/.bashrc +echo "*** activating oneagent in legacy auth mode ***" +CIWORKSPACE_id="$(cat /etc/omsagent-secret/WSID)" +#use the file path as it is more secure than env +CIWORKSPACE_keyFile="/etc/omsagent-secret/KEY" +cat /etc/mdsd.d/envmdsd | while read line; do + echo $line >> ~/.bashrc +done +source /etc/mdsd.d/envmdsd +echo "setting mdsd workspaceid & key for workspace:$CIWORKSPACE_id" +export CIWORKSPACE_id=$CIWORKSPACE_id +echo "export CIWORKSPACE_id=$CIWORKSPACE_id" >> ~/.bashrc +export CIWORKSPACE_keyFile=$CIWORKSPACE_keyFile +echo "export CIWORKSPACE_keyFile=$CIWORKSPACE_keyFile" >> ~/.bashrc +export OMS_TLD=$domain +echo "export OMS_TLD=$OMS_TLD" >> ~/.bashrc +export MDSD_FLUENT_SOCKET_PORT="29230" +echo "export MDSD_FLUENT_SOCKET_PORT=$MDSD_FLUENT_SOCKET_PORT" >> ~/.bashrc + +#skip imds lookup since not used in legacy auth path +export SKIP_IMDS_LOOKUP_FOR_LEGACY_AUTH="true" +echo "export SKIP_IMDS_LOOKUP_FOR_LEGACY_AUTH=$SKIP_IMDS_LOOKUP_FOR_LEGACY_AUTH" >> ~/.bashrc -#region check to auto-activate oneagent, to route container logs, -#Intent is to activate one agent routing for all managed clusters with region in the regionllist, unless overridden by
configmap -# AZMON_CONTAINER_LOGS_ROUTE will have route (if any) specified in the config map -# AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE will have the final route that we compute & set, based on our region list logic -echo "************start oneagent log routing checks************" -# by default, use configmap route for safer side -AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$AZMON_CONTAINER_LOGS_ROUTE - -#trim region list -oneagentregions="$(echo $AZMON_CONTAINERLOGS_ONEAGENT_REGIONS | xargs)" -#lowercase region list -typeset -l oneagentregions=$oneagentregions -echo "oneagent regions: $oneagentregions" -#trim current region -currentregion="$(echo $AKS_REGION | xargs)" -#lowercase current region -typeset -l currentregion=$currentregion -echo "current region: $currentregion" - -#initilze isoneagentregion as false -isoneagentregion=false - -#set isoneagentregion as true if matching region is found -if [ ! -z $oneagentregions ] && [ ! -z $currentregion ]; then - for rgn in $(echo $oneagentregions | sed "s/,/ /g"); do - if [ "$rgn" == "$currentregion" ]; then - isoneagentregion=true - echo "current region is in oneagent regions..." - break - fi - done -else - echo "current region is not in oneagent regions..." -fi +source ~/.bashrc -if [ "$isoneagentregion" = true ]; then - #if configmap has a routing for logs, but current region is in the oneagent region list, take the configmap route - if [ ! -z $AZMON_CONTAINER_LOGS_ROUTE ]; then - AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$AZMON_CONTAINER_LOGS_ROUTE - echo "oneagent region is true for current region:$currentregion and config map logs route is not empty. so using config map logs route as effective route:$AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE" - else #there is no configmap route, so route thru oneagent - AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE="v2" - echo "oneagent region is true for current region:$currentregion and config map logs route is empty. 
so using oneagent as effective route:$AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE" - fi -else - echo "oneagent region is false for current region:$currentregion" +dpkg -l | grep mdsd | awk '{print $2 " " $3}' + +if [ "${CONTAINER_TYPE}" == "PrometheusSidecar" ]; then + echo "starting mdsd with mdsd-port=26130, fluentport=26230 and influxport=26330 in legacy auth mode in sidecar container..." + #use tenant name to avoid unix socket conflict and different ports for port conflict + #roleprefix to use container specific mdsd socket + export TENANT_NAME="${CONTAINER_TYPE}" + echo "export TENANT_NAME=$TENANT_NAME" >> ~/.bashrc + export MDSD_ROLE_PREFIX=/var/run/mdsd-${CONTAINER_TYPE}/default + echo "export MDSD_ROLE_PREFIX=$MDSD_ROLE_PREFIX" >> ~/.bashrc + source ~/.bashrc + mkdir /var/run/mdsd-${CONTAINER_TYPE} + # add -T 0xFFFF for full traces + mdsd -r ${MDSD_ROLE_PREFIX} -p 26130 -f 26230 -i 26330 -e ${MDSD_LOG}/mdsd.err -w ${MDSD_LOG}/mdsd.warn -o ${MDSD_LOG}/mdsd.info -q ${MDSD_LOG}/mdsd.qos & +else + echo "starting mdsd in legacy auth mode in main container..." + # add -T 0xFFFF for full traces + mdsd -e ${MDSD_LOG}/mdsd.err -w ${MDSD_LOG}/mdsd.warn -o ${MDSD_LOG}/mdsd.info -q ${MDSD_LOG}/mdsd.qos & fi - -#start oneagent -if [ ! -e "/etc/config/kube.conf" ] && [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then - if [ ! 
-z $AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE ]; then - echo "container logs configmap route is $AZMON_CONTAINER_LOGS_ROUTE" - echo "container logs effective route is $AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE" - #trim - containerlogsroute="$(echo $AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE | xargs)" - # convert to lowercase - typeset -l containerlogsroute=$containerlogsroute - - echo "setting AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE as :$containerlogsroute" - export AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$containerlogsroute - echo "export AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$containerlogsroute" >> ~/.bashrc - source ~/.bashrc - - if [ "$containerlogsroute" == "v2" ]; then - echo "activating oneagent..." - echo "configuring mdsd..." - cat /etc/mdsd.d/envmdsd | while read line; do - echo $line >> ~/.bashrc - done - source /etc/mdsd.d/envmdsd - - echo "setting mdsd workspaceid & key for workspace:$CIWORKSPACE_id" - export CIWORKSPACE_id=$CIWORKSPACE_id - echo "export CIWORKSPACE_id=$CIWORKSPACE_id" >> ~/.bashrc - export CIWORKSPACE_key=$CIWORKSPACE_key - echo "export CIWORKSPACE_key=$CIWORKSPACE_key" >> ~/.bashrc - - source ~/.bashrc - - dpkg -l | grep mdsd | awk '{print $2 " " $3}' - - echo "starting mdsd ..." - mdsd -e ${MDSD_LOG}/mdsd.err -w ${MDSD_LOG}/mdsd.warn -o ${MDSD_LOG}/mdsd.info -q ${MDSD_LOG}/mdsd.qos & - - touch /opt/AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2 - fi - fi -fi -echo "************end oneagent log routing checks************" +# no dependency on fluentd for prometheus side car container +if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then + if [ ! 
-e "/etc/config/kube.conf" ]; then + echo "*** starting fluentd v1 in daemonset" + fluentd -c /etc/fluent/container.conf -o /var/opt/microsoft/docker-cimprov/log/fluentd.log & + else + echo "*** starting fluentd v1 in replicaset" + fluentd -c /etc/fluent/kube.conf -o /var/opt/microsoft/docker-cimprov/log/fluentd.log & + fi +fi #If config parsing was successful, a copy of the conf file with replaced custom settings file is created if [ ! -e "/etc/config/kube.conf" ]; then @@ -697,12 +560,9 @@ dpkg -l | grep td-agent-bit | awk '{print $2 " " $3}' #dpkg -l | grep telegraf | awk '{print $2 " " $3}' - - # Write messages from the liveness probe to stdout (so telemetry picks it up) touch /dev/write-to-traces - echo "stopping rsyslog..." service rsyslog stop @@ -710,7 +570,7 @@ echo "getting rsyslog status..." service rsyslog status shutdown() { - /opt/microsoft/omsagent/bin/service_control stop + pkill -f mdsd } trap "shutdown" SIGTERM diff --git a/kubernetes/linux/mdsd.xml b/kubernetes/linux/mdsd.xml index 49d329791..d99549d3f 100644 --- a/kubernetes/linux/mdsd.xml +++ b/kubernetes/linux/mdsd.xml @@ -47,6 +47,149 @@ Each column has a name, an augmented JSON source type, and a target MDS type. 
--> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -68,14 +211,33 @@ + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - ]]> @@ -143,11 +360,95 @@ - - ]]> + + + + + ]]> + + + + + + + + ]]> + + + + + + + + ]]> + + + + + + + + ]]> + + + + + + + + ]]> + + + + + + + + ]]> + + + + + + + + ]]> + + + + + + + + ]]> + + + + + + + + ]]> + + + + + + + ]]> + + + + + + + ]]> + + + diff --git a/kubernetes/linux/setup.sh b/kubernetes/linux/setup.sh index f065cc165..a50c310a7 100644 --- a/kubernetes/linux/setup.sh +++ b/kubernetes/linux/setup.sh @@ -9,37 +9,17 @@ sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \ dpkg-reconfigure --frontend=noninteractive locales && \ update-locale LANG=en_US.UTF-8 -wget https://github.com/Microsoft/OMS-Agent-for-Linux/releases/download/OMSAgent_v1.10.0-1/omsagent-1.10.0-1.universal.x64.sh +#install oneagent - Official bits (10/18) +# wget https://github.com/microsoft/Docker-Provider/releases/download/10182020-oneagent/azure-mdsd_1.5.126-build.master.99_x86_64.deb +# use official build which has all the changes for the release -#create file to disable omi service startup script -touch /etc/.omi_disable_service_control +# private mdsd build +wget https://github.com/microsoft/Docker-Provider/raw/gangams/ci-aad-auth-msi/oneagent-dev/azure-mdsd_1.11.0-build.develop.1999_x86_64.deb -chmod 775 $TMPDIR/*.sh - -#Extract omsbundle -$TMPDIR/omsagent-*.universal.x64.sh --extract -mv $TMPDIR/omsbundle* $TMPDIR/omsbundle -#Install omi -/usr/bin/dpkg -i $TMPDIR/omsbundle/110/omi*.deb - -#Install scx -/usr/bin/dpkg -i $TMPDIR/omsbundle/110/scx*.deb 
-#$TMPDIR/omsbundle/bundles/scx-1.6.*-*.universal.x64.sh --install - -#Install omsagent - -/usr/bin/dpkg -i $TMPDIR/omsbundle/110/omsagent*.deb -#/usr/bin/dpkg -i $TMPDIR/omsbundle/100/omsconfig*.deb - -#install oneagent - Official bits (05/2021) -wget https://github.com/microsoft/Docker-Provider/releases/download/05112021-oneagent/azure-mdsd_1.8.0-build.master.189_x86_64.deb /usr/bin/dpkg -i $TMPDIR/azure-mdsd*.deb cp -f $TMPDIR/mdsd.xml /etc/mdsd.d cp -f $TMPDIR/envmdsd /etc/mdsd.d -#Assign permissions to omsagent user to access docker.sock -sudo apt-get install acl - #download inotify tools for watching configmap changes sudo apt-get update sudo apt-get install inotify-tools -y @@ -49,18 +29,7 @@ sudo apt-get install inotify-tools -y sudo apt-get install jq=1.5+dfsg-2 -y #used to setcaps for ruby process to read /proc/env -echo "installing libcap2-bin" sudo apt-get install libcap2-bin -y -#/$TMPDIR/omsbundle/oss-kits/docker-cimprov-1.0.0-*.x86_64.sh --install -#Use downloaded docker-provider instead of the bundled one - -#download and install telegraf -#wget https://dl.influxdata.com/telegraf/releases/telegraf_1.10.1-1_amd64.deb -#sudo dpkg -i telegraf_1.10.1-1_amd64.deb - -#service telegraf stop - -#wget https://github.com/microsoft/Docker-Provider/releases/download/5.0.0.0/telegraf #1.18 pre-release wget https://dl.influxdata.com/telegraf/releases/telegraf-1.18.0_linux_amd64.tar.gz @@ -79,8 +48,17 @@ sudo echo "deb https://packages.fluentbit.io/ubuntu/xenial xenial main" >> /etc/ sudo apt-get update sudo apt-get install td-agent-bit=1.6.8 -y -rm -rf $TMPDIR/omsbundle -rm -f $TMPDIR/omsagent*.sh +# install ruby2.6 +sudo apt-get install software-properties-common -y +sudo apt-add-repository ppa:brightbox/ruby-ng -y +sudo apt-get update +sudo apt-get install ruby2.6 ruby2.6-dev gcc make -y +# fluentd v1 gem +gem install fluentd -v "1.12.2" --no-document +fluentd --setup ./fluent +gem install gyoku iso8601 --no-doc + + rm -f $TMPDIR/docker-cimprov*.sh rm -f 
$TMPDIR/azure-mdsd*.deb rm -f $TMPDIR/mdsd.xml diff --git a/source/plugins/go/src/oms.go b/source/plugins/go/src/oms.go index d35acad3d..32eef9a24 100644 --- a/source/plugins/go/src/oms.go +++ b/source/plugins/go/src/oms.go @@ -92,6 +92,8 @@ const kubeMonAgentConfigEventFlushInterval = 60 //Eventsource name in mdsd const MdsdContainerLogSourceName = "ContainerLogSource" const MdsdContainerLogV2SourceName = "ContainerLogV2Source" +const MdsdKubeMonAgentEventsSourceName = "KubeMonAgentEventsSource" +const MdsdInsightsMetricsSourceName = "InsightsMetricsSource" //container logs route (v2=flush to oneagent, adx= flush to adx ingestion, anything else flush to ODS[default]) const ContainerLogsV2Route = "v2" @@ -101,6 +103,10 @@ const ContainerLogsADXRoute = "adx" //container logs schema (v2=ContainerLogsV2 table in LA, anything else ContainerLogs table in LA. This is applicable only if Container logs route is NOT ADX) const ContainerLogV2SchemaVersion = "v2" + +//name of the env variable that holds the container type +const ContainerTypeEnv = "CONTAINER_TYPE" + var ( // PluginConfiguration the plugins configuration PluginConfiguration map[string]string @@ -108,6 +114,10 @@ var ( HTTPClient http.Client // Client for MDSD msgp Unix socket MdsdMsgpUnixSocketClient net.Conn + // Client for MDSD msgp Unix socket for KubeMon Agent events + MdsdKubeMonMsgpUnixSocketClient net.Conn + // Client for MDSD msgp Unix socket for Insights Metrics + MdsdInsightsMetricsMsgpUnixSocketClient net.Conn // Ingestor for ADX ADXIngestor *ingest.Ingestion // OMSEndpoint ingestion endpoint @@ -116,6 +126,8 @@ var ( Computer string // WorkspaceID log analytics workspace id WorkspaceID string + // LogAnalyticsWorkspaceDomain log analytics workspace domain + LogAnalyticsWorkspaceDomain string // ResourceID for resource-centric log analytics data ResourceID string // Resource-centric flag (will be true if we determine if above RseourceID is non-empty - default is false) @@ -143,7 +155,17 @@ var ( // ADX tenantID AdxTenantID
string //ADX client secret - AdxClientSecret string + AdxClientSecret string + // container log or container log v2 tag name for oneagent route + MdsdContainerLogTagName string + // kubemonagent events tag name for oneagent route + MdsdKubeMonAgentEventsTagName string + // InsightsMetrics tag name for oneagent route + MdsdInsightsMetricsTagName string + // flag to check if its Windows OS + IsWindows bool + // container type + ContainerType string ) var ( @@ -532,6 +554,7 @@ func flushKubeMonAgentEventRecords() { start := time.Now() var elapsed time.Duration var laKubeMonAgentEventsRecords []laKubeMonAgentEvents + var msgPackEntries []MsgPackEntry telemetryDimensions := make(map[string]string) telemetryDimensions["ConfigErrorEventCount"] = strconv.Itoa(len(ConfigErrorEvent)) @@ -558,7 +581,26 @@ func flushKubeMonAgentEventRecords() { Message: k, Tags: fmt.Sprintf("%s", tagJson), } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + var stringMap map[string]string + jsonBytes, err := json.Marshal(&laKubeMonAgentEventsRecord) + if err != nil { + message := fmt.Sprintf("Error while Marshalling laKubeMonAgentEventsRecord to json bytes: %s", err.Error()) + Log(message) + SendException(message) + } else { + err := json.Unmarshal(jsonBytes, &stringMap) + if err != nil { + message := fmt.Sprintf("Error while UnMarhalling json bytes to stringmap: %s", err.Error()) + Log(message) + SendException(message) + } else { + msgPackEntry := MsgPackEntry{ + Record: stringMap, + } + msgPackEntries = append(msgPackEntries, msgPackEntry) + } + } } } @@ -579,7 +621,14 @@ func flushKubeMonAgentEventRecords() { Message: k, Tags: fmt.Sprintf("%s", tagJson), } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + 
var stringMap map[string]string + inrec, _ := json.Marshal(&laKubeMonAgentEventsRecord) + json.Unmarshal(inrec, &stringMap) + msgPackEntry := MsgPackEntry{ + Record: stringMap, + } + msgPackEntries = append(msgPackEntries, msgPackEntry) } } @@ -610,11 +659,63 @@ func flushKubeMonAgentEventRecords() { Message: "No errors", Tags: fmt.Sprintf("%s", tagJson), } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + var stringMap map[string]string + jsonBytes, err := json.Marshal(&laKubeMonAgentEventsRecord) + if err != nil { + message := fmt.Sprintf("Error while Marshalling laKubeMonAgentEventsRecord to json bytes: %s", err.Error()) + Log(message) + SendException(message) + } else { + if err := json.Unmarshal(jsonBytes, &stringMap); err != nil { + message := fmt.Sprintf("Error while UnMarshalling json bytes to stringmap: %s", err.Error()) + Log(message) + SendException(message) + } else { + msgPackEntry := MsgPackEntry{ + Record: stringMap, + } + msgPackEntries = append(msgPackEntries, msgPackEntry) + } + } } } - - if len(laKubeMonAgentEventsRecords) > 0 { + if (IsWindows == false && len(msgPackEntries) > 0) { //for linux, mdsd route + Log("Info::mdsd:: using mdsdsource name for KubeMonAgentEvents: %s", MdsdKubeMonAgentEventsTagName) + msgpBytes := convertMsgPackEntriesToMsgpBytes(MdsdKubeMonAgentEventsTagName, msgPackEntries) + if MdsdKubeMonMsgpUnixSocketClient == nil { + Log("Error::mdsd::mdsd connection for KubeMonAgentEvents does not exist. re-connecting ...") + CreateMDSDClientKubeMon(ContainerType) + if MdsdKubeMonMsgpUnixSocketClient == nil { + Log("Error::mdsd::Unable to create mdsd client for KubeMonAgentEvents. 
Please check error log.") + ContainerLogTelemetryMutex.Lock() + defer ContainerLogTelemetryMutex.Unlock() + KubeMonEventsMDSDClientCreateErrors += 1 + } + } + if MdsdKubeMonMsgpUnixSocketClient != nil { + deadline := 10 * time.Second + MdsdKubeMonMsgpUnixSocketClient.SetWriteDeadline(time.Now().Add(deadline)) //this is based of clock time, so cannot reuse + bts, er := MdsdKubeMonMsgpUnixSocketClient.Write(msgpBytes) + elapsed = time.Since(start) + if er != nil { + message := fmt.Sprintf("Error::mdsd::Failed to write to kubemonagent mdsd %d records after %s. Will retry ... error : %s", len(msgPackEntries), elapsed, er.Error()) + Log(message) + if MdsdKubeMonMsgpUnixSocketClient != nil { + MdsdKubeMonMsgpUnixSocketClient.Close() + MdsdKubeMonMsgpUnixSocketClient = nil + } + SendException(message) + } else { + numRecords := len(msgPackEntries) + Log("FlushKubeMonAgentEventRecords::Info::Successfully flushed %d records that was %d bytes in %s", numRecords, bts, elapsed) + // Send telemetry to AppInsights resource + SendEvent(KubeMonAgentEventsFlushedEvent, telemetryDimensions) + } + } else { + Log("Error::mdsd::Unable to create mdsd client for KubeMonAgentEvents. 
Please check error log.") + } + } else if len(laKubeMonAgentEventsRecords) > 0 { //for windows, ODS direct kubeMonAgentEventEntry := KubeMonAgentEventBlob{ DataType: KubeMonAgentEventDataType, IPName: IPName, @@ -746,70 +847,145 @@ func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int message := fmt.Sprintf("PostTelegrafMetricsToLA::Info:derived %v metrics from %v timeseries", len(laMetrics), len(telegrafRecords)) Log(message) } + + if IsWindows == false { //for linux, mdsd route + var msgPackEntries []MsgPackEntry + var i int + start := time.Now() + var elapsed time.Duration + + for i = 0; i < len(laMetrics); i++ { + var interfaceMap map[string]interface{} + stringMap := make(map[string]string) + jsonBytes, err := json.Marshal(*laMetrics[i]) + if err != nil { + message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:when marshalling json %q", err) + Log(message) + SendException(message) + return output.FLB_OK + } else { + if err := json.Unmarshal(jsonBytes, &interfaceMap); err != nil { + message := fmt.Sprintf("Error while UnMarshalling json bytes to interfaceMap: %s", err.Error()) + Log(message) + SendException(message) + return output.FLB_OK + } else { + for key, value := range interfaceMap { + strKey := fmt.Sprintf("%v", key) + strValue := fmt.Sprintf("%v", value) + stringMap[strKey] = strValue + } + msgPackEntry := MsgPackEntry{ + Record: stringMap, + } + msgPackEntries = append(msgPackEntries, msgPackEntry) + } + } + } + if (len(msgPackEntries) > 0) { + msgpBytes := convertMsgPackEntriesToMsgpBytes(MdsdInsightsMetricsTagName, msgPackEntries) + if MdsdInsightsMetricsMsgpUnixSocketClient == nil { + Log("Error::mdsd::mdsd connection does not exist. re-connecting ...") + CreateMDSDClientInsightsMetrics(ContainerType) + if MdsdInsightsMetricsMsgpUnixSocketClient == nil { + Log("Error::mdsd::Unable to create mdsd client for insights metrics. 
Please check error log.") + ContainerLogTelemetryMutex.Lock() + defer ContainerLogTelemetryMutex.Unlock() + InsightsMetricsMDSDClientCreateErrors += 1 + return output.FLB_RETRY + } + } - var metrics []laTelegrafMetric - var i int + deadline := 10 * time.Second + MdsdInsightsMetricsMsgpUnixSocketClient.SetWriteDeadline(time.Now().Add(deadline)) //this is based of clock time, so cannot reuse + bts, er := MdsdInsightsMetricsMsgpUnixSocketClient.Write(msgpBytes) - for i = 0; i < len(laMetrics); i++ { - metrics = append(metrics, *laMetrics[i]) - } + elapsed = time.Since(start) - laTelegrafMetrics := InsightsMetricsBlob{ - DataType: InsightsMetricsDataType, - IPName: IPName, - DataItems: metrics} + if er != nil { + Log("Error::mdsd::Failed to write to mdsd %d records after %s. Will retry ... error : %s", len(msgPackEntries), elapsed, er.Error()) + if MdsdInsightsMetricsMsgpUnixSocketClient != nil { + MdsdInsightsMetricsMsgpUnixSocketClient.Close() + MdsdInsightsMetricsMsgpUnixSocketClient = nil + } - jsonBytes, err := json.Marshal(laTelegrafMetrics) + ContainerLogTelemetryMutex.Lock() + defer ContainerLogTelemetryMutex.Unlock() + ContainerLogsSendErrorsToMDSDFromFluent += 1 - if err != nil { - message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:when marshalling json %q", err) - Log(message) - SendException(message) - return output.FLB_OK - } + return output.FLB_RETRY + } else { + numTelegrafMetricsRecords := len(msgPackEntries) + Log("Success::mdsd::Successfully flushed %d telegraf metrics records that was %d bytes to mdsd in %s ", numTelegrafMetricsRecords, bts, elapsed) + } + } + + } else { // for windows, ODS direct - //Post metrics data to LA - req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(jsonBytes)) + var metrics []laTelegrafMetric + var i int - //req.URL.Query().Add("api-version","2016-04-01") + for i = 0; i < len(laMetrics); i++ { + metrics = append(metrics, *laMetrics[i]) + } - //set headers - req.Header.Set("x-ms-date", 
time.Now().Format(time.RFC3339)) - req.Header.Set("User-Agent", userAgent) - reqID := uuid.New().String() - req.Header.Set("X-Request-ID", reqID) + laTelegrafMetrics := InsightsMetricsBlob{ + DataType: InsightsMetricsDataType, + IPName: IPName, + DataItems: metrics} - //expensive to do string len for every request, so use a flag - if ResourceCentric == true { - req.Header.Set("x-ms-AzureResourceId", ResourceID) - } + jsonBytes, err := json.Marshal(laTelegrafMetrics) - start := time.Now() - resp, err := HTTPClient.Do(req) - elapsed := time.Since(start) + if err != nil { + message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:when marshalling json %q", err) + Log(message) + SendException(message) + return output.FLB_OK + } - if err != nil { - message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:(retriable) when sending %v metrics. duration:%v err:%q \n", len(laMetrics), elapsed, err.Error()) - Log(message) - UpdateNumTelegrafMetricsSentTelemetry(0, 1, 0) - return output.FLB_RETRY - } + //Post metrics data to LA + req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(jsonBytes)) + + //req.URL.Query().Add("api-version","2016-04-01") - if resp == nil || resp.StatusCode != 200 { - if resp != nil { - Log("PostTelegrafMetricsToLA::Error:(retriable) RequestID %s Response Status %v Status Code %v", reqID, resp.Status, resp.StatusCode) + //set headers + req.Header.Set("x-ms-date", time.Now().Format(time.RFC3339)) + req.Header.Set("User-Agent", userAgent) + reqID := uuid.New().String() + req.Header.Set("X-Request-ID", reqID) + + //expensive to do string len for every request, so use a flag + if ResourceCentric == true { + req.Header.Set("x-ms-AzureResourceId", ResourceID) } - if resp != nil && resp.StatusCode == 429 { - UpdateNumTelegrafMetricsSentTelemetry(0, 1, 1) + + start := time.Now() + resp, err := HTTPClient.Do(req) + elapsed := time.Since(start) + + if err != nil { + message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:(retriable) when sending %v metrics. 
duration:%v err:%q \n", len(laMetrics), elapsed, err.Error()) + Log(message) + UpdateNumTelegrafMetricsSentTelemetry(0, 1, 0) + return output.FLB_RETRY + } + + if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("PostTelegrafMetricsToLA::Error:(retriable) RequestID %s Response Status %v Status Code %v", reqID, resp.Status, resp.StatusCode) + } + if resp != nil && resp.StatusCode == 429 { + UpdateNumTelegrafMetricsSentTelemetry(0, 1, 1) + } + return output.FLB_RETRY } - return output.FLB_RETRY - } - defer resp.Body.Close() + defer resp.Body.Close() - numMetrics := len(laMetrics) - UpdateNumTelegrafMetricsSentTelemetry(numMetrics, 0, 0) - Log("PostTelegrafMetricsToLA::Info:Successfully flushed %v records in %v", numMetrics, elapsed) + numMetrics := len(laMetrics) + UpdateNumTelegrafMetricsSentTelemetry(numMetrics, 0, 0) + Log("PostTelegrafMetricsToLA::Info:Successfully flushed %v records in %v", numMetrics, elapsed) + } return output.FLB_OK } @@ -986,13 +1162,10 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { numContainerLogRecords := 0 if len(msgPackEntries) > 0 && ContainerLogsRouteV2 == true { - //flush to mdsd - mdsdSourceName := MdsdContainerLogSourceName - if (ContainerLogSchemaV2 == true) { - mdsdSourceName = MdsdContainerLogV2SourceName - } + //flush to mdsd + Log("Info::mdsd:: using mdsdsource name: %s", MdsdContainerLogTagName) fluentForward := MsgPackForward{ - Tag: mdsdSourceName, + Tag: MdsdContainerLogTagName, Entries: msgPackEntries, } @@ -1019,7 +1192,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { if MdsdMsgpUnixSocketClient == nil { Log("Error::mdsd::mdsd connection does not exist. re-connecting ...") - CreateMDSDClient() + CreateMDSDClient(ContainerType) if MdsdMsgpUnixSocketClient == nil { Log("Error::mdsd::Unable to create mdsd client. 
Please check error log.") @@ -1286,21 +1459,31 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { log.Fatalln(message) } + ContainerType = os.Getenv(ContainerTypeEnv) + Log("Container Type %s", ContainerType) + osType := os.Getenv("OS_TYPE") // Linux if strings.Compare(strings.ToLower(osType), "windows") != 0 { Log("Reading configuration for Linux from %s", pluginConfPath) - omsadminConf, err := ReadConfiguration(pluginConfig["omsadmin_conf_path"]) - if err != nil { - message := fmt.Sprintf("Error Reading omsadmin configuration %s\n", err.Error()) + WorkspaceID = os.Getenv("WSID") + if WorkspaceID == "" { + message := fmt.Sprintf("WorkspaceID shouldnt be empty") Log(message) SendException(message) time.Sleep(30 * time.Second) log.Fatalln(message) } - OMSEndpoint = omsadminConf["OMS_ENDPOINT"] - WorkspaceID = omsadminConf["WORKSPACE_ID"] + LogAnalyticsWorkspaceDomain = os.Getenv("DOMAIN") + if LogAnalyticsWorkspaceDomain == "" { + message := fmt.Sprintf("Workspace DOMAIN shouldnt be empty") + Log(message) + SendException(message) + time.Sleep(30 * time.Second) + log.Fatalln(message) + } + OMSEndpoint = "https://" + WorkspaceID + ".ods." 
+ LogAnalyticsWorkspaceDomain + "/OperationalData.svc/PostJsonDataItems" // Populate Computer field containerHostName, err1 := ioutil.ReadFile(pluginConfig["container_host_file_path"]) if err1 != nil { @@ -1329,6 +1512,7 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { } } else { // windows + IsWindows = true Computer = os.Getenv("HOSTNAME") WorkspaceID = os.Getenv("WSID") logAnalyticsDomain := os.Getenv("DOMAIN") @@ -1412,7 +1596,10 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { PluginConfiguration = pluginConfig - CreateHTTPClient() + if IsWindows == true { + Log("Creating HTTP Client since the OS Platform is Windows") + CreateHTTPClient() + } ContainerLogsRoute := strings.TrimSpace(strings.ToLower(os.Getenv("AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE"))) Log("AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE:%s", ContainerLogsRoute) @@ -1420,11 +1607,7 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { ContainerLogsRouteV2 = false //default is ODS ContainerLogsRouteADX = false //default is LA - if strings.Compare(ContainerLogsRoute, ContainerLogsV2Route) == 0 && strings.Compare(strings.ToLower(osType), "windows") != 0 { - ContainerLogsRouteV2 = true - Log("Routing container logs thru %s route...", ContainerLogsV2Route) - fmt.Fprintf(os.Stdout, "Routing container logs thru %s route... 
\n", ContainerLogsV2Route) - } else if strings.Compare(ContainerLogsRoute, ContainerLogsADXRoute) == 0 { + if strings.Compare(ContainerLogsRoute, ContainerLogsADXRoute) == 0 { //check if adx clusteruri, clientid & secret are set var err error AdxClusterUri, err = ReadFileContents(PluginConfiguration["adx_cluster_uri_path"]) @@ -1455,14 +1638,24 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { Log("Routing container logs thru %s route...", ContainerLogsADXRoute) fmt.Fprintf(os.Stdout, "Routing container logs thru %s route...\n", ContainerLogsADXRoute) } - } + } else if strings.Compare(strings.ToLower(osType), "windows") != 0 { //for linux, oneagent will be default route + ContainerLogsRouteV2 = true //default is mdsd route + Log("Routing container logs thru %s route...", ContainerLogsV2Route) + fmt.Fprintf(os.Stdout, "Routing container logs thru %s route... \n", ContainerLogsV2Route) + } if ContainerLogsRouteV2 == true { - CreateMDSDClient() + CreateMDSDClient(ContainerType) } else if ContainerLogsRouteADX == true { CreateADXClient() } + if IsWindows == false { + Log("Creating MDSD clients for KubeMonAgentEvents & InsightsMetrics") + CreateMDSDClientKubeMon(ContainerType) + CreateMDSDClientInsightsMetrics(ContainerType) + } + ContainerLogSchemaVersion := strings.TrimSpace(strings.ToLower(os.Getenv("AZMON_CONTAINER_LOG_SCHEMA_VERSION"))) Log("AZMON_CONTAINER_LOG_SCHEMA_VERSION:%s", ContainerLogSchemaVersion) @@ -1491,4 +1684,12 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { Log("Running in replicaset. 
Disabling container enrichment caching & updates \n") } + if ContainerLogSchemaV2 == true { + MdsdContainerLogTagName = MdsdContainerLogV2SourceName + } else { + MdsdContainerLogTagName = MdsdContainerLogSourceName + } + + MdsdInsightsMetricsTagName = MdsdInsightsMetricsSourceName + MdsdKubeMonAgentEventsTagName = MdsdKubeMonAgentEventsSourceName } \ No newline at end of file diff --git a/source/plugins/go/src/telemetry.go b/source/plugins/go/src/telemetry.go index 461fdea96..36a8e4d10 100644 --- a/source/plugins/go/src/telemetry.go +++ b/source/plugins/go/src/telemetry.go @@ -42,6 +42,10 @@ var ( ContainerLogsSendErrorsToMDSDFromFluent float64 //Tracks the number of mdsd client create errors for containerlogs (uses ContainerLogTelemetryTicker) ContainerLogsMDSDClientCreateErrors float64 + //Tracks the number of mdsd client create errors for insightsmetrics (uses ContainerLogTelemetryTicker) + InsightsMetricsMDSDClientCreateErrors float64 + //Tracks the number of mdsd client create errors for kubemonevents (uses ContainerLogTelemetryTicker) + KubeMonEventsMDSDClientCreateErrors float64 //Tracks the number of write/send errors to ADX for containerlogs (uses ContainerLogTelemetryTicker) ContainerLogsSendErrorsToADXFromFluent float64 //Tracks the number of ADX client create errors for containerlogs (uses ContainerLogTelemetryTicker) diff --git a/source/plugins/go/src/utils.go b/source/plugins/go/src/utils.go index 61d047e52..6873ce847 100644 --- a/source/plugins/go/src/utils.go +++ b/source/plugins/go/src/utils.go @@ -12,11 +12,12 @@ import ( "net/url" "os" "strings" - "time" - + "time" + "github.com/Azure/azure-kusto-go/kusto" "github.com/Azure/azure-kusto-go/kusto/ingest" "github.com/Azure/go-autorest/autorest/azure/auth" + "github.com/tinylib/msgp/msgp" ) // ReadConfiguration reads a property file @@ -93,7 +94,7 @@ func CreateHTTPClient() { HTTPClient = http.Client{ Transport: transport, Timeout: 30 * time.Second, - } + } Log("Successfully created HTTP Client") } 
@@ -110,7 +111,7 @@ func ToString(s interface{}) string { } //mdsdSocketClient to write msgp messages -func CreateMDSDClient() { +func CreateMDSDClient(containerType string) { if MdsdMsgpUnixSocketClient != nil { MdsdMsgpUnixSocketClient.Close() MdsdMsgpUnixSocketClient = nil @@ -119,17 +120,71 @@ func CreateMDSDClient() { FluentSocketPath:"/var/run/mdsd/default_fluent.socket", WriteTimeout: 5 * time.Second, RequestAck: true}) */ + mdsdfluentSocket := "/var/run/mdsd/default_fluent.socket" + if containerType != "" && strings.Compare(strings.ToLower(containerType), "prometheussidecar") == 0 { + mdsdfluentSocket = fmt.Sprintf("/var/run/mdsd-%s/default_fluent.socket", containerType) + } conn, err := net.DialTimeout("unix", - "/var/run/mdsd/default_fluent.socket", 10*time.Second) + mdsdfluentSocket, 10*time.Second) if err != nil { Log("Error::mdsd::Unable to open MDSD msgp socket connection %s", err.Error()) //log.Fatalf("Unable to open MDSD msgp socket connection %s", err.Error()) } else { - Log("Successfully created MDSD msgp socket connection") + Log("Successfully created MDSD msgp socket connection: %s", mdsdfluentSocket) MdsdMsgpUnixSocketClient = conn } } +//mdsdSocketClient to write msgp messages for KubeMonAgent Events +func CreateMDSDClientKubeMon(containerType string) { + if MdsdKubeMonMsgpUnixSocketClient != nil { + MdsdKubeMonMsgpUnixSocketClient.Close() + MdsdKubeMonMsgpUnixSocketClient = nil + } + /*conn, err := fluent.New(fluent.Config{FluentNetwork:"unix", + FluentSocketPath:"/var/run/mdsd/default_fluent.socket", + WriteTimeout: 5 * time.Second, + RequestAck: true}) */ + mdsdfluentSocket := "/var/run/mdsd/default_fluent.socket" + if containerType != "" && strings.Compare(strings.ToLower(containerType), "prometheussidecar") == 0 { + mdsdfluentSocket = fmt.Sprintf("/var/run/mdsd-%s/default_fluent.socket",containerType) + } + conn, err := net.DialTimeout("unix", + mdsdfluentSocket, 10*time.Second) + if err != nil { + Log("Error::mdsd::Unable to open MDSD 
msgp socket connection for KubeMon events %s", err.Error()) + //log.Fatalf("Unable to open MDSD msgp socket connection %s", err.Error()) + } else { + Log("Successfully created MDSD msgp socket connection for KubeMon events:%s", mdsdfluentSocket) + MdsdKubeMonMsgpUnixSocketClient = conn + } +} + +//mdsdSocketClient to write msgp messages for InsightsMetrics +func CreateMDSDClientInsightsMetrics(containerType string) { + if MdsdInsightsMetricsMsgpUnixSocketClient != nil { + MdsdInsightsMetricsMsgpUnixSocketClient.Close() + MdsdInsightsMetricsMsgpUnixSocketClient = nil + } + /*conn, err := fluent.New(fluent.Config{FluentNetwork:"unix", + FluentSocketPath:"/var/run/mdsd/default_fluent.socket", + WriteTimeout: 5 * time.Second, + RequestAck: true}) */ + mdsdfluentSocket := "/var/run/mdsd/default_fluent.socket" + if containerType != "" && strings.Compare(strings.ToLower(containerType), "prometheussidecar") == 0 { + mdsdfluentSocket = fmt.Sprintf("/var/run/mdsd-%s/default_fluent.socket",containerType) + } + conn, err := net.DialTimeout("unix", + mdsdfluentSocket, 10*time.Second) + if err != nil { + Log("Error::mdsd::Unable to open MDSD msgp socket connectionfor insights metrics %s", err.Error()) + //log.Fatalf("Unable to open MDSD msgp socket connection %s", err.Error()) + } else { + Log("Successfully created MDSD msgp socket connection for Insights metrics %s", mdsdfluentSocket) + MdsdInsightsMetricsMsgpUnixSocketClient = conn + } +} + //ADX client to write to ADX func CreateADXClient() { @@ -178,3 +233,33 @@ func isValidUrl(uri string) bool { } return true } + +func convertMsgPackEntriesToMsgpBytes(fluentForwardTag string, msgPackEntries []MsgPackEntry) []byte { + var msgpBytes []byte + + fluentForward := MsgPackForward{ + Tag: fluentForwardTag, + Entries: msgPackEntries, + } + //determine the size of msgp message + msgpSize := 1 + msgp.StringPrefixSize + len(fluentForward.Tag) + msgp.ArrayHeaderSize + for i := range fluentForward.Entries { + msgpSize += 1 +
msgp.Int64Size + msgp.GuessSize(fluentForward.Entries[i].Record) + } + + //allocate buffer for msgp message + msgpBytes = msgp.Require(nil, msgpSize) + + //construct the stream + msgpBytes = append(msgpBytes, 0x92) + msgpBytes = msgp.AppendString(msgpBytes, fluentForward.Tag) + msgpBytes = msgp.AppendArrayHeader(msgpBytes, uint32(len(fluentForward.Entries))) + batchTime := time.Now().Unix() + for entry := range fluentForward.Entries { + msgpBytes = append(msgpBytes, 0x92) + msgpBytes = msgp.AppendInt64(msgpBytes, batchTime) + msgpBytes = msgp.AppendMapStrStr(msgpBytes, fluentForward.Entries[entry].Record) + } + + return msgpBytes +} \ No newline at end of file diff --git a/source/plugins/ruby/ApplicationInsightsUtility.rb b/source/plugins/ruby/ApplicationInsightsUtility.rb index b118cc646..6ae567337 100644 --- a/source/plugins/ruby/ApplicationInsightsUtility.rb +++ b/source/plugins/ruby/ApplicationInsightsUtility.rb @@ -14,7 +14,6 @@ class ApplicationInsightsUtility @@Exception = "ExceptionEvent" @@AcsClusterType = "ACS" @@AksClusterType = "AKS" - @OmsAdminFilePath = "/etc/opt/microsoft/omsagent/conf/omsadmin.conf" @@EnvAcsResourceName = "ACS_RESOURCE_NAME" @@EnvAksRegion = "AKS_REGION" @@EnvAgentVersion = "AGENT_VERSION" @@ -263,14 +262,11 @@ def sendMetricTelemetry(metricName, metricValue, properties) end def getWorkspaceId() - begin - adminConf = {} - confFile = File.open(@OmsAdminFilePath, "r") - confFile.each_line do |line| - splitStrings = line.split("=") - adminConf[splitStrings[0]] = splitStrings[1] + begin + workspaceId = ENV["WSID"] + if workspaceId.nil? || workspaceId.empty? 
+ $log.warn("Exception in AppInsightsUtility: getWorkspaceId - WorkspaceID either nil or empty") end - workspaceId = adminConf["WORKSPACE_ID"] return workspaceId rescue => errorStr $log.warn("Exception in AppInsightsUtility: getWorkspaceId - error: #{errorStr}") @@ -278,14 +274,8 @@ def getWorkspaceId() end def getWorkspaceCloud() - begin - adminConf = {} - confFile = File.open(@OmsAdminFilePath, "r") - confFile.each_line do |line| - splitStrings = line.split("=") - adminConf[splitStrings[0]] = splitStrings[1] - end - workspaceDomain = adminConf["URL_TLD"].strip + begin + workspaceDomain = ENV["DOMAIN"] workspaceCloud = "AzureCloud" if workspaceDomain.casecmp("opinsights.azure.com") == 0 workspaceCloud = "AzureCloud" diff --git a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb index 8cb6f603e..93b0eaa39 100644 --- a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb +++ b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb @@ -203,23 +203,25 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met containerName = container["name"] metricValue = container["cpu"][cpuMetricNameToCollect] metricTime = metricPollTime #container["cpu"]["time"] - metricItem = {} - metricItem["DataItems"] = [] - - metricProps = {} - metricProps["Timestamp"] = metricTime - metricProps["Host"] = hostName - metricProps["ObjectName"] = Constants::OBJECT_NAME_K8S_CONTAINER - metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName - - metricProps["Collections"] = [] - metricCollections = {} - metricCollections["CounterName"] = metricNametoReturn - metricCollections["Value"] = metricValue + - metricProps["Collections"].push(metricCollections) - metricItem["DataItems"].push(metricProps) - metricItems.push(metricItem) + metricItem = {} + metricItem["Timestamp"] = metricTime + metricItem["Host"] = hostName + metricItem["ObjectName"] = Constants::OBJECT_NAME_K8S_CONTAINER + metricItem["InstanceName"] = 
clusterId + "/" + podUid + "/" + containerName + + + metricCollection = {} + metricCollection["CounterName"] = metricNametoReturn + metricCollection["Value"] = metricValue + + metricItem["json_Collections"] = [] + metricCollections = [] + metricCollections.push(metricCollection) + metricItem["json_Collections"] = metricCollections.to_json + metricItems.push(metricItem) + #Telemetry about agent performance begin # we can only do this much now. Ideally would like to use the docker image repository to find our pods/containers @@ -254,7 +256,7 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met telemetryProps["containerLogsRoute"] = "v2" elsif (!@containerLogsRoute.nil? && !@containerLogsRoute.empty?) telemetryProps["containerLogsRoute"] = @containerLogsRoute - end + end #telemetry about health model if (!@hmEnabled.nil? && !@hmEnabled.empty?) telemetryProps["hmEnabled"] = @hmEnabled @@ -503,18 +505,16 @@ def getContainerCpuMetricItemRate(metricJSON, hostName, cpuMetricNameToCollect, containerName = container["name"] metricValue = container["cpu"][cpuMetricNameToCollect] metricTime = metricPollTime #container["cpu"]["time"] + metricItem = {} - metricItem["DataItems"] = [] - - metricProps = {} - metricProps["Timestamp"] = metricTime - metricProps["Host"] = hostName - metricProps["ObjectName"] = Constants::OBJECT_NAME_K8S_CONTAINER - metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName - - metricProps["Collections"] = [] - metricCollections = {} - metricCollections["CounterName"] = metricNametoReturn + metricItem["Timestamp"] = metricTime + metricItem["Host"] = hostName + metricItem["ObjectName"] = Constants::OBJECT_NAME_K8S_CONTAINER + metricItem["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricItem["json_Collections"] = [] + metricCollection = {} + metricCollection["CounterName"] = metricNametoReturn containerId = podUid + "/" + containerName # Adding the containers to the 
winContainerIdCache so that it can be used by the cleanup routine @@ -545,9 +545,11 @@ def getContainerCpuMetricItemRate(metricJSON, hostName, cpuMetricNameToCollect, @@winContainerPrevMetricRate[containerId] = metricRateValue end - metricCollections["Value"] = metricValue - metricProps["Collections"].push(metricCollections) - metricItem["DataItems"].push(metricProps) + metricCollection["Value"] = metricValue + + metricCollections = [] + metricCollections.push(metricCollection) + metricItem["json_Collections"] = metricCollections.to_json metricItems.push(metricItem) #Telemetry about agent performance begin @@ -629,22 +631,21 @@ def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollec metricTime = metricPollTime #container["memory"]["time"] metricItem = {} - metricItem["DataItems"] = [] - - metricProps = {} - metricProps["Timestamp"] = metricTime - metricProps["Host"] = hostName - metricProps["ObjectName"] = Constants::OBJECT_NAME_K8S_CONTAINER - metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + metricItem["Timestamp"] = metricTime + metricItem["Host"] = hostName + metricItem["ObjectName"] = Constants::OBJECT_NAME_K8S_CONTAINER + metricItem["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricCollection = {} + metricCollection["CounterName"] = metricNametoReturn + metricCollection["Value"] = metricValue + + metricItem["json_Collections"] = [] + metricCollections = [] + metricCollections.push(metricCollection) + metricItem["json_Collections"] = metricCollections.to_json + metricItems.push(metricItem) - metricProps["Collections"] = [] - metricCollections = {} - metricCollections["CounterName"] = metricNametoReturn - metricCollections["Value"] = metricValue - - metricProps["Collections"].push(metricCollections) - metricItem["DataItems"].push(metricProps) - metricItems.push(metricItem) #Telemetry about agent performance begin # we can only do this much now. 
Ideally would like to use the docker image repository to find our pods/containers @@ -687,22 +688,21 @@ def getNodeMetricItem(metricJSON, hostName, metricCategory, metricNameToCollect, if !node[metricCategory].nil? metricValue = node[metricCategory][metricNameToCollect] metricTime = metricPollTime #node[metricCategory]["time"] - - metricItem["DataItems"] = [] - - metricProps = {} - metricProps["Timestamp"] = metricTime - metricProps["Host"] = hostName - metricProps["ObjectName"] = Constants::OBJECT_NAME_K8S_NODE - metricProps["InstanceName"] = clusterId + "/" + nodeName - - metricProps["Collections"] = [] - metricCollections = {} - metricCollections["CounterName"] = metricNametoReturn - metricCollections["Value"] = metricValue - - metricProps["Collections"].push(metricCollections) - metricItem["DataItems"].push(metricProps) + + metricItem["Timestamp"] = metricTime + metricItem["Host"] = hostName + metricItem["ObjectName"] = Constants::OBJECT_NAME_K8S_NODE + metricItem["InstanceName"] = clusterId + "/" + nodeName + + + metricCollection = {} + metricCollection["CounterName"] = metricNametoReturn + metricCollection["Value"] = metricValue + + metricItem["json_Collections"] = [] + metricCollections = [] + metricCollections.push(metricCollection) + metricItem["json_Collections"] = metricCollections.to_json end rescue => error @Log.warn("getNodeMetricItem failed: #{error} for metric #{metricNameToCollect}") @@ -805,21 +805,20 @@ def getNodeMetricItemRate(metricJSON, hostName, metricCategory, metricNameToColl end end end - metricItem["DataItems"] = [] - - metricProps = {} - metricProps["Timestamp"] = metricTime - metricProps["Host"] = hostName - metricProps["ObjectName"] = Constants::OBJECT_NAME_K8S_NODE - metricProps["InstanceName"] = clusterId + "/" + nodeName - - metricProps["Collections"] = [] - metricCollections = {} - metricCollections["CounterName"] = metricNametoReturn - metricCollections["Value"] = metricValue - - metricProps["Collections"].push(metricCollections) 
- metricItem["DataItems"].push(metricProps) + + metricItem["Timestamp"] = metricTime + metricItem["Host"] = hostName + metricItem["ObjectName"] = Constants::OBJECT_NAME_K8S_NODE + metricItem["InstanceName"] = clusterId + "/" + nodeName + + metricCollection = {} + metricCollection["CounterName"] = metricNametoReturn + metricCollection["Value"] = metricValue + + metricItem["json_Collections"] = [] + metricCollections = [] + metricCollections.push(metricCollection) + metricItem["json_Collections"] = metricCollections.to_json end rescue => error @Log.warn("getNodeMetricItemRate failed: #{error} for metric #{metricNameToCollect}") @@ -841,22 +840,22 @@ def getNodeLastRebootTimeMetric(metricJSON, hostName, metricNametoReturn, metric metricValue = node["startTime"] metricTime = metricPollTime #Time.now.utc.iso8601 #2018-01-30T19:36:14Z - metricItem["DataItems"] = [] - - metricProps = {} - metricProps["Timestamp"] = metricTime - metricProps["Host"] = hostName - metricProps["ObjectName"] = Constants::OBJECT_NAME_K8S_NODE - metricProps["InstanceName"] = clusterId + "/" + nodeName + + metricItem["Timestamp"] = metricTime + metricItem["Host"] = hostName + metricItem["ObjectName"] = Constants::OBJECT_NAME_K8S_NODE + metricItem["InstanceName"] = clusterId + "/" + nodeName - metricProps["Collections"] = [] - metricCollections = {} - metricCollections["CounterName"] = metricNametoReturn + + metricCollection = {} + metricCollection["CounterName"] = metricNametoReturn #Read it from /proc/uptime - metricCollections["Value"] = DateTime.parse(metricTime).to_time.to_i - IO.read("/proc/uptime").split[0].to_f + metricCollection["Value"] = DateTime.parse(metricTime).to_time.to_i - IO.read("/proc/uptime").split[0].to_f - metricProps["Collections"].push(metricCollections) - metricItem["DataItems"].push(metricProps) + metricItem["json_Collections"] = [] + metricCollections = [] + metricCollections.push(metricCollection) + metricItem["json_Collections"] = metricCollections.to_json rescue => 
error @Log.warn("getNodeLastRebootTimeMetric failed: #{error} ") @Log.warn metricJSON @@ -880,21 +879,19 @@ def getContainerStartTimeMetricItems(metricJSON, hostName, metricNametoReturn, m metricTime = metricPollTime #currentTime metricItem = {} - metricItem["DataItems"] = [] - - metricProps = {} - metricProps["Timestamp"] = metricTime - metricProps["Host"] = hostName - metricProps["ObjectName"] = Constants::OBJECT_NAME_K8S_CONTAINER - metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName - - metricProps["Collections"] = [] - metricCollections = {} - metricCollections["CounterName"] = metricNametoReturn - metricCollections["Value"] = DateTime.parse(metricValue).to_time.to_i - - metricProps["Collections"].push(metricCollections) - metricItem["DataItems"].push(metricProps) + metricItem["Timestamp"] = metricTime + metricItem["Host"] = hostName + metricItem["ObjectName"] = Constants::OBJECT_NAME_K8S_CONTAINER + metricItem["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricCollection = {} + metricCollection["CounterName"] = metricNametoReturn + metricCollection["Value"] = DateTime.parse(metricValue).to_time.to_i + + metricItem["json_Collections"] = [] + metricCollections = [] + metricCollections.push(metricCollection) + metricItem["json_Collections"] = metricCollections.to_json metricItems.push(metricItem) end end diff --git a/source/plugins/ruby/DockerApiClient.rb b/source/plugins/ruby/DockerApiClient.rb index f2828b357..53dd1f39f 100644 --- a/source/plugins/ruby/DockerApiClient.rb +++ b/source/plugins/ruby/DockerApiClient.rb @@ -29,7 +29,7 @@ def getResponse(request, isMultiJson, isVersion) loop do begin responseChunk = "" - timeout(@@TimeoutInSeconds) do + Timeout.timeout(@@TimeoutInSeconds) do responseChunk = socket.recv(@@ChunkSize) end dockerResponse += responseChunk diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb index 98347d272..a791b5ddf 100644 --- 
a/source/plugins/ruby/KubernetesApiClient.rb +++ b/source/plugins/ruby/KubernetesApiClient.rb @@ -405,12 +405,9 @@ def getPodUid(podNameSpace, podMetadata) def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) metricItems = [] - timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference / 60 begin clusterId = getClusterId podNameSpace = pod["metadata"]["namespace"] - podName = pod["metadata"]["name"] podUid = getPodUid(podNameSpace, pod["metadata"]) if podUid.nil? return metricItems @@ -442,8 +439,8 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?) metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect]) - metricItem = {} - metricItem["DataItems"] = [] + # metricItem = {} + # metricItem["DataItems"] = [] metricProps = {} metricProps["Timestamp"] = metricTime @@ -453,49 +450,23 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle metricProps["ObjectName"] = "K8SContainer" metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName - metricProps["Collections"] = [] - metricCollections = {} - metricCollections["CounterName"] = metricNametoReturn - metricCollections["Value"] = metricValue - - metricProps["Collections"].push(metricCollections) - metricItem["DataItems"].push(metricProps) - metricItems.push(metricItem) - #Telemetry about omsagent requests and limits - begin - if (podName.downcase.start_with?("omsagent-") && podNameSpace.eql?("kube-system") && containerName.downcase.start_with?("omsagent")) - nodePodContainerKey = [nodeName, podName, containerName, metricNametoReturn].join("~~") - 
@@resourceLimitsTelemetryHash[nodePodContainerKey] = metricValue - end - if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) - @@resourceLimitsTelemetryHash.each { |key, value| - keyElements = key.split("~~") - if keyElements.length != 4 - next - end - - # get dimension values by key - telemetryProps = {} - telemetryProps["Computer"] = keyElements[0] - telemetryProps["PodName"] = keyElements[1] - telemetryProps["ContainerName"] = keyElements[2] - metricNameFromKey = keyElements[3] - ApplicationInsightsUtility.sendMetricTelemetry(metricNameFromKey, value, telemetryProps) - } - @@telemetryTimeTracker = DateTime.now.to_time.to_i - @@resourceLimitsTelemetryHash = {} - end - rescue => errorStr - $log.warn("Exception while generating Telemetry from getContainerResourceRequestsAndLimits failed: #{errorStr} for metric #{metricNameToCollect}") - end + metricCollection = {} + metricCollection["CounterName"] = metricNametoReturn + metricCollection["Value"] = metricValue + + metricProps["json_Collections"] = [] + metricCollections = [] + metricCollections.push(metricCollection) + metricProps["json_Collections"] = metricCollections.to_json + metricItems.push(metricProps) #No container level limit for the given metric, so default to node level limit else nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect if (metricCategory == "limits" && @@NodeMetrics.has_key?(nodeMetricsHashKey)) metricValue = @@NodeMetrics[nodeMetricsHashKey] #@Log.info("Limits not set for container #{clusterId + "/" + podUid + "/" + containerName} using node level limits: #{nodeMetricsHashKey}=#{metricValue} ") - metricItem = {} - metricItem["DataItems"] = [] + # metricItem = {} + # metricItem["DataItems"] = [] metricProps = {} metricProps["Timestamp"] = metricTime @@ -505,14 +476,14 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle metricProps["ObjectName"] = "K8SContainer" metricProps["InstanceName"] 
= clusterId + "/" + podUid + "/" + containerName - metricProps["Collections"] = [] - metricCollections = {} - metricCollections["CounterName"] = metricNametoReturn - metricCollections["Value"] = metricValue - - metricProps["Collections"].push(metricCollections) - metricItem["DataItems"].push(metricProps) - metricItems.push(metricItem) + metricCollection = {} + metricCollection["CounterName"] = metricNametoReturn + metricCollection["Value"] = metricValue + metricProps["json_Collections"] = [] + metricCollections = [] + metricCollections.push(metricCollection) + metricProps["json_Collections"] = metricCollections.to_json + metricItems.push(metricProps) end end end @@ -632,22 +603,22 @@ def parseNodeLimitsFromNodeItem(node, metricCategory, metricNameToCollect, metri # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect]) - metricItem["DataItems"] = [] - metricProps = {} - metricProps["Timestamp"] = metricTime - metricProps["Host"] = node["metadata"]["name"] + metricItem["Timestamp"] = metricTime + metricItem["Host"] = node["metadata"]["name"] # Adding this so that it is not set by base omsagent since it was not set earlier and being set by base omsagent - metricProps["Computer"] = node["metadata"]["name"] - metricProps["ObjectName"] = "K8SNode" - metricProps["InstanceName"] = clusterId + "/" + node["metadata"]["name"] - metricProps["Collections"] = [] - metricCollections = {} - metricCollections["CounterName"] = metricNametoReturn - metricCollections["Value"] = metricValue - - metricProps["Collections"].push(metricCollections) - metricItem["DataItems"].push(metricProps) - + metricItem["Computer"] = node["metadata"]["name"] + metricItem["ObjectName"] = "K8SNode" + metricItem["InstanceName"] = clusterId + "/" + node["metadata"]["name"] + + metricCollection = {} + metricCollection["CounterName"] = metricNametoReturn + 
metricCollection["Value"] = metricValue + metricCollections = [] + metricCollections.push(metricCollection) + + metricItem["json_Collections"] = [] + metricItem["json_Collections"] = metricCollections.to_json + #push node level metrics to a inmem hash so that we can use it looking up at container level. #Currently if container level cpu & memory limits are not defined we default to node level limits @@NodeMetrics[clusterId + "/" + node["metadata"]["name"] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue diff --git a/source/plugins/ruby/MdmMetricsGenerator.rb b/source/plugins/ruby/MdmMetricsGenerator.rb index 6641456af..a809087dc 100644 --- a/source/plugins/ruby/MdmMetricsGenerator.rb +++ b/source/plugins/ruby/MdmMetricsGenerator.rb @@ -525,11 +525,11 @@ def getNodeResourceMetricRecords(record, metric_name, metric_value, percentage_m records = [] begin custommetricrecord = MdmAlertTemplates::Node_resource_metrics_template % { - timestamp: record["DataItems"][0]["Timestamp"], + timestamp: record["Timestamp"], metricName: metric_name, - hostvalue: record["DataItems"][0]["Host"], - objectnamevalue: record["DataItems"][0]["ObjectName"], - instancenamevalue: record["DataItems"][0]["InstanceName"], + hostvalue: record["Host"], + objectnamevalue: record["ObjectName"], + instancenamevalue: record["InstanceName"], metricminvalue: metric_value, metricmaxvalue: metric_value, metricsumvalue: metric_value, @@ -538,11 +538,11 @@ def getNodeResourceMetricRecords(record, metric_name, metric_value, percentage_m if !percentage_metric_value.nil? 
additional_record = MdmAlertTemplates::Node_resource_metrics_template % { - timestamp: record["DataItems"][0]["Timestamp"], + timestamp: record["Timestamp"], metricName: @@node_metric_name_metric_percentage_name_hash[metric_name], - hostvalue: record["DataItems"][0]["Host"], - objectnamevalue: record["DataItems"][0]["ObjectName"], - instancenamevalue: record["DataItems"][0]["InstanceName"], + hostvalue: record["Host"], + objectnamevalue: record["ObjectName"], + instancenamevalue: record["InstanceName"], metricminvalue: percentage_metric_value, metricmaxvalue: percentage_metric_value, metricsumvalue: percentage_metric_value, diff --git a/source/plugins/ruby/filter_cadvisor2mdm.rb b/source/plugins/ruby/filter_cadvisor2mdm.rb index 659e3000c..62dcf31dc 100644 --- a/source/plugins/ruby/filter_cadvisor2mdm.rb +++ b/source/plugins/ruby/filter_cadvisor2mdm.rb @@ -2,7 +2,9 @@ # frozen_string_literal: true -module Fluent +require 'fluent/plugin/filter' + +module Fluent::Plugin require "logger" require "yajl/json_gem" require_relative "oms_common" @@ -12,7 +14,7 @@ module Fluent require_relative "in_kube_nodes" class CAdvisor2MdmFilter < Filter - Fluent::Plugin.register_filter("filter_cadvisor2mdm", self) + Fluent::Plugin.register_filter("cadvisor2mdm", self) config_param :enable_log, :integer, :default => 0 config_param :log_path, :string, :default => "/var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log" @@ -65,7 +67,7 @@ def start @containerResourceDimensionHash = {} @pvUsageHash = {} @@metric_threshold_hash = MdmMetricsGenerator.getContainerResourceUtilizationThresholds - @NodeCache = Fluent::NodeStatsCache.new() + @NodeCache = Fluent::Plugin::NodeStatsCache.new() end rescue => e @log.info "Error initializing plugin #{e}" @@ -148,16 +150,16 @@ def filter(tag, time, record) begin if @process_incoming_stream - # Check if insights metrics for PV metrics - data_type = record["DataType"] - if data_type == "INSIGHTS_METRICS_BLOB" + # Check if insights metrics for PV 
metrics + if record["Name"] == Constants::PV_USED_BYTES return filterPVInsightsMetrics(record) end - object_name = record["DataItems"][0]["ObjectName"] - counter_name = record["DataItems"][0]["Collections"][0]["CounterName"] + object_name = record["ObjectName"] + counter_name = JSON.parse(record["json_Collections"])[0]["CounterName"] + percentage_metric_value = 0.0 - metric_value = record["DataItems"][0]["Collections"][0]["Value"] + metric_value = JSON.parse(record["json_Collections"])[0]["Value"] if object_name == Constants::OBJECT_NAME_K8S_NODE && @metrics_to_collect_hash.key?(counter_name.downcase) # Compute and send % CPU and Memory @@ -165,7 +167,7 @@ def filter(tag, time, record) metric_name = Constants::CPU_USAGE_MILLI_CORES metric_value /= 1000000 #cadvisor record is in nanocores. Convert to mc if @@controller_type.downcase == "replicaset" - target_node_cpu_capacity_mc = @NodeCache.cpu.get_capacity(record["DataItems"][0]["Host"]) / 1000000 + target_node_cpu_capacity_mc = @NodeCache.cpu.get_capacity(record["Host"]) / 1000000 else target_node_cpu_capacity_mc = @cpu_capacity end @@ -178,7 +180,7 @@ def filter(tag, time, record) if counter_name.start_with?("memory") metric_name = counter_name if @@controller_type.downcase == "replicaset" - target_node_mem_capacity = @NodeCache.mem.get_capacity(record["DataItems"][0]["Host"]) + target_node_mem_capacity = @NodeCache.mem.get_capacity(record["Host"]) else target_node_mem_capacity = @memory_capacity end @@ -187,12 +189,12 @@ def filter(tag, time, record) percentage_metric_value = metric_value * 100 / target_node_mem_capacity end end - @log.info "percentage_metric_value for metric: #{metric_name} for instance: #{record["DataItems"][0]["Host"]} percentage: #{percentage_metric_value}" + @log.info "percentage_metric_value for metric: #{metric_name} for instance: #{record["Host"]} percentage: #{percentage_metric_value}" # do some sanity checking. Do we want this? 
if percentage_metric_value > 100.0 or percentage_metric_value < 0.0 telemetryProperties = {} - telemetryProperties["Computer"] = record["DataItems"][0]["Host"] + telemetryProperties["Computer"] = record["Host"] telemetryProperties["MetricName"] = metric_name telemetryProperties["MetricPercentageValue"] = percentage_metric_value ApplicationInsightsUtility.sendCustomEvent("ErrorPercentageOutOfBounds", telemetryProperties) @@ -200,7 +202,7 @@ def filter(tag, time, record) return MdmMetricsGenerator.getNodeResourceMetricRecords(record, metric_name, metric_value, percentage_metric_value) elsif object_name == Constants::OBJECT_NAME_K8S_CONTAINER && @metrics_to_collect_hash.key?(counter_name.downcase) - instanceName = record["DataItems"][0]["InstanceName"] + instanceName = record["InstanceName"] metricName = counter_name # Using node cpu capacity in the absence of container cpu capacity since the container will end up using the # node's capacity in this case. Converting this to nanocores for computation purposes, since this is in millicores @@ -235,7 +237,7 @@ def filter(tag, time, record) flushMetricTelemetry if percentage_metric_value >= thresholdPercentage setThresholdExceededTelemetry(metricName) - return MdmMetricsGenerator.getContainerResourceUtilMetricRecords(record["DataItems"][0]["Timestamp"], + return MdmMetricsGenerator.getContainerResourceUtilMetricRecords(record["Timestamp"], metricName, percentage_metric_value, @containerResourceDimensionHash[instanceName], @@ -256,39 +258,36 @@ def filter(tag, time, record) end end - def filterPVInsightsMetrics(record) + def filterPVInsightsMetrics(record) begin mdmMetrics = [] - record["DataItems"].each do |dataItem| - - if dataItem["Name"] == Constants::PV_USED_BYTES && @metrics_to_collect_hash.key?(dataItem["Name"].downcase) - metricName = dataItem["Name"] - usage = dataItem["Value"] - capacity = dataItem["Tags"][Constants::INSIGHTSMETRICS_TAGS_PV_CAPACITY_BYTES] - if capacity != 0 - percentage_metric_value = (usage * 
100.0) / capacity - end - @log.info "percentage_metric_value for metric: #{metricName} percentage: #{percentage_metric_value}" - @log.info "@@metric_threshold_hash for #{metricName}: #{@@metric_threshold_hash[metricName]}" - - computer = dataItem["Computer"] - resourceDimensions = dataItem["Tags"] - thresholdPercentage = @@metric_threshold_hash[metricName] - - flushMetricTelemetry - if percentage_metric_value >= thresholdPercentage - setThresholdExceededTelemetry(metricName) - return MdmMetricsGenerator.getPVResourceUtilMetricRecords(dataItem["CollectionTime"], - metricName, - computer, - percentage_metric_value, - resourceDimensions, - thresholdPercentage) - else - return [] - end # end if block for percentage metric > configured threshold % check - end # end if block for dataItem name check - end # end for block of looping through data items + if record["Name"] == Constants::PV_USED_BYTES && @metrics_to_collect_hash.key?(record["Name"].downcase) + metricName = record["Name"] + usage = record["Value"] + capacity = record["Tags"][Constants::INSIGHTSMETRICS_TAGS_PV_CAPACITY_BYTES] + if capacity != 0 + percentage_metric_value = (usage * 100.0) / capacity + end + @log.info "percentage_metric_value for metric: #{metricName} percentage: #{percentage_metric_value}" + @log.info "@@metric_threshold_hash for #{metricName}: #{@@metric_threshold_hash[metricName]}" + + computer = record["Computer"] + resourceDimensions = record["Tags"] + thresholdPercentage = @@metric_threshold_hash[metricName] + + flushMetricTelemetry + if percentage_metric_value >= thresholdPercentage + setThresholdExceededTelemetry(metricName) + return MdmMetricsGenerator.getPVResourceUtilMetricRecords(record["CollectionTime"], + metricName, + computer, + percentage_metric_value, + resourceDimensions, + thresholdPercentage) + else + return [] + end # end if block for percentage metric > configured threshold % check + end # end if block for dataItem name check return [] rescue Exception => e @log.info "Error 
processing cadvisor insights metrics record Exception: #{e.class} Message: #{e.message}" @@ -316,16 +315,22 @@ def ensure_cpu_memory_capacity_set end if !nodeInventory.nil? cpu_capacity_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores") - if !cpu_capacity_json.nil? && !cpu_capacity_json[0]["DataItems"][0]["Collections"][0]["Value"].to_s.nil? - @cpu_capacity = cpu_capacity_json[0]["DataItems"][0]["Collections"][0]["Value"] - @log.info "CPU Limit #{@cpu_capacity}" + if !cpu_capacity_json.nil? + metricVal = JSON.parse(cpu_capacity_json[0]["json_Collections"])[0]["Value"] + if !metricVal.to_s.nil? + @cpu_capacity = metricVal + @log.info "CPU Limit #{@cpu_capacity}" + end else @log.info "Error getting cpu_capacity" end memory_capacity_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes") - if !memory_capacity_json.nil? && !memory_capacity_json[0]["DataItems"][0]["Collections"][0]["Value"].to_s.nil? - @memory_capacity = memory_capacity_json[0]["DataItems"][0]["Collections"][0]["Value"] - @log.info "Memory Limit #{@memory_capacity}" + if !memory_capacity_json.nil? + metricVal = JSON.parse(memory_capacity_json[0]["json_Collections"])[0]["Value"] + if !metricVal.to_s.nil? 
+ @memory_capacity = metricVal + @log.info "Memory Limit #{@memory_capacity}" + end else @log.info "Error getting memory_capacity" end @@ -346,7 +351,7 @@ def ensure_cpu_memory_capacity_set end def filter_stream(tag, es) - new_es = MultiEventStream.new + new_es = Fluent::MultiEventStream.new begin ensure_cpu_memory_capacity_set # Getting container limits hash diff --git a/source/plugins/ruby/filter_cadvisor_health_container.rb b/source/plugins/ruby/filter_cadvisor_health_container.rb index 870fcd6d6..ab64b6e61 100644 --- a/source/plugins/ruby/filter_cadvisor_health_container.rb +++ b/source/plugins/ruby/filter_cadvisor_health_container.rb @@ -1,7 +1,9 @@ #!/usr/local/bin/ruby # frozen_string_literal: true -module Fluent +require 'fluent/plugin/filter' + +module Fluent::Plugin require 'logger' require 'yajl/json_gem' require_relative 'oms_common' @@ -11,7 +13,7 @@ module Fluent class CAdvisor2ContainerHealthFilter < Filter include HealthModel - Fluent::Plugin.register_filter('filter_cadvisor_health_container', self) + Fluent::Plugin.register_filter('cadvisor_health_container', self) config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/health_monitors.log' config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryRssBytes' @@ -58,9 +60,9 @@ def start def filter_stream(tag, es) if !@@cluster_health_model_enabled @log.info "Cluster Health Model disabled in filter_cadvisor_health_container" - return MultiEventStream.new + return Fluent::MultiEventStream.new end - new_es = MultiEventStream.new + new_es = Fluent::MultiEventStream.new records_count = 0 es.each { |time, record| begin @@ -83,8 +85,9 @@ def filter(tag, time, record) if record.key?("MonitorLabels") return record end - object_name = record['DataItems'][0]['ObjectName'] - counter_name = record['DataItems'][0]['Collections'][0]['CounterName'].downcase + + object_name = record['ObjectName'] + counter_name = 
JSON.parse(record['json_Collections'])[0]['CounterName'].downcase if @metrics_to_collect_hash.key?(counter_name) if object_name == @@object_name_k8s_container return @formatter.get_record_from_cadvisor_record(record) diff --git a/source/plugins/ruby/filter_cadvisor_health_node.rb b/source/plugins/ruby/filter_cadvisor_health_node.rb index 27e5bc255..ddbb871e8 100644 --- a/source/plugins/ruby/filter_cadvisor_health_node.rb +++ b/source/plugins/ruby/filter_cadvisor_health_node.rb @@ -1,7 +1,9 @@ #!/usr/local/bin/ruby # frozen_string_literal: true -module Fluent +require 'fluent/plugin/filter' + +module Fluent::Plugin require 'logger' require 'yajl/json_gem' require_relative 'oms_common' @@ -11,8 +13,8 @@ module Fluent class CAdvisor2NodeHealthFilter < Filter include HealthModel - Fluent::Plugin.register_filter('filter_cadvisor_health_node', self) - + Fluent::Plugin.register_filter('cadvisor_health_node', self) + attr_accessor :provider, :resources config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryRssBytes' @@ -75,13 +77,13 @@ def start def filter_stream(tag, es) if !@@cluster_health_model_enabled @log.info "Cluster Health Model disabled in filter_cadvisor_health_node" - return MultiEventStream.new + return Fluent::MultiEventStream.new end begin node_capacity = HealthMonitorUtils.ensure_cpu_memory_capacity_set(@@hm_log, @cpu_capacity, @memory_capacity, @@hostName) @cpu_capacity = node_capacity[0] @memory_capacity = node_capacity[1] - new_es = MultiEventStream.new + new_es = Fluent::MultiEventStream.new records_count = 0 es.each { |time, record| filtered_record = filter(tag, time, record) @@ -95,7 +97,7 @@ def filter_stream(tag, es) rescue => e @log.info "Error in filter_cadvisor_health_node filter_stream #{e.backtrace}" ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) - return MultiEventStream.new + return Fluent::MultiEventStream.new end end @@ -105,10 +107,10 @@ def filter(tag, time, record) return record 
end - object_name = record['DataItems'][0]['ObjectName'] - counter_name = record['DataItems'][0]['Collections'][0]['CounterName'].downcase + object_name = record['ObjectName'] + counter_name = JSON.parse(record['json_Collections'])[0]['CounterName'].downcase if @metrics_to_collect_hash.key?(counter_name.downcase) - metric_value = record['DataItems'][0]['Collections'][0]['Value'] + metric_value = JSON.parse(record['json_Collections'])[0]['Value'] case object_name when @@object_name_k8s_node case counter_name.downcase @@ -134,14 +136,14 @@ def process_node_cpu_record(record, metric_value) if record.nil? return nil else - instance_name = record['DataItems'][0]['InstanceName'] + instance_name = record['InstanceName'] #@log.info "CPU capacity #{@cpu_capacity}" metric_value /= 1000000 percent = (metric_value.to_f/@cpu_capacity*100).round(2) #@log.debug "Percentage of CPU limit: #{percent}" state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(MonitorId::NODE_CPU_MONITOR_ID)) #@log.debug "Computed State : #{state}" - timestamp = record['DataItems'][0]['Timestamp'] + timestamp = record['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value, "cpuUtilizationPercentage" => percent}} monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, @@hostName]) @@ -166,14 +168,14 @@ def process_node_memory_record(record, metric_value) if record.nil? 
return nil else - instance_name = record['DataItems'][0]['InstanceName'] + instance_name = record['InstanceName'] #@log.info "Memory capacity #{@memory_capacity}" percent = (metric_value.to_f/@memory_capacity*100).round(2) #@log.debug "Percentage of Memory limit: #{percent}" state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(MonitorId::NODE_MEMORY_MONITOR_ID)) #@log.debug "Computed State : #{state}" - timestamp = record['DataItems'][0]['Timestamp'] + timestamp = record['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} #@log.info health_monitor_record diff --git a/source/plugins/ruby/filter_container.rb b/source/plugins/ruby/filter_container.rb deleted file mode 100644 index b72e82dbc..000000000 --- a/source/plugins/ruby/filter_container.rb +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. - -# frozen_string_literal: true - -module Fluent - require 'logger' - - class ContainerFilter < Filter - Fluent::Plugin.register_filter('filter_container', self) - - config_param :enable_log, :integer, :default => 0 - config_param :log_path, :string, :default => '/var/opt/microsoft/omsagent/log/filter_container.log' - - def initialize - super - end - - def configure(conf) - super - @log = nil - - if @enable_log - @log = Logger.new(@log_path, 'weekly') - @log.debug {'Starting filter_container plugin'} - end - end - - def start - super - end - - def shutdown - super - end - - def filter(tag, time, record) - dataType = nil - - record.each do |r| - if dataType == nil - dataType = case r["ClassName"] - when "Container_ImageInventory" then "CONTAINER_IMAGE_INVENTORY_BLOB" - when "Container_ContainerInventory" then "CONTAINER_INVENTORY_BLOB" - when "Container_DaemonEvent" then "CONTAINER_SERVICE_LOG_BLOB" - when "Container_ContainerLog" then "CONTAINER_LOG_BLOB" - end - end - end - - 
wrapper = { - "DataType"=>dataType, - "IPName"=>"Containers", - "DataItems"=>record - } - - wrapper - end - end -end diff --git a/source/plugins/ruby/filter_docker_log.rb b/source/plugins/ruby/filter_docker_log.rb deleted file mode 100644 index b80f4c204..000000000 --- a/source/plugins/ruby/filter_docker_log.rb +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. - -# frozen_string_literal: true - -module Fluent - require 'logger' - require 'socket' - require 'yajl/json_gem' - - class DockerLogFilter < Filter - Plugin.register_filter('filter_docker_log', self) - - # Set to 1 in config file to enable logging - config_param :enable_log, :integer, :default => 0 - config_param :log_path, :string, :default => '/var/opt/microsoft/omsagent/log/filter_docker_log.txt' - - # This method is called before starting. - def configure(conf) - super - @hostname = Socket.gethostname - # in case get full name, extract up to '.' - dotpos = @hostname.index('.') - if dotpos != nil - @hostname = @hostname[0..dotpos-1] - end - - # Cache the image name and ID of each container so we don't have to inspect each time - @containerCache = Hash.new - - @log = nil - - if @enable_log - @log = Logger.new(@log_path, 'weekly') - @log.debug {'Starting filter_docker_log plugin on ' + @hostname} - end - end - - def filter(tag, time, record) - if @log != nil - @log.debug {'Accepted a log from container ' + record['container_id']} - end - - wrapper = Hash.new - - if record['log'].empty? 
- if @log != nil - @log.debug {'Log from container ' + record['container_id'] + ' had length 0 and will be discarded'} - end - else - # Need to query image information from ID - containerId = record['container_id'] - - unless @containerCache.has_key?(containerId) - if @log != nil - @log.debug {'Container ' + containerId + ' information is not in the cache, inspecting'} - end - - # Value not in cache, use inspect - @containerCache[containerId] = Hash.new - details = '' - - begin - details = JSON.parse(`sudo docker inspect #{containerId}`) - rescue => e - if @log != nil - @log.error {'sudo docker inspect ' + containerId + ' failed'} - end - end - - if details.empty? - # This should not occur - @containerCache[containerId]['Image'] = 'Unknown' - @containerCache[containerId]['ImageName'] = 'Unknown' - - if @log != nil - @log.warn {'The image ID of container ' + containerId + ' could not be determined'} - end - else - @containerCache[containerId]['Image'] = details[0]['Config']['Image'] - @containerCache[containerId]['ImageName'] = details[0]['Config']['Image'] - end - end - - newRecord = @containerCache[containerId] - - # No query is required - newRecord['Id'] = containerId - newRecord['Name'] = record['container_name'][0] == "/" ? 
record['container_name'][1..-1] : record['container_name'] - newRecord['LogEntrySource'] = record['source'] - newRecord['LogEntry'] = record['log'] - newRecord['Computer'] = @hostname - - wrapper = { - "DataType"=>"CONTAINER_LOG_BLOB", - "IPName"=>"Containers", - "DataItems"=>[newRecord] - } - end - - wrapper - end - end -end diff --git a/source/plugins/ruby/filter_health_model_builder.rb b/source/plugins/ruby/filter_health_model_builder.rb index 36e4801d7..67dfff2ca 100644 --- a/source/plugins/ruby/filter_health_model_builder.rb +++ b/source/plugins/ruby/filter_health_model_builder.rb @@ -2,15 +2,17 @@ # frozen_string_literal: true -module Fluent +require 'fluent/plugin/filter' + +module Fluent::Plugin require 'logger' require 'yajl/json_gem' Dir[File.join(__dir__, './health', '*.rb')].each { |file| require file } - + class FilterHealthModelBuilder < Filter include HealthModel - Fluent::Plugin.register_filter('filter_health_model_builder', self) + Fluent::Plugin.register_filter('health_model_builder', self) config_param :enable_log, :integer, :default => 0 config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log' @@ -20,7 +22,7 @@ class FilterHealthModelBuilder < Filter attr_reader :buffer, :model_builder, :health_model_definition, :monitor_factory, :state_finalizers, :monitor_set, :model_builder, :hierarchy_builder, :resources, :kube_api_down_handler, :provider, :reducer, :state, :generator, :telemetry - @@rewrite_tag = 'kubehealth.Signals' + @@cluster_id = KubernetesApiClient.getClusterId @@token_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/token" @@cert_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" @@ -29,6 +31,7 @@ class FilterHealthModelBuilder < Filter def initialize begin super + @rewrite_tag = 'oneagent.containerInsights.KUBE_HEALTH_BLOB' @buffer = HealthModel::HealthModelBuffer.new @cluster_health_state = ClusterHealthState.new(@@token_file_path, 
@@cert_file_path) @health_model_definition = HealthModel::ParentMonitorProvider.new(HealthModel::HealthModelDefinitionParser.new(@model_definition_path).parse_file) @@ -53,6 +56,7 @@ def initialize deserialized_state_info = @cluster_health_state.get_state @state.initialize_state(deserialized_state_info) end + rescue => e ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) end @@ -82,11 +86,11 @@ def shutdown def filter_stream(tag, es) if !@@cluster_health_model_enabled @log.info "Cluster Health Model disabled in filter_health_model_builder" - return MultiEventStream.new + return Fluent::MultiEventStream.new end begin - new_es = MultiEventStream.new - time = Time.now + new_es = Fluent::MultiEventStream.new + time = Time.now if tag.start_with?("kubehealth.DaemonSet.Node") node_records = [] @@ -96,7 +100,7 @@ def filter_stream(tag, es) } @buffer.add_to_buffer(node_records) end - return MultiEventStream.new + return Fluent::MultiEventStream.new elsif tag.start_with?("kubehealth.DaemonSet.Container") container_records = [] if !es.nil? @@ -110,7 +114,7 @@ def filter_stream(tag, es) @container_cpu_memory_records = [] #in some clusters, this is null, so initialize it again. 
end @container_cpu_memory_records.push(*container_records) # push the records for aggregation later - return MultiEventStream.new + return Fluent::MultiEventStream.new elsif tag.start_with?("kubehealth.ReplicaSet") records = [] es.each{|time, record| @@ -218,8 +222,7 @@ def filter_stream(tag, es) @log.info "after optimizing health signals all_monitors.size #{all_monitors.size}" - current_time = Time.now - emit_time = current_time.to_f + # for each key in monitor.keys, # get the state from health_monitor_state # generate the record to send @@ -241,17 +244,12 @@ def filter_stream(tag, es) @cluster_new_state = new_state end end - end - record_wrapper = { - "DataType" => "KUBE_HEALTH_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [record.each { |k, v| record[k] = v }], - } - new_es.add(emit_time, record_wrapper) + end + new_es.add(Fluent::Engine.now, record) } #emit the stream - router.emit_stream(@@rewrite_tag, new_es) + router.emit_stream(@rewrite_tag, new_es) #initialize monitor_set and model_builder @monitor_set = HealthModel::MonitorSet.new @@ -261,8 +259,8 @@ def filter_stream(tag, es) @cluster_health_state.update_state(@state.to_h) @telemetry.send # return an empty event stream, else the match will throw a NoMethodError - return MultiEventStream.new - elsif tag.start_with?("kubehealth.Signals") + return Fluent::MultiEventStream.new + elsif tag.start_with?(@rewrite_tag) # this filter also acts as a pass through as we are rewriting the tag and emitting to the fluent stream es else @@ -274,6 +272,6 @@ def filter_stream(tag, es) @log.warn "Message: #{e.message} Backtrace: #{e.backtrace}" return nil end - end + end end end diff --git a/source/plugins/ruby/filter_inventory2mdm.rb b/source/plugins/ruby/filter_inventory2mdm.rb index 38ccab885..509ac608e 100644 --- a/source/plugins/ruby/filter_inventory2mdm.rb +++ b/source/plugins/ruby/filter_inventory2mdm.rb @@ -2,14 +2,16 @@ # frozen_string_literal: true -module Fluent +require 'fluent/plugin/filter' + 
+module Fluent::Plugin require 'logger' require 'yajl/json_gem' require_relative 'oms_common' require_relative 'CustomMetricsUtils' class Inventory2MdmFilter < Filter - Fluent::Plugin.register_filter('filter_inventory2mdm', self) + Fluent::Plugin.register_filter('inventory2mdm', self) config_param :enable_log, :integer, :default => 0 config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log' @@ -115,8 +117,8 @@ def process_node_inventory_records(es) es.each{|time,record| begin - timestamp = record['DataItems'][0]['CollectionTime'] - node_status = record['DataItems'][0]['Status'] + timestamp = record['CollectionTime'] + node_status = record['Status'] if node_status.downcase.split(",").include? @@node_status_ready.downcase node_ready_count = node_ready_count+1 else @@ -161,8 +163,8 @@ def process_pod_inventory_records(es) records = [] es.each{|time,record| record_count += 1 - timestamp = record['DataItems'][0]['CollectionTime'] - podUid = record['DataItems'][0]['PodUid'] + timestamp = record['CollectionTime'] + podUid = record['PodUid'] if podUids.key?(podUid) #@log.info "pod with #{podUid} already counted" @@ -170,10 +172,10 @@ def process_pod_inventory_records(es) end podUids[podUid] = true - podPhaseDimValue = record['DataItems'][0]['PodStatus'] - podNamespaceDimValue = record['DataItems'][0]['Namespace'] - podControllerNameDimValue = record['DataItems'][0]['ControllerName'] - podNodeDimValue = record['DataItems'][0]['Computer'] + podPhaseDimValue = record['PodStatus'] + podNamespaceDimValue = record['Namespace'] + podControllerNameDimValue = record['ControllerName'] + podNodeDimValue = record['Computer'] if podControllerNameDimValue.nil? || podControllerNameDimValue.empty? 
podControllerNameDimValue = 'No Controller' @@ -263,7 +265,7 @@ def process_pod_inventory_records(es) end def filter_stream(tag, es) - new_es = MultiEventStream.new + new_es = Fluent::MultiEventStream.new filtered_records = [] time = DateTime.now begin diff --git a/source/plugins/ruby/filter_telegraf2mdm.rb b/source/plugins/ruby/filter_telegraf2mdm.rb index 88ae428d1..fd71f1682 100644 --- a/source/plugins/ruby/filter_telegraf2mdm.rb +++ b/source/plugins/ruby/filter_telegraf2mdm.rb @@ -2,7 +2,9 @@ # frozen_string_literal: true -module Fluent +require 'fluent/plugin/filter' + +module Fluent::Plugin require "logger" require "yajl/json_gem" require_relative "oms_common" @@ -11,7 +13,7 @@ module Fluent require_relative "constants" class Telegraf2MdmFilter < Filter - Fluent::Plugin.register_filter("filter_telegraf2mdm", self) + Fluent::Plugin.register_filter("telegraf2mdm", self) config_param :enable_log, :integer, :default => 0 config_param :log_path, :string, :default => "/var/opt/microsoft/docker-cimprov/log/filter_telegraf2mdm.log" @@ -64,7 +66,7 @@ def filter(tag, time, record) end def filter_stream(tag, es) - new_es = MultiEventStream.new + new_es = Fluent::MultiEventStream.new begin es.each { |time, record| filtered_records = filter(tag, time, record) diff --git a/source/plugins/ruby/health/health_container_cpu_memory_record_formatter.rb b/source/plugins/ruby/health/health_container_cpu_memory_record_formatter.rb index 12c72a120..ebf3abd7e 100644 --- a/source/plugins/ruby/health/health_container_cpu_memory_record_formatter.rb +++ b/source/plugins/ruby/health/health_container_cpu_memory_record_formatter.rb @@ -17,10 +17,10 @@ def initialize def get_record_from_cadvisor_record(cadvisor_record) begin - instance_name = cadvisor_record['DataItems'][0]['InstanceName'] - counter_name = cadvisor_record['DataItems'][0]['Collections'][0]['CounterName'] - metric_value = cadvisor_record['DataItems'][0]['Collections'][0]['Value'] - timestamp = 
cadvisor_record['DataItems'][0]['Timestamp'] + instance_name = cadvisor_record['InstanceName'] + counter_name = JSON.parse(cadvisor_record['json_Collections'])[0]['CounterName'] + metric_value = JSON.parse(cadvisor_record['json_Collections'])[0]['Value'] + timestamp = cadvisor_record['Timestamp'] health_container_cpu_memory_record = @@health_container_cpu_memory_record_template % { instance_name: instance_name, diff --git a/source/plugins/ruby/health/health_monitor_utils.rb b/source/plugins/ruby/health/health_monitor_utils.rb index c23d8824a..f6e23de8d 100644 --- a/source/plugins/ruby/health/health_monitor_utils.rb +++ b/source/plugins/ruby/health/health_monitor_utils.rb @@ -171,8 +171,9 @@ def get_cluster_cpu_memory_capacity(log, node_inventory: nil) cpu_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "cpu", "cpuCapacityNanoCores") if !cpu_capacity_json.nil? cpu_capacity_json.each do |cpu_capacity_node| - if !cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'].to_s.nil? - cluster_cpu_capacity += cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'] + metricVal = JSON.parse(cpu_capacity_node['json_Collections'])[0]['Value'] + if !metricVal.to_s.nil? + cluster_cpu_capacity += metricVal end end else @@ -181,8 +182,9 @@ def get_cluster_cpu_memory_capacity(log, node_inventory: nil) memory_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "memory", "memoryCapacityBytes") if !memory_capacity_json.nil? memory_capacity_json.each do |memory_capacity_node| - if !memory_capacity_node['DataItems'][0]['Collections'][0]['Value'].to_s.nil? - cluster_memory_capacity += memory_capacity_node['DataItems'][0]['Collections'][0]['Value'] + metricVal = JSON.parse(memory_capacity_node['json_Collections'])[0]['Value'] + if !metricVal.to_s.nil? 
+ cluster_memory_capacity += metricVal end end else diff --git a/source/plugins/ruby/in_cadvisor_perf.rb b/source/plugins/ruby/in_cadvisor_perf.rb index b706ff00a..cfad9e393 100644 --- a/source/plugins/ruby/in_cadvisor_perf.rb +++ b/source/plugins/ruby/in_cadvisor_perf.rb @@ -1,10 +1,11 @@ #!/usr/local/bin/ruby # frozen_string_literal: true +require 'fluent/plugin/input' -module Fluent +module Fluent::Plugin class CAdvisor_Perf_Input < Input - Plugin.register_input("cadvisorperf", self) + Fluent::Plugin.register_input("cadvisor_perf", self) def initialize super @@ -15,14 +16,15 @@ def initialize require_relative "CAdvisorMetricsAPIClient" require_relative "oms_common" require_relative "omslog" - require_relative "constants" + require_relative "constants" end config_param :run_interval, :time, :default => 60 - config_param :tag, :string, :default => "oms.api.cadvisorperf" + config_param :tag, :string, :default => "oneagent.containerInsights.LINUX_PERF_BLOB" config_param :mdmtag, :string, :default => "mdm.cadvisorperf" config_param :nodehealthtag, :string, :default => "kubehealth.DaemonSet.Node" config_param :containerhealthtag, :string, :default => "kubehealth.DaemonSet.Container" + config_param :insightsmetricstag, :string, :default => "oneagent.containerInsights.INSIGHTS_METRICS_BLOB" def configure(conf) super @@ -30,6 +32,7 @@ def configure(conf) def start if @run_interval + super @finished = false @condition = ConditionVariable.new @mutex = Mutex.new @@ -44,6 +47,7 @@ def shutdown @condition.signal } @thread.join + super # This super must be at the end of shutdown method end end @@ -53,15 +57,13 @@ def enumerate() batchTime = currentTime.utc.iso8601 @@istestvar = ENV["ISTEST"] begin - eventStream = MultiEventStream.new - insightsMetricsEventStream = MultiEventStream.new + eventStream = Fluent::MultiEventStream.new + insightsMetricsEventStream = Fluent::MultiEventStream.new metricData = CAdvisorMetricsAPIClient.getMetrics(winNode: nil, metricTime: batchTime ) - 
metricData.each do |record| - record["DataType"] = "LINUX_PERF_BLOB" - record["IPName"] = "LogManagement" - eventStream.add(time, record) if record - end - + metricData.each do |record| + eventStream.add(Fluent::Engine.now, record) if record + end + router.emit_stream(@tag, eventStream) if eventStream router.emit_stream(@mdmtag, eventStream) if eventStream router.emit_stream(@containerhealthtag, eventStream) if eventStream @@ -75,19 +77,13 @@ def enumerate() #start GPU InsightsMetrics items begin containerGPUusageInsightsMetricsDataItems = [] - containerGPUusageInsightsMetricsDataItems.concat(CAdvisorMetricsAPIClient.getInsightsMetrics(winNode: nil, metricTime: batchTime)) - + containerGPUusageInsightsMetricsDataItems.concat(CAdvisorMetricsAPIClient.getInsightsMetrics(winNode: nil, metricTime: batchTime)) containerGPUusageInsightsMetricsDataItems.each do |insightsMetricsRecord| - wrapper = { - "DataType" => "INSIGHTS_METRICS_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], - } - insightsMetricsEventStream.add(time, wrapper) if wrapper + insightsMetricsEventStream.add(Fluent::Engine.now, insightsMetricsRecord) if insightsMetricsRecord end - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + router.emit_stream(@insightsmetricstag, insightsMetricsEventStream) if insightsMetricsEventStream router.emit_stream(@mdmtag, insightsMetricsEventStream) if insightsMetricsEventStream if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0) @@ -135,6 +131,6 @@ def run_periodic @mutex.lock end @mutex.unlock - end + end end # CAdvisor_Perf_Input end # module diff --git a/source/plugins/ruby/in_containerinventory.rb b/source/plugins/ruby/in_containerinventory.rb index c1126aa4e..f4f05f9ce 100644 --- a/source/plugins/ruby/in_containerinventory.rb +++ b/source/plugins/ruby/in_containerinventory.rb @@ -1,9 +1,11 @@ #!/usr/local/bin/ruby # frozen_string_literal: true -module Fluent +require 'fluent/plugin/input' + +module Fluent::Plugin class Container_Inventory_Input < Input - Plugin.register_input("containerinventory", self) + Fluent::Plugin.register_input("containerinventory", self) @@PluginName = "ContainerInventory" @@ -19,7 +21,7 @@ def initialize end config_param :run_interval, :time, :default => 60 - config_param :tag, :string, :default => "oms.containerinsights.containerinventory" + config_param :tag, :string, :default => "oneagent.containerInsights.CONTAINER_INVENTORY_BLOB" def configure(conf) super @@ -27,6 +29,7 @@ def configure(conf) def start if @run_interval + super @finished = false @condition = ConditionVariable.new @mutex = Mutex.new @@ -42,17 +45,17 @@ def shutdown @condition.signal } @thread.join + super # This super must be at the end of shutdown method end end def enumerate - currentTime = Time.now - emitTime = currentTime.to_f + currentTime = Time.now batchTime = currentTime.utc.iso8601 containerInventory = Array.new - eventStream = MultiEventStream.new + eventStream = Fluent::MultiEventStream.new hostName = "" - $log.info("in_container_inventory::enumerate : Begin processing @ #{Time.now.utc.iso8601}") + $log.info("in_container_inventory::enumerate : Begin processing @ #{Time.now.utc.iso8601}") begin containerRuntimeEnv = ENV["CONTAINER_RUNTIME"] $log.info("in_container_inventory::enumerate : container runtime : #{containerRuntimeEnv}") @@ -89,13 +92,8 @@ def enumerate end end end - containerInventory.each do 
|record| - wrapper = { - "DataType" => "CONTAINER_INVENTORY_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [record.each { |k, v| record[k] = v }], - } - eventStream.add(emitTime, wrapper) if wrapper + containerInventory.each do |record| + eventStream.add(Fluent::Engine.now, record) if record end router.emit_stream(@tag, eventStream) if eventStream @@istestvar = ENV["ISTEST"] @@ -149,6 +147,6 @@ def run_periodic @mutex.lock end @mutex.unlock - end + end end # Container_Inventory_Input end # module diff --git a/source/plugins/ruby/in_kube_events.rb b/source/plugins/ruby/in_kube_events.rb index f50019a01..6bad35096 100644 --- a/source/plugins/ruby/in_kube_events.rb +++ b/source/plugins/ruby/in_kube_events.rb @@ -1,9 +1,11 @@ #!/usr/local/bin/ruby # frozen_string_literal: true -module Fluent +require 'fluent/plugin/input' + +module Fluent::Plugin class Kube_Event_Input < Input - Plugin.register_input("kubeevents", self) + Fluent::Plugin.register_input("kube_events", self) @@KubeEventsStateFile = "/var/opt/microsoft/docker-cimprov/state/KubeEventQueryState.yaml" def initialize @@ -29,14 +31,15 @@ def initialize end config_param :run_interval, :time, :default => 60 - config_param :tag, :string, :default => "oms.containerinsights.KubeEvents" + config_param :tag, :string, :default => "oneagent.containerInsights.KUBE_EVENTS_BLOB" def configure(conf) super end - def start + def start if @run_interval + super if !ENV["EVENTS_CHUNK_SIZE"].nil? && !ENV["EVENTS_CHUNK_SIZE"].empty? 
&& ENV["EVENTS_CHUNK_SIZE"].to_i > 0 @EVENTS_CHUNK_SIZE = ENV["EVENTS_CHUNK_SIZE"].to_i else @@ -70,6 +73,7 @@ def shutdown @condition.signal } @thread.join + super end end @@ -80,8 +84,8 @@ def enumerate batchTime = currentTime.utc.iso8601 eventQueryState = getEventQueryState newEventQueryState = [] - @eventsCount = 0 - + @eventsCount = 0 + # Initializing continuation token to nil continuationToken = nil $log.info("in_kube_events::enumerate : Getting events from Kube API @ #{Time.now.utc.iso8601}") @@ -127,11 +131,10 @@ def enumerate end # end enumerate def parse_and_emit_records(events, eventQueryState, newEventQueryState, batchTime = Time.utc.iso8601) - currentTime = Time.now - emitTime = currentTime.to_f + currentTime = Time.now @@istestvar = ENV["ISTEST"] begin - eventStream = MultiEventStream.new + eventStream = Fluent::MultiEventStream.new events["items"].each do |items| record = {} # - Not sure if ingestion has the below mapping for this custom type. Fix it as part of fixed type conversion @@ -162,13 +165,8 @@ def parse_and_emit_records(events, eventQueryState, newEventQueryState, batchTim record["Count"] = items["count"] record["Computer"] = nodeName record["ClusterName"] = KubernetesApiClient.getClusterName - record["ClusterId"] = KubernetesApiClient.getClusterId - wrapper = { - "DataType" => "KUBE_EVENTS_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [record.each { |k, v| record[k] = v }], - } - eventStream.add(emitTime, wrapper) if wrapper + record["ClusterId"] = KubernetesApiClient.getClusterId + eventStream.add(Fluent::Engine.now, record) if record @eventsCount += 1 end router.emit_stream(@tag, eventStream) if eventStream diff --git a/source/plugins/ruby/in_kube_health.rb b/source/plugins/ruby/in_kube_health.rb index 874be26f6..10da8d33d 100644 --- a/source/plugins/ruby/in_kube_health.rb +++ b/source/plugins/ruby/in_kube_health.rb @@ -1,17 +1,19 @@ #!/usr/local/bin/ruby # frozen_string_literal: true +require 'fluent/plugin/input' + 
require_relative "KubernetesApiClient" require_relative "oms_common" require_relative "omslog" require_relative "ApplicationInsightsUtility" -module Fluent +module Fluent::Plugin Dir[File.join(__dir__, "./health", "*.rb")].each { |file| require file } class KubeHealthInput < Input include HealthModel - Plugin.register_input("kubehealth", self) + Fluent::Plugin.register_input("kube_health", self) config_param :health_monitor_config_path, :default => "/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json" @@ -46,6 +48,7 @@ def configure(conf) def start begin + super if @run_interval @finished = false @condition = ConditionVariable.new @@ -76,20 +79,20 @@ def shutdown @condition.signal } @thread.join + super # This super must be at the end of shutdown method end end def enumerate if !@@cluster_health_model_enabled @@hmlog.info "Cluster Health Model disabled in in_kube_health" - return MultiEventStream.new + return Fluent::MultiEventStream.new end begin - currentTime = Time.now - emitTime = currentTime.to_f + currentTime = Time.now batchTime = currentTime.utc.iso8601 health_monitor_records = [] - eventStream = MultiEventStream.new + eventStream = Fluent::MultiEventStream.new #HealthMonitorUtils.refresh_kubernetes_api_data(@@hmlog, nil) # we do this so that if the call fails, we get a response code/header etc. 
@@ -156,7 +159,7 @@ def enumerate end health_monitor_records.each do |record| - eventStream.add(emitTime, record) + eventStream.add(Fluent::Engine.now, record) end router.emit_stream(@tag, eventStream) if eventStream rescue => errorStr diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index 99e804302..8a5f99f43 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -1,17 +1,17 @@ #!/usr/local/bin/ruby # frozen_string_literal: true -module Fluent - class Kube_nodeInventory_Input < Input - Plugin.register_input("kubenodeinventory", self) +require 'fluent/plugin/input' - @@ContainerNodeInventoryTag = "oms.containerinsights.ContainerNodeInventory" - @@MDMKubeNodeInventoryTag = "mdm.kubenodeinventory" +module Fluent::Plugin + class Kube_nodeInventory_Input < Input + Fluent::Plugin.register_input("kube_nodes", self) + @@configMapMountPath = "/etc/config/settings/log-data-collection-settings" @@promConfigMountPath = "/etc/config/settings/prometheus-data-collection-settings" @@osmConfigMountPath = "/etc/config/osm-settings/osm-metric-collection-configuration" @@AzStackCloudFileName = "/etc/kubernetes/host/azurestackcloud.json" - @@kubeperfTag = "oms.api.KubePerf" + @@rsPromInterval = ENV["TELEMETRY_RS_PROM_INTERVAL"] @@rsPromFieldPassCount = ENV["TELEMETRY_RS_PROM_FIELDPASS_LENGTH"] @@ -35,7 +35,13 @@ def initialize require_relative "KubernetesApiClient" require_relative "ApplicationInsightsUtility" require_relative "oms_common" - require_relative "omslog" + require_relative "omslog" + + @ContainerNodeInventoryTag = "oneagent.containerInsights.CONTAINER_NODE_INVENTORY_BLOB" + @insightsMetricsTag = "oneagent.containerInsights.INSIGHTS_METRICS_BLOB" + @MDMKubeNodeInventoryTag = "mdm.kubenodeinventory" + @kubeperfTag = "oneagent.containerInsights.LINUX_PERF_BLOB" + # refer tomlparser-agent-config for the defaults @NODES_CHUNK_SIZE = 0 @NODES_EMIT_STREAM_BATCH_SIZE = 0 @@ -48,14 +54,15 @@ def 
initialize end config_param :run_interval, :time, :default => 60 - config_param :tag, :string, :default => "oms.containerinsights.KubeNodeInventory" + config_param :tag, :string, :default => "oneagent.containerInsights.KUBE_NODE_INVENTORY_BLOB" def configure(conf) super end - def start + def start if @run_interval + super if !ENV["NODES_CHUNK_SIZE"].nil? && !ENV["NODES_CHUNK_SIZE"].empty? && ENV["NODES_CHUNK_SIZE"].to_i > 0 @NODES_CHUNK_SIZE = ENV["NODES_CHUNK_SIZE"].to_i else @@ -90,6 +97,7 @@ def shutdown @condition.signal } @thread.join + super # This super must be at the end of shutdown method end end @@ -101,8 +109,10 @@ def enumerate @nodesAPIE2ELatencyMs = 0 @nodeInventoryE2EProcessingLatencyMs = 0 - nodeInventoryStartTime = (Time.now.to_f * 1000).to_i + nodeInventoryStartTime = (Time.now.to_f * 1000).to_i + nodesAPIChunkStartTime = (Time.now.to_f * 1000).to_i + # Initializing continuation token to nil continuationToken = nil $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") @@ -151,49 +161,37 @@ def enumerate def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) begin - currentTime = Time.now - emitTime = currentTime.to_f + currentTime = Time.now telemetrySent = false - eventStream = MultiEventStream.new - containerNodeInventoryEventStream = MultiEventStream.new - insightsMetricsEventStream = MultiEventStream.new - kubePerfEventStream = MultiEventStream.new + eventStream = Fluent::MultiEventStream.new + containerNodeInventoryEventStream = Fluent::MultiEventStream.new + insightsMetricsEventStream = Fluent::MultiEventStream.new + kubePerfEventStream = Fluent::MultiEventStream.new @@istestvar = ENV["ISTEST"] #get node inventory nodeInventory["items"].each do |item| # node inventory nodeInventoryRecord = getNodeInventoryRecord(item, batchTime) - wrapper = { - "DataType" => "KUBE_NODE_INVENTORY_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [nodeInventoryRecord.each { |k, v| 
nodeInventoryRecord[k] = v }], - } - eventStream.add(emitTime, wrapper) if wrapper + eventStream.add(Fluent::Engine.now, nodeInventoryRecord) if nodeInventoryRecord if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && eventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE $log.info("in_kube_node::parse_and_emit_records: number of node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") router.emit_stream(@tag, eventStream) if eventStream $log.info("in_kube_node::parse_and_emit_records: number of mdm node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream - + router.emit_stream(@MDMKubeNodeInventoryTag, eventStream) if eventStream if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end - eventStream = MultiEventStream.new + eventStream = Fluent::MultiEventStream.new end # container node inventory - containerNodeInventoryRecord = getContainerNodeInventoryRecord(item, batchTime) - containerNodeInventoryWrapper = { - "DataType" => "CONTAINER_NODE_INVENTORY_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [containerNodeInventoryRecord.each { |k, v| containerNodeInventoryRecord[k] = v }], - } - containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper + containerNodeInventoryRecord = getContainerNodeInventoryRecord(item, batchTime) + containerNodeInventoryEventStream.add(Fluent::Engine.now, containerNodeInventoryRecord) if containerNodeInventoryRecord if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && containerNodeInventoryEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE $log.info("in_kube_node::parse_and_emit_records: number of container node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@ContainerNodeInventoryTag, 
containerNodeInventoryEventStream) if containerNodeInventoryEventStream - containerNodeInventoryEventStream = MultiEventStream.new + router.emit_stream(@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream + containerNodeInventoryEventStream = Fluent::MultiEventStream.new if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) $log.info("containerNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end @@ -223,7 +221,8 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) nodeMetricRecords.push(nodeMetricRecord) # add data to the cache so filter_cadvisor2mdm.rb can use it if is_windows_node - @NodeCache.cpu.set_capacity(nodeMetricRecord["DataItems"][0]["Host"], nodeMetricRecord["DataItems"][0]["Collections"][0]["Value"]) + metricVal = JSON.parse(nodeMetricRecord["json_Collections"])[0]["Value"] + @NodeCache.cpu.set_capacity(nodeMetricRecord["Host"], metricVal) end end nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "capacity", "memory", "memoryCapacityBytes", batchTime) @@ -231,18 +230,17 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) nodeMetricRecords.push(nodeMetricRecord) # add data to the cache so filter_cadvisor2mdm.rb can use it if is_windows_node - @NodeCache.mem.set_capacity(nodeMetricRecord["DataItems"][0]["Host"], nodeMetricRecord["DataItems"][0]["Collections"][0]["Value"]) + metricVal = JSON.parse(nodeMetricRecord["json_Collections"])[0]["Value"] + @NodeCache.mem.set_capacity(nodeMetricRecord["Host"], metricVal) end end - nodeMetricRecords.each do |metricRecord| - metricRecord["DataType"] = "LINUX_PERF_BLOB" - metricRecord["IPName"] = "LogManagement" - kubePerfEventStream.add(emitTime, metricRecord) if metricRecord + nodeMetricRecords.each do |metricRecord| + kubePerfEventStream.add(Fluent::Engine.now, metricRecord) if metricRecord end if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && kubePerfEventStream.count >= 
@NODES_EMIT_STREAM_BATCH_SIZE $log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream - kubePerfEventStream = MultiEventStream.new + router.emit_stream(@kubeperfTag, kubePerfEventStream) if kubePerfEventStream + kubePerfEventStream = Fluent::MultiEventStream.new if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) $log.info("kubeNodePerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") end @@ -266,18 +264,13 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty? nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord) end - nodeGPUInsightsMetricsRecords.each do |insightsMetricsRecord| - wrapper = { - "DataType" => "INSIGHTS_METRICS_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], - } - insightsMetricsEventStream.add(emitTime, wrapper) if wrapper + nodeGPUInsightsMetricsRecords.each do |insightsMetricsRecord| + insightsMetricsEventStream.add(Fluent::Engine.now, insightsMetricsRecord) if insightsMetricsRecord end if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && insightsMetricsEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE $log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream - insightsMetricsEventStream = MultiEventStream.new + router.emit_stream(@insightsMetricsTag, insightsMetricsEventStream) if insightsMetricsEventStream + insightsMetricsEventStream = Fluent::MultiEventStream.new if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) $log.info("kubeNodeInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") end @@ -337,15 +330,15 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) $log.info("in_kube_node::parse_and_emit_records: number of node inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}") router.emit_stream(@tag, eventStream) if eventStream $log.info("in_kube_node::parse_and_emit_records: number of mdm node inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream + router.emit_stream(@MDMKubeNodeInventoryTag, eventStream) if eventStream if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end - eventStream = nil + eventStream = nil end if containerNodeInventoryEventStream.count > 0 $log.info("in_kube_node::parse_and_emit_records: number of container node inventory records emitted #{containerNodeInventoryEventStream.count} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream + router.emit_stream(@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream containerNodeInventoryEventStream = nil if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) $log.info("containerNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") @@ -354,7 +347,7 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) if kubePerfEventStream.count > 0 $log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{kubePerfEventStream.count} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream + router.emit_stream(@kubeperfTag, kubePerfEventStream) if kubePerfEventStream kubePerfEventStream = nil if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) $log.info("kubeNodePerfInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") @@ -362,7 +355,7 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) end if insightsMetricsEventStream.count > 0 $log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted #{insightsMetricsEventStream.count} @ #{Time.now.utc.iso8601}") - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + router.emit_stream(@insightsMetricsTag, insightsMetricsEventStream) if insightsMetricsEventStream insightsMetricsEventStream = nil if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) $log.info("kubeNodeInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") @@ -513,10 +506,8 @@ def getNodeTelemetryProps(item) $log.warn "in_kube_nodes::getContainerNodeIngetNodeTelemetryPropsventoryRecord:Failed: #{errorStr}" end return properties - end + end end # Kube_Node_Input - - class NodeStatsCache # inner class for caching implementation (CPU and memory caching is handled the exact same way, so logic to do so is moved to a private inner class) # (to reduce code duplication) @@ -586,6 +577,5 @@ def cpu() def mem() return @@memCache end - end - + end end # module diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index 5256eb159..40b3934d3 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -1,16 +1,17 @@ #!/usr/local/bin/ruby # frozen_string_literal: true -module Fluent +require 'fluent/plugin/input' + +module Fluent::Plugin require_relative "podinventory_to_mdm" class Kube_PodInventory_Input < Input - Plugin.register_input("kubepodinventory", self) + Fluent::Plugin.register_input("kube_podinventory", self) @@MDMKubePodInventoryTag = "mdm.kubepodinventory" @@hostName = (OMS::Common.get_hostname) - @@kubeperfTag = "oms.api.KubePerf" - @@kubeservicesTag = "oms.containerinsights.KubeServices" + def initialize super @@ -38,19 +39,25 @@ def initialize @winContainerCount = 0 @controllerData = {} @podInventoryE2EProcessingLatencyMs = 0 - @podsAPIE2ELatencyMs = 0 + @podsAPIE2ELatencyMs = 0 + + @kubeperfTag = "oneagent.containerInsights.LINUX_PERF_BLOB" + @kubeservicesTag = "oneagent.containerInsights.KUBE_SERVICES_BLOB" + @containerInventoryTag = "oneagent.containerInsights.CONTAINER_INVENTORY_BLOB" + @insightsMetricsTag = "oneagent.containerInsights.INSIGHTS_METRICS_BLOB" end config_param :run_interval, :time, :default => 60 - config_param :tag, :string, :default => "oms.containerinsights.KubePodInventory" + 
config_param :tag, :string, :default => "oneagent.containerInsights.KUBE_POD_INVENTORY_BLOB" def configure(conf) super @inventoryToMdmConvertor = Inventory2MdmConvertor.new() end - def start + def start if @run_interval + super if !ENV["PODS_CHUNK_SIZE"].nil? && !ENV["PODS_CHUNK_SIZE"].empty? && ENV["PODS_CHUNK_SIZE"].to_i > 0 @PODS_CHUNK_SIZE = ENV["PODS_CHUNK_SIZE"].to_i else @@ -58,7 +65,7 @@ def start $log.warn("in_kube_podinventory::start: setting to default value since got PODS_CHUNK_SIZE nil or empty") @PODS_CHUNK_SIZE = 1000 end - $log.info("in_kube_podinventory::start : PODS_CHUNK_SIZE @ #{@PODS_CHUNK_SIZE}") + $log.info("in_kube_podinventory::start: PODS_CHUNK_SIZE @ #{@PODS_CHUNK_SIZE}") if !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].nil? && !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].empty? && ENV["PODS_EMIT_STREAM_BATCH_SIZE"].to_i > 0 @PODS_EMIT_STREAM_BATCH_SIZE = ENV["PODS_EMIT_STREAM_BATCH_SIZE"].to_i @@ -67,8 +74,7 @@ def start $log.warn("in_kube_podinventory::start: setting to default value since got PODS_EMIT_STREAM_BATCH_SIZE nil or empty") @PODS_EMIT_STREAM_BATCH_SIZE = 200 end - $log.info("in_kube_podinventory::start : PODS_EMIT_STREAM_BATCH_SIZE @ #{@PODS_EMIT_STREAM_BATCH_SIZE}") - + $log.info("in_kube_podinventory::start: PODS_EMIT_STREAM_BATCH_SIZE @ #{@PODS_EMIT_STREAM_BATCH_SIZE}") @finished = false @condition = ConditionVariable.new @mutex = Mutex.new @@ -84,6 +90,7 @@ def shutdown @condition.signal } @thread.join + super # This super must be at the end of shutdown method end end @@ -100,7 +107,8 @@ def enumerate(podList = nil) batchTime = currentTime.utc.iso8601 serviceRecords = [] @podInventoryE2EProcessingLatencyMs = 0 - podInventoryStartTime = (Time.now.to_f * 1000).to_i + podInventoryStartTime = (Time.now.to_f * 1000).to_i + # Get services first so that we dont need to make a call for very chunk $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") serviceInfo = 
KubernetesApiClient.getKubeResourceInfo("services") @@ -189,12 +197,12 @@ def enumerate(podList = nil) end def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batchTime = Time.utc.iso8601) - currentTime = Time.now - emitTime = currentTime.to_f + currentTime = Time.now #batchTime = currentTime.utc.iso8601 - eventStream = MultiEventStream.new - kubePerfEventStream = MultiEventStream.new - insightsMetricsEventStream = MultiEventStream.new + eventStream = Fluent::MultiEventStream.new + containerInventoryStream = Fluent::MultiEventStream.new + kubePerfEventStream = Fluent::MultiEventStream.new + insightsMetricsEventStream = Fluent::MultiEventStream.new @@istestvar = ENV["ISTEST"] begin #begin block start @@ -205,13 +213,8 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc podInventoryRecords = getPodInventoryRecords(item, serviceRecords, batchTime) podInventoryRecords.each do |record| if !record.nil? - wrapper = { - "DataType" => "KUBE_POD_INVENTORY_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [record.each { |k, v| record[k] = v }], - } - eventStream.add(emitTime, wrapper) if wrapper - @inventoryToMdmConvertor.process_pod_inventory_record(wrapper) + eventStream.add(Fluent::Engine.now, record) if record + @inventoryToMdmConvertor.process_pod_inventory_record(record) end end # Setting this flag to true so that we can send ContainerInventory records for containers @@ -228,13 +231,8 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc # Send container inventory records for containers on windows nodes @winContainerCount += containerInventoryRecords.length containerInventoryRecords.each do |cirecord| - if !cirecord.nil? - ciwrapper = { - "DataType" => "CONTAINER_INVENTORY_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [cirecord.each { |k, v| cirecord[k] = v }], - } - eventStream.add(emitTime, ciwrapper) if ciwrapper + if !cirecord.nil? 
+ containerInventoryStream.add(Fluent::Engine.now, cirecord) if cirecord end end end @@ -246,7 +244,7 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end router.emit_stream(@tag, eventStream) if eventStream - eventStream = MultiEventStream.new + eventStream = Fluent::MultiEventStream.new end #container perf records @@ -256,19 +254,17 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(item, "limits", "cpu", "cpuLimitNanoCores", batchTime)) containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(item, "limits", "memory", "memoryLimitBytes", batchTime)) - containerMetricDataItems.each do |record| - record["DataType"] = "LINUX_PERF_BLOB" - record["IPName"] = "LogManagement" - kubePerfEventStream.add(emitTime, record) if record + containerMetricDataItems.each do |record| + kubePerfEventStream.add(Fluent::Engine.now, record) if record end if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && kubePerfEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE $log.info("in_kube_podinventory::parse_and_emit_records: number of container perf records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream + router.emit_stream(@kubeperfTag, kubePerfEventStream) if kubePerfEventStream if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) $log.info("kubeContainerPerfEventEmitStreamSuccess @ #{Time.now.utc.iso8601}") end - kubePerfEventStream = MultiEventStream.new + kubePerfEventStream = Fluent::MultiEventStream.new end # container GPU records @@ -277,13 +273,8 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(item, "limits", "nvidia.com/gpu", "containerGpuLimits", batchTime)) containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(item, "requests", "amd.com/gpu", "containerGpuRequests", batchTime)) containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(item, "limits", "amd.com/gpu", "containerGpuLimits", batchTime)) - containerGPUInsightsMetricsDataItems.each do |insightsMetricsRecord| - wrapper = { - "DataType" => "INSIGHTS_METRICS_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], - } - insightsMetricsEventStream.add(emitTime, wrapper) if wrapper + containerGPUInsightsMetricsDataItems.each do |insightsMetricsRecord| + insightsMetricsEventStream.add(Fluent::Engine.now, insightsMetricsRecord) if insightsMetricsRecord end if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && insightsMetricsEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE @@ -291,8 +282,8 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) $log.info("kubePodInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") end - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream - insightsMetricsEventStream = MultiEventStream.new + router.emit_stream(@insightsMetricsTag, insightsMetricsEventStream) if insightsMetricsEventStream + insightsMetricsEventStream = Fluent::MultiEventStream.new end end #podInventory block end @@ -305,9 +296,18 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc eventStream = nil end + if containerInventoryStream.count > 0 + $log.info("in_kube_podinventory::parse_and_emit_records: number of windows container inventory records emitted #{containerInventoryStream.count} @ #{Time.now.utc.iso8601}") + router.emit_stream(@containerInventoryTag, containerInventoryStream) if containerInventoryStream + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) + $log.info("kubeWindowsContainerInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end + containerInventoryStream = nil + end + if kubePerfEventStream.count > 0 $log.info("in_kube_podinventory::parse_and_emit_records: number of perf records emitted #{kubePerfEventStream.count} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream + router.emit_stream(@kubeperfTag, kubePerfEventStream) if kubePerfEventStream kubePerfEventStream = nil if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) $log.info("kubeContainerPerfEventEmitStreamSuccess @ #{Time.now.utc.iso8601}") @@ -316,7 +316,7 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc if insightsMetricsEventStream.count > 0 $log.info("in_kube_podinventory::parse_and_emit_records: number of insights metrics records emitted #{insightsMetricsEventStream.count} @ #{Time.now.utc.iso8601}") - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + router.emit_stream(@insightsMetricsTag, insightsMetricsEventStream) if insightsMetricsEventStream if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) $log.info("kubePodInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") end @@ -327,7 +327,7 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc @log.info "Sending pod inventory mdm records to out_mdm" pod_inventory_mdm_records = @inventoryToMdmConvertor.get_pod_inventory_mdm_records(batchTime) @log.info "pod_inventory_mdm_records.size #{pod_inventory_mdm_records.size}" - mdm_pod_inventory_es = MultiEventStream.new + mdm_pod_inventory_es = Fluent::MultiEventStream.new pod_inventory_mdm_records.each { |pod_inventory_mdm_record| mdm_pod_inventory_es.add(batchTime, pod_inventory_mdm_record) if pod_inventory_mdm_record } if pod_inventory_mdm_records @@ -335,22 +335,17 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc end if continuationToken.nil? # sending kube services inventory records - kubeServicesEventStream = MultiEventStream.new + kubeServicesEventStream = Fluent::MultiEventStream.new serviceRecords.each do |kubeServiceRecord| if !kubeServiceRecord.nil? 
# adding before emit to reduce memory foot print kubeServiceRecord["ClusterId"] = KubernetesApiClient.getClusterId - kubeServiceRecord["ClusterName"] = KubernetesApiClient.getClusterName - kubeServicewrapper = { - "DataType" => "KUBE_SERVICES_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [kubeServiceRecord.each { |k, v| kubeServiceRecord[k] = v }], - } - kubeServicesEventStream.add(emitTime, kubeServicewrapper) if kubeServicewrapper + kubeServiceRecord["ClusterName"] = KubernetesApiClient.getClusterName + kubeServicesEventStream.add(Fluent::Engine.now, kubeServiceRecord) if kubeServiceRecord if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && kubeServicesEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE $log.info("in_kube_podinventory::parse_and_emit_records: number of service records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream - kubeServicesEventStream = MultiEventStream.new + router.emit_stream(@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream + kubeServicesEventStream = Fluent::MultiEventStream.new if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) $log.info("kubeServicesEventEmitStreamSuccess @ #{Time.now.utc.iso8601}") end @@ -360,7 +355,7 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc if kubeServicesEventStream.count > 0 $log.info("in_kube_podinventory::parse_and_emit_records : number of service records emitted #{kubeServicesEventStream.count} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream + router.emit_stream(@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) $log.info("kubeServicesEventEmitStreamSuccess @ #{Time.now.utc.iso8601}") end @@ -652,6 +647,6 @@ def getServiceNameFromLabels(namespace, labels, serviceRecords) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end return serviceName - end + end end # Kube_Pod_Input end # module diff --git a/source/plugins/ruby/in_kube_pvinventory.rb b/source/plugins/ruby/in_kube_pvinventory.rb index 4efe86f61..fbd86787e 100644 --- a/source/plugins/ruby/in_kube_pvinventory.rb +++ b/source/plugins/ruby/in_kube_pvinventory.rb @@ -1,6 +1,11 @@ -module Fluent +#!/usr/local/bin/ruby +# frozen_string_literal: true + +require 'fluent/plugin/input' + +module Fluent::Plugin class Kube_PVInventory_Input < Input - Plugin.register_input("kubepvinventory", self) + Fluent::Plugin.register_input("kube_pvinventory", self) @@hostName = (OMS::Common.get_hostname) @@ -22,14 +27,15 @@ def initialize end config_param :run_interval, :time, :default => 60 - config_param :tag, :string, :default => "oms.containerinsights.KubePVInventory" + config_param :tag, :string, :default => "oneagent.containerInsights.KUBE_PV_INVENTORY_BLOB" def configure(conf) super end - def start + def start if @run_interval + super @finished = false @condition = ConditionVariable.new @mutex = Mutex.new @@ -45,6 +51,7 @@ def shutdown @condition.signal } @thread.join + super end end @@ -54,7 +61,7 @@ def enumerate telemetryFlush = false @pvTypeToCountHash = {} currentTime = Time.now - batchTime = currentTime.utc.iso8601 + batchTime = currentTime.utc.iso8601 continuationToken = nil $log.info("in_kube_pvinventory::enumerate : Getting PVs from Kube API @ #{Time.now.utc.iso8601}") @@ -103,9 +110,8 @@ def enumerate end # end enumerate def parse_and_emit_records(pvInventory, batchTime = Time.utc.iso8601) - currentTime = Time.now - emitTime = currentTime.to_f - eventStream = MultiEventStream.new + currentTime = Time.now + eventStream = Fluent::MultiEventStream.new @@istestvar = ENV["ISTEST"] 
begin records = [] @@ -145,13 +151,8 @@ def parse_and_emit_records(pvInventory, batchTime = Time.utc.iso8601) end records.each do |record| - if !record.nil? - wrapper = { - "DataType" => "KUBE_PV_INVENTORY_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [record.each { |k, v| record[k] = v }], - } - eventStream.add(emitTime, wrapper) if wrapper + if !record.nil? + eventStream.add(Fluent::Engine.now, record) end end @@ -250,7 +251,6 @@ def run_periodic @mutex.lock end @mutex.unlock - end - + end end # Kube_PVInventory_Input end # module \ No newline at end of file diff --git a/source/plugins/ruby/in_kubestate_deployments.rb b/source/plugins/ruby/in_kubestate_deployments.rb index 27e4709a2..d17830cd5 100644 --- a/source/plugins/ruby/in_kubestate_deployments.rb +++ b/source/plugins/ruby/in_kubestate_deployments.rb @@ -1,9 +1,11 @@ #!/usr/local/bin/ruby # frozen_string_literal: true -module Fluent +require 'fluent/plugin/input' + +module Fluent::Plugin class Kube_Kubestate_Deployments_Input < Input - Plugin.register_input("kubestatedeployments", self) + Fluent::Plugin.register_input("kubestate_deployments", self) @@istestvar = ENV["ISTEST"] # telemetry - To keep telemetry cost reasonable, we keep track of the max deployments over a period of 15m @@deploymentsCount = 0 @@ -36,14 +38,15 @@ def initialize end config_param :run_interval, :time, :default => 60 - config_param :tag, :string, :default => Constants::INSIGHTSMETRICS_FLUENT_TAG + config_param :tag, :string, :default => "oneagent.containerInsights.INSIGHTS_METRICS_BLOB" def configure(conf) super end - def start + def start if @run_interval + super if !ENV["DEPLOYMENTS_CHUNK_SIZE"].nil? && !ENV["DEPLOYMENTS_CHUNK_SIZE"].empty? 
&& ENV["DEPLOYMENTS_CHUNK_SIZE"].to_i > 0 @DEPLOYMENTS_CHUNK_SIZE = ENV["DEPLOYMENTS_CHUNK_SIZE"].to_i else @@ -52,11 +55,11 @@ def start @DEPLOYMENTS_CHUNK_SIZE = 500 end $log.info("in_kubestate_deployments::start : DEPLOYMENTS_CHUNK_SIZE @ #{@DEPLOYMENTS_CHUNK_SIZE}") - + @finished = false @condition = ConditionVariable.new @mutex = Mutex.new - @thread = Thread.new(&method(:run_periodic)) + @thread = Thread.new(&method(:run_periodic)) end end @@ -67,6 +70,7 @@ def shutdown @condition.signal } @thread.join + super # This super must be at the end of shutdown method end end @@ -77,8 +81,8 @@ def enumerate batchTime = currentTime.utc.iso8601 #set the running total for this batch to 0 - @deploymentsRunningTotal = 0 - + @deploymentsRunningTotal = 0 + # Initializing continuation token to nil continuationToken = nil $log.info("in_kubestate_deployments::enumerate : Getting deployments from Kube API @ #{Time.now.utc.iso8601}") @@ -126,7 +130,7 @@ def enumerate def parse_and_emit_records(deployments, batchTime = Time.utc.iso8601) metricItems = [] - insightsMetricsEventStream = MultiEventStream.new + insightsMetricsEventStream = Fluent::MultiEventStream.new begin metricInfo = deployments metricInfo["items"].each do |deployment| @@ -182,16 +186,11 @@ def parse_and_emit_records(deployments, batchTime = Time.utc.iso8601) end time = Time.now.to_f - metricItems.each do |insightsMetricsRecord| - wrapper = { - "DataType" => "INSIGHTS_METRICS_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], - } - insightsMetricsEventStream.add(time, wrapper) if wrapper + metricItems.each do |insightsMetricsRecord| + insightsMetricsEventStream.add(Fluent::Engine.now, insightsMetricsRecord) if insightsMetricsRecord end - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + router.emit_stream(@tag, insightsMetricsEventStream) if insightsMetricsEventStream 
$log.info("successfully emitted #{metricItems.length()} kube_state_deployment metrics") @deploymentsRunningTotal = @deploymentsRunningTotal + metricItems.length() @@ -234,6 +233,6 @@ def run_periodic @mutex.lock end @mutex.unlock - end + end end end diff --git a/source/plugins/ruby/in_kubestate_hpa.rb b/source/plugins/ruby/in_kubestate_hpa.rb index afecf8e3b..833d1a0ae 100644 --- a/source/plugins/ruby/in_kubestate_hpa.rb +++ b/source/plugins/ruby/in_kubestate_hpa.rb @@ -1,9 +1,11 @@ #!/usr/local/bin/ruby # frozen_string_literal: true -module Fluent +require 'fluent/plugin/input' + +module Fluent::Plugin class Kube_Kubestate_HPA_Input < Input - Plugin.register_input("kubestatehpa", self) + Fluent::Plugin.register_input("kubestate_hpa", self) @@istestvar = ENV["ISTEST"] def initialize @@ -16,7 +18,7 @@ def initialize require_relative "oms_common" require_relative "omslog" require_relative "ApplicationInsightsUtility" - require_relative "constants" + require_relative "constants" # refer tomlparser-agent-config for defaults # this configurable via configmap @@ -33,14 +35,15 @@ def initialize end config_param :run_interval, :time, :default => 60 - config_param :tag, :string, :default => Constants::INSIGHTSMETRICS_FLUENT_TAG + config_param :tag, :string, :default => "oneagent.containerInsights.INSIGHTS_METRICS_BLOB" def configure(conf) super end - def start + def start if @run_interval + super if !ENV["HPA_CHUNK_SIZE"].nil? && !ENV["HPA_CHUNK_SIZE"].empty? 
&& ENV["HPA_CHUNK_SIZE"].to_i > 0 @HPA_CHUNK_SIZE = ENV["HPA_CHUNK_SIZE"].to_i else @@ -64,6 +67,7 @@ def shutdown @condition.signal } @thread.join + super end end @@ -74,7 +78,7 @@ def enumerate batchTime = currentTime.utc.iso8601 @hpaCount = 0 - + # Initializing continuation token to nil continuationToken = nil $log.info("in_kubestate_hpa::enumerate : Getting HPAs from Kube API @ #{Time.now.utc.iso8601}") @@ -113,7 +117,7 @@ def enumerate def parse_and_emit_records(hpas, batchTime = Time.utc.iso8601) metricItems = [] - insightsMetricsEventStream = MultiEventStream.new + insightsMetricsEventStream = Fluent::MultiEventStream.new begin metricInfo = hpas metricInfo["items"].each do |hpa| @@ -182,16 +186,11 @@ def parse_and_emit_records(hpas, batchTime = Time.utc.iso8601) end time = Time.now.to_f - metricItems.each do |insightsMetricsRecord| - wrapper = { - "DataType" => "INSIGHTS_METRICS_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], - } - insightsMetricsEventStream.add(time, wrapper) if wrapper + metricItems.each do |insightsMetricsRecord| + insightsMetricsEventStream.add(Fluent::Engine.now, insightsMetricsRecord) if insightsMetricsRecord end - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + router.emit_stream(@tag, insightsMetricsEventStream) if insightsMetricsEventStream $log.info("successfully emitted #{metricItems.length()} kube_state_hpa metrics") if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0) $log.info("kubestatehpaInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") @@ -232,6 +231,6 @@ def run_periodic @mutex.lock end @mutex.unlock - end + end end end diff --git a/source/plugins/ruby/in_win_cadvisor_perf.rb b/source/plugins/ruby/in_win_cadvisor_perf.rb index 9c267cf4f..61e823ea6 100644 --- a/source/plugins/ruby/in_win_cadvisor_perf.rb +++ b/source/plugins/ruby/in_win_cadvisor_perf.rb @@ -1,9 +1,11 @@ #!/usr/local/bin/ruby # frozen_string_literal: true -module Fluent +require 'fluent/plugin/input' + +module Fluent::Plugin class Win_CAdvisor_Perf_Input < Input - Plugin.register_input("wincadvisorperf", self) + Fluent::Plugin.register_input("win_cadvisor_perf", self) @@winNodes = [] @@ -18,10 +20,11 @@ def initialize require_relative "oms_common" require_relative "omslog" require_relative "constants" + @insightsMetricsTag = "oneagent.containerInsights.INSIGHTS_METRICS_BLOB" end config_param :run_interval, :time, :default => 60 - config_param :tag, :string, :default => "oms.api.wincadvisorperf" + config_param :tag, :string, :default => "oneagent.containerInsights.LINUX_PERF_BLOB" config_param :mdmtag, :string, :default => "mdm.cadvisorperf" def configure(conf) @@ -50,11 +53,11 @@ def shutdown end def enumerate() - time = Time.now.to_f + time = Fluent::Engine.now begin timeDifference = (DateTime.now.to_time.to_i - @@winNodeQueryTimeTracker).abs timeDifferenceInMinutes = timeDifference / 60 - @@istestvar = ENV["ISTEST"] + @@istestvar = ENV["ISTEST"] #Resetting this cache so that it is populated with the current set of containers with every call CAdvisorMetricsAPIClient.resetWinContainerIdCache() @@ -68,12 +71,10 @@ def enumerate() @@winNodeQueryTimeTracker = DateTime.now.to_time.to_i end @@winNodes.each do |winNode| - eventStream = MultiEventStream.new + eventStream = Fluent::MultiEventStream.new metricData = CAdvisorMetricsAPIClient.getMetrics(winNode: winNode, metricTime: 
Time.now.utc.iso8601) metricData.each do |record| if !record.empty? - record["DataType"] = "LINUX_PERF_BLOB" - record["IPName"] = "LogManagement" eventStream.add(time, record) if record end end @@ -88,18 +89,13 @@ def enumerate() begin containerGPUusageInsightsMetricsDataItems = [] containerGPUusageInsightsMetricsDataItems.concat(CAdvisorMetricsAPIClient.getInsightsMetrics(winNode: winNode, metricTime: Time.now.utc.iso8601)) - insightsMetricsEventStream = MultiEventStream.new + insightsMetricsEventStream = Fluent::MultiEventStream.new containerGPUusageInsightsMetricsDataItems.each do |insightsMetricsRecord| - wrapper = { - "DataType" => "INSIGHTS_METRICS_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], - } - insightsMetricsEventStream.add(time, wrapper) if wrapper + insightsMetricsEventStream.add(time, insightsMetricsRecord) if insightsMetricsRecord end - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + router.emit_stream(@insightsMetricsTag, insightsMetricsEventStream) if insightsMetricsEventStream router.emit_stream(@mdmtag, insightsMetricsEventStream) if insightsMetricsEventStream if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0) $log.info("winCAdvisorInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") diff --git a/source/plugins/ruby/out_health_forward.rb b/source/plugins/ruby/out_health_forward.rb index 6fcfe368b..59eed97da 100644 --- a/source/plugins/ruby/out_health_forward.rb +++ b/source/plugins/ruby/out_health_forward.rb @@ -15,469 +15,593 @@ # limitations under the License. 
# -require 'base64' -require 'socket' -require 'fileutils' - -require 'cool.io' - require 'fluent/output' require 'fluent/config/error' +require 'fluent/clock' +require 'fluent/tls' +require 'base64' +require 'forwardable' -module Fluent - class ForwardOutputError < StandardError - end - - class ForwardOutputResponseError < ForwardOutputError - end +require 'fluent/compat/socket_util' +require 'fluent/plugin/out_forward/handshake_protocol' +require 'fluent/plugin/out_forward/load_balancer' +require 'fluent/plugin/out_forward/socket_cache' +require 'fluent/plugin/out_forward/failure_detector' +require 'fluent/plugin/out_forward/error' +require 'fluent/plugin/out_forward/connection_manager' +require 'fluent/plugin/out_forward/ack_handler' - class ForwardOutputConnectionClosedError < ForwardOutputError - end +module Fluent::Plugin + class HealthForwardOutput < Output + Fluent::Plugin.register_output('health_forward', self) - class ForwardOutputACKTimeoutError < ForwardOutputResponseError - end + helpers :socket, :server, :timer, :thread, :compat_parameters, :service_discovery - class HealthForwardOutput < ObjectBufferedOutput - Plugin.register_output('health_forward', self) + LISTEN_PORT = 25227 - def initialize - super - require 'fluent/plugin/socket_util' - @nodes = [] #=> [Node] - end + desc 'The transport protocol.' + config_param :transport, :enum, list: [:tcp, :tls], default: :tcp + # TODO: TLS session cache/tickets desc 'The timeout time when sending event logs.' 
config_param :send_timeout, :time, default: 60 - desc 'The transport protocol to use for heartbeats.(udp,tcp,none)' - config_param :heartbeat_type, default: :udp do |val| - case val.downcase - when 'tcp' - :tcp - when 'udp' - :udp - when 'none' - :none - else - raise ConfigError, "forward output heartbeat type should be 'tcp', 'udp', or 'none'" - end - end + desc 'The timeout time for socket connect' + config_param :connect_timeout, :time, default: nil + # TODO: add linger_timeout, recv_timeout + + desc 'The protocol to use for heartbeats (default is the same with "transport").' + config_param :heartbeat_type, :enum, list: [:transport, :tcp, :udp, :none], default: :transport desc 'The interval of the heartbeat packer.' config_param :heartbeat_interval, :time, default: 1 desc 'The wait time before accepting a server fault recovery.' config_param :recover_wait, :time, default: 10 desc 'The hard timeout used to detect server failure.' config_param :hard_timeout, :time, default: 60 - desc 'Set TTL to expire DNS cache in seconds.' - config_param :expire_dns_cache, :time, default: nil # 0 means disable cache desc 'The threshold parameter used to detect server faults.' config_param :phi_threshold, :integer, default: 16 desc 'Use the "Phi accrual failure detector" to detect server failure.' config_param :phi_failure_detector, :bool, default: true - # if any options added that requires extended forward api, fix @extend_internal_protocol - desc 'Change the protocol to at-least-once.' config_param :require_ack_response, :bool, default: false # require in_forward to respond with ack - desc 'This option is used when require_ack_response is true.' - config_param :ack_response_timeout, :time, default: 190 # 0 means do not wait for ack responses + + ## The reason of default value of :ack_response_timeout: # Linux default tcp_syn_retries is 5 (in many environment) # 3 + 6 + 12 + 24 + 48 + 96 -> 189 (sec) + desc 'This option is used when require_ack_response is true.' 
+ config_param :ack_response_timeout, :time, default: 190 + + desc 'The interval while reading data from server' + config_param :read_interval_msec, :integer, default: 50 # 50ms + desc 'Reading data size from server' + config_param :read_length, :size, default: 512 # 512bytes + + desc 'Set TTL to expire DNS cache in seconds.' + config_param :expire_dns_cache, :time, default: nil # 0 means disable cache desc 'Enable client-side DNS round robin.' config_param :dns_round_robin, :bool, default: false # heartbeat_type 'udp' is not available for this + desc 'Ignore DNS resolution and errors at startup time.' + config_param :ignore_network_errors_at_startup, :bool, default: false + + desc 'Verify that a connection can be made with one of out_forward nodes at the time of startup.' + config_param :verify_connection_at_startup, :bool, default: false + + desc 'Compress buffered data.' + config_param :compress, :enum, list: [:text, :gzip], default: :text + + desc 'The default version of TLS transport.' + config_param :tls_version, :enum, list: Fluent::TLS::SUPPORTED_VERSIONS, default: Fluent::TLS::DEFAULT_VERSION + desc 'The cipher configuration of TLS transport.' + config_param :tls_ciphers, :string, default: Fluent::TLS::CIPHERS_DEFAULT + desc 'Skip all verification of certificates or not.' + config_param :tls_insecure_mode, :bool, default: false + desc 'Allow self signed certificates or not.' + config_param :tls_allow_self_signed_cert, :bool, default: false + desc 'Verify hostname of servers and certificates or not in TLS transport.' + config_param :tls_verify_hostname, :bool, default: true + desc 'The additional CA certificate path for TLS.' + config_param :tls_ca_cert_path, :array, value_type: :string, default: nil + desc 'The additional certificate path for TLS.' + config_param :tls_cert_path, :array, value_type: :string, default: nil + desc 'The client certificate path for TLS.' 
+ config_param :tls_client_cert_path, :string, default: nil + desc 'The client private key path for TLS.' + config_param :tls_client_private_key_path, :string, default: nil + desc 'The client private key passphrase for TLS.' + config_param :tls_client_private_key_passphrase, :string, default: nil, secret: true + desc 'The certificate thumbprint for searching from Windows system certstore.' + config_param :tls_cert_thumbprint, :string, default: nil, secret: true + desc 'The certificate logical store name on Windows system certstore.' + config_param :tls_cert_logical_store_name, :string, default: nil + desc 'Enable to use certificate enterprise store on Windows system certstore.' + config_param :tls_cert_use_enterprise_store, :bool, default: true + desc "Enable keepalive connection." + config_param :keepalive, :bool, default: false + desc "Expired time of keepalive. Default value is nil, which means to keep connection as long as possible" + config_param :keepalive_timeout, :time, default: nil + + config_section :security, required: false, multi: false do + desc 'The hostname' + config_param :self_hostname, :string + desc 'Shared key for authentication' + config_param :shared_key, :string, secret: true + end + + config_section :server, param_name: :servers do + desc "The IP address or host name of the server." + config_param :host, :string + desc "The name of the server. Used for logging and certificate verification in TLS transport (when host is address)." + config_param :name, :string, default: nil + desc "The port number of the host." + config_param :port, :integer, default: LISTEN_PORT + desc "The shared key per server." + config_param :shared_key, :string, default: nil, secret: true + desc "The username for authentication." + config_param :username, :string, default: '' + desc "The password for authentication." + config_param :password, :string, default: '', secret: true + desc "Marks a node as the standby node for an Active-Standby model between Fluentd nodes." 
+ config_param :standby, :bool, default: false + desc "The load balancing weight." + config_param :weight, :integer, default: 60 + end + attr_reader :nodes - config_param :port, :integer, default: DEFAULT_LISTEN_PORT, deprecated: "User host xxx instead." - config_param :host, :string, default: nil, deprecated: "Use port xxx instead." - desc 'Skip network related error, e.g. DNS error, during plugin setup' - config_param :skip_network_error_at_init, :bool, :default => false + config_param :port, :integer, default: LISTEN_PORT, obsoleted: "User section instead." + config_param :host, :string, default: nil, obsoleted: "Use section instead." + config_section :buffer do + config_set_default :chunk_keys, ["tag"] + end - attr_accessor :extend_internal_protocol + attr_reader :read_interval, :recover_sample_size - def configure(conf) + def initialize super - # backward compatibility - if host = conf['host'] - port = conf['port'] - port = port ? port.to_i : DEFAULT_LISTEN_PORT - e = conf.add_element('server') - e['host'] = host - e['port'] = port.to_s - end + @nodes = [] #=> [Node] + @loop = nil + @thread = nil - recover_sample_size = @recover_wait / @heartbeat_interval + @usock = nil + @keep_alive_watcher_interval = 5 # TODO + @suspend_flush = false + end - # add options here if any options addes which uses extended protocol - @extend_internal_protocol = if @require_ack_response - true - else - false - end + def configure(conf) + compat_parameters_convert(conf, :buffer, default_chunk_key: 'tag') - if @dns_round_robin - if @heartbeat_type == :udp - raise ConfigError, "forward output heartbeat type must be 'tcp' or 'none' to use dns_round_robin option" - end - end + super - conf.elements.each {|e| - next if e.name != "server" + unless @chunk_key_tag + raise Fluent::ConfigError, "buffer chunk key must include 'tag' for forward output" + end - host = e['host'] - port = e['port'] - port = port ? 
port.to_i : DEFAULT_LISTEN_PORT + @read_interval = @read_interval_msec / 1000.0 + @recover_sample_size = @recover_wait / @heartbeat_interval - weight = e['weight'] - weight = weight ? weight.to_i : 60 + if @heartbeat_type == :tcp + log.warn "'heartbeat_type tcp' is deprecated. use 'transport' instead." + @heartbeat_type = :transport + end - standby = !!e['standby'] + if @dns_round_robin && @heartbeat_type == :udp + raise Fluent::ConfigError, "forward output heartbeat type must be 'transport' or 'none' to use dns_round_robin option" + end - name = e['name'] - unless name - name = "#{host}:#{port}" + if @transport == :tls + # socket helper adds CA cert or signed certificate to same cert store internally so unify it in this place. + if @tls_cert_path && !@tls_cert_path.empty? + @tls_ca_cert_path = @tls_cert_path + end + if @tls_ca_cert_path && !@tls_ca_cert_path.empty? + @tls_ca_cert_path.each do |path| + raise Fluent::ConfigError, "specified cert path does not exist:#{path}" unless File.exist?(path) + raise Fluent::ConfigError, "specified cert path is not readable:#{path}" unless File.readable?(path) + end end - failure = FailureDetector.new(@heartbeat_interval, @hard_timeout, Time.now.to_i.to_f) - - node_conf = NodeConfig2.new(name, host, port, weight, standby, failure, - @phi_threshold, recover_sample_size, @expire_dns_cache, @phi_failure_detector, @dns_round_robin, @skip_network_error_at_init) + if @tls_insecure_mode + log.warn "TLS transport is configured in insecure way" + @tls_verify_hostname = false + @tls_allow_self_signed_cert = true + end - if @heartbeat_type == :none - @nodes << NoneHeartbeatNode.new(log, node_conf) + if Fluent.windows? 
+ if (@tls_cert_path || @tls_ca_cert_path) && @tls_cert_logical_store_name + raise Fluent::ConfigError, "specified both cert path and tls_cert_logical_store_name is not permitted" + end else - @nodes << Node.new(log, node_conf) + raise Fluent::ConfigError, "This parameter is for only Windows" if @tls_cert_logical_store_name + raise Fluent::ConfigError, "This parameter is for only Windows" if @tls_cert_thumbprint end - log.info "adding forwarding server '#{name}'", host: host, port: port, weight: weight, plugin_id: plugin_id - } + end + + @ack_handler = @require_ack_response ? AckHandler.new(timeout: @ack_response_timeout, log: @log, read_length: @read_length) : nil + socket_cache = @keepalive ? SocketCache.new(@keepalive_timeout, @log) : nil + @connection_manager = Fluent::Plugin::ForwardOutput::ConnectionManager.new( + log: @log, + secure: !!@security, + connection_factory: method(:create_transfer_socket), + socket_cache: socket_cache, + ) - if @nodes.empty? - raise ConfigError, "forward output plugin requires at least one is required" + configs = [] + + # rewrite for using server as sd_static + conf.elements(name: 'server').each do |s| + s.name = 'service' end - end - def start - super + unless conf.elements(name: 'service').empty? 
+ # To copy `services` element only + new_elem = Fluent::Config::Element.new('static_service_discovery', {}, {}, conf.elements(name: 'service')) + configs << { type: :static, conf: new_elem } + end - @rand_seed = Random.new.seed - rebuild_weight_array - @rr = 0 + conf.elements(name: 'service_discovery').each_with_index do |c, i| + configs << { type: @service_discovery[i][:@type], conf: c } + end - unless @heartbeat_type == :none - @loop = Coolio::Loop.new + service_discovery_create_manager( + :out_forward_service_discovery_watcher, + configurations: configs, + load_balancer: Fluent::Plugin::ForwardOutput::LoadBalancer.new(log), + custom_build_method: method(:build_node), + ) - if @heartbeat_type == :udp - # assuming all hosts use udp - @usock = SocketUtil.create_udp_socket(@nodes.first.host) - @usock.fcntl(Fcntl::F_SETFL, Fcntl::O_NONBLOCK) - @hb = HeartbeatHandler.new(@usock, method(:on_heartbeat)) - @loop.attach(@hb) + discovery_manager.services.each do |server| + # it's only for test + @nodes << server + unless @heartbeat_type == :none + begin + server.validate_host_resolution! + rescue => e + raise unless @ignore_network_errors_at_startup + log.warn "failed to resolve node name when configured", server: (server.name || server.host), error: e + server.disable! + end end + end - @timer = HeartbeatRequestTimer.new(@heartbeat_interval, method(:on_timer)) - @loop.attach(@timer) + unless @as_secondary + if @compress == :gzip && @buffer.compress == :text + @buffer.compress = :gzip + elsif @compress == :text && @buffer.compress == :gzip + log.info "buffer is compressed. If you also want to save the bandwidth of a network, Add `compress` configuration in " + end + end - @thread = Thread.new(&method(:run)) + if discovery_manager.services.empty? + raise Fluent::ConfigError, "forward output plugin requires at least one node is required. 
Add or " end - end - def shutdown - @finished = true - if @loop - @loop.watchers.each {|w| w.detach } - @loop.stop + if !@keepalive && @keepalive_timeout + log.warn('The value of keepalive_timeout is ignored. if you want to use keepalive, please add `keepalive true` to your conf.') end - @thread.join if @thread - @usock.close if @usock + + raise Fluent::ConfigError, "ack_response_timeout must be a positive integer" if @ack_response_timeout < 1 end - def run - @loop.run if @loop - rescue - log.error "unexpected error", error: $!.to_s - log.error_backtrace + def multi_workers_ready? + true end - def write_objects(tag, chunk) - return if chunk.empty? + def prefer_delayed_commit + @require_ack_response + end - error = nil + def overwrite_delayed_commit_timeout + # Output#start sets @delayed_commit_timeout by @buffer_config.delayed_commit_timeout + # But it should be overwritten by ack_response_timeout to rollback chunks after timeout + if @delayed_commit_timeout != @ack_response_timeout + log.info "delayed_commit_timeout is overwritten by ack_response_timeout" + @delayed_commit_timeout = @ack_response_timeout + 2 # minimum ack_reader IO.select interval is 1s + end + end - wlen = @weight_array.length - wlen.times do - @rr = (@rr + 1) % wlen - node = @weight_array[@rr] + def start + super - if node.available? 
+ unless @heartbeat_type == :none + if @heartbeat_type == :udp + @usock = socket_create_udp(discovery_manager.services.first.host, discovery_manager.services.first.port, nonblock: true) + server_create_udp(:out_forward_heartbeat_receiver, 0, socket: @usock, max_bytes: @read_length, &method(:on_udp_heatbeat_response_recv)) + end + timer_execute(:out_forward_heartbeat_request, @heartbeat_interval, &method(:on_heartbeat_timer)) + end + + if @require_ack_response + overwrite_delayed_commit_timeout + thread_create(:out_forward_receiving_ack, &method(:ack_reader)) + end + + if @verify_connection_at_startup + discovery_manager.services.each do |node| begin - send_data(node, tag, chunk) - return - rescue - # for load balancing during detecting crashed servers - error = $! # use the latest error + node.verify_connection + rescue StandardError => e + log.fatal "forward's connection setting error: #{e.message}" + raise Fluent::UnrecoverableError, e.message end end end - if error - raise error - else - raise "no nodes are available" # TODO message + if @keepalive + timer_execute(:out_forward_keep_alived_socket_watcher, @keep_alive_watcher_interval, &method(:on_purge_obsolete_socks)) end end - private + def close + if @usock + # close socket and ignore errors: this socket will not be used anyway. + @usock.close rescue nil + end - def rebuild_weight_array - standby_nodes, regular_nodes = @nodes.partition {|n| - n.standby? - } + super + end - lost_weight = 0 - regular_nodes.each {|n| - unless n.available? - lost_weight += n.weight - end - } - log.debug "rebuilding weight array", lost_weight: lost_weight - - if lost_weight > 0 - standby_nodes.each {|n| - if n.available? 
- regular_nodes << n - log.warn "using standby node #{n.host}:#{n.port}", weight: n.weight - lost_weight -= n.weight - break if lost_weight <= 0 - end - } + def stop + super + + if @keepalive + @connection_manager.stop end + end + + def before_shutdown + super + @suspend_flush = true + end + + def after_shutdown + last_ack if @require_ack_response + super + end - weight_array = [] - gcd = regular_nodes.map {|n| n.weight }.inject(0) {|r,w| r.gcd(w) } - regular_nodes.each {|n| - (n.weight / gcd).times { - weight_array << n - } - } + def try_flush + return if @require_ack_response && @suspend_flush + super + end - # for load balancing during detecting crashed servers - coe = (regular_nodes.size * 6) / weight_array.size - weight_array *= coe if coe > 1 + def last_ack + overwrite_delayed_commit_timeout + ack_check(ack_select_interval) + end - r = Random.new(@rand_seed) - weight_array.sort_by! { r.rand } + def write(chunk) + return if chunk.empty? + tag = chunk.metadata.tag - @weight_array = weight_array + discovery_manager.select_service { |node| node.send_data(tag, chunk) } end - # MessagePack FixArray length = 3 (if @extend_internal_protocol) - # = 2 (else) - FORWARD_HEADER = [0x92].pack('C').freeze - FORWARD_HEADER_EXT = [0x93].pack('C').freeze - def forward_header - if @extend_internal_protocol - FORWARD_HEADER_EXT - else - FORWARD_HEADER + def try_write(chunk) + log.trace "writing a chunk to destination", chunk_id: dump_unique_id_hex(chunk.unique_id) + if chunk.empty? 
+ commit_write(chunk.unique_id) + return end + tag = chunk.metadata.tag + discovery_manager.select_service { |node| node.send_data(tag, chunk) } + last_ack if @require_ack_response && @suspend_flush end - #FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack - def send_heartbeat_tcp(node) - sock = connect(node) - begin - opt = [1, @send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; } - sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt) - opt = [@send_timeout.to_i, 0].pack('L!L!') # struct timeval - # don't send any data to not cause a compatibility problem - #sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt) - #sock.write FORWARD_TCP_HEARTBEAT_DATA - node.heartbeat(true) - ensure - sock.close + def create_transfer_socket(host, port, hostname, &block) + case @transport + when :tls + socket_create_tls( + host, port, + version: @tls_version, + ciphers: @tls_ciphers, + insecure: @tls_insecure_mode, + verify_fqdn: @tls_verify_hostname, + fqdn: hostname, + allow_self_signed_cert: @tls_allow_self_signed_cert, + cert_paths: @tls_ca_cert_path, + cert_path: @tls_client_cert_path, + private_key_path: @tls_client_private_key_path, + private_key_passphrase: @tls_client_private_key_passphrase, + cert_thumbprint: @tls_cert_thumbprint, + cert_logical_store_name: @tls_cert_logical_store_name, + cert_use_enterprise_store: @tls_cert_use_enterprise_store, + + # Enabling SO_LINGER causes tcp port exhaustion on Windows. + # This is because dynamic ports are only 16384 (from 49152 to 65535) and + # expiring SO_LINGER enabled ports should wait 4 minutes + # where set by TcpTimeDelay. Its default value is 4 minutes. + # So, we should disable SO_LINGER on Windows to prevent flood of waiting ports. + linger_timeout: Fluent.windows? ? 
nil : @send_timeout, + send_timeout: @send_timeout, + recv_timeout: @ack_response_timeout, + connect_timeout: @connect_timeout, + &block + ) + when :tcp + socket_create_tcp( + host, port, + linger_timeout: @send_timeout, + send_timeout: @send_timeout, + recv_timeout: @ack_response_timeout, + connect_timeout: @connect_timeout, + &block + ) + else + raise "BUG: unknown transport protocol #{@transport}" end end - def send_data(node, tag, chunk) - sock = connect(node) - begin - opt = [1, @send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; } - sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt) - - opt = [@send_timeout.to_i, 0].pack('L!L!') # struct timeval - sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt) - - # beginArray(2) - sock.write forward_header - - # writeRaw(tag) - sock.write tag.to_msgpack # tag - - # beginRaw(size) - sz = chunk.size - #if sz < 32 - # # FixRaw - # sock.write [0xa0 | sz].pack('C') - #elsif sz < 65536 - # # raw 16 - # sock.write [0xda, sz].pack('Cn') - #else - # raw 32 - sock.write [0xdb, sz].pack('CN') - #end - - # writeRawBody(packed_es) - chunk.write_to(sock) - - if @extend_internal_protocol - option = {} - option['chunk'] = Base64.encode64(chunk.unique_id) if @require_ack_response - sock.write option.to_msgpack - - if @require_ack_response && @ack_response_timeout > 0 - # Waiting for a response here results in a decrease of throughput because a chunk queue is locked. - # To avoid a decrease of troughput, it is necessary to prepare a list of chunks that wait for responses - # and process them asynchronously. - if IO.select([sock], nil, nil, @ack_response_timeout) - raw_data = sock.recv(1024) - - # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF. - # If this happens we assume the data wasn't delivered and retry it. - if raw_data.empty? - @log.warn "node #{node.host}:#{node.port} closed the connection. regard it as unavailable." - node.disable! 
- raise ForwardOutputConnectionClosedError, "node #{node.host}:#{node.port} closed connection" - else - # Serialization type of the response is same as sent data. - res = MessagePack.unpack(raw_data) - - if res['ack'] != option['chunk'] - # Some errors may have occured when ack and chunk id is different, so send the chunk again. - raise ForwardOutputResponseError, "ack in response and chunk id in sent data are different" - end - end - - else - # IO.select returns nil on timeout. - # There are 2 types of cases when no response has been received: - # (1) the node does not support sending responses - # (2) the node does support sending response but responses have not arrived for some reasons. - @log.warn "no response from #{node.host}:#{node.port}. regard it as unavailable." - node.disable! - raise ForwardOutputACKTimeoutError, "node #{node.host}:#{node.port} does not return ACK" - end - end + def statistics + stats = super + services = discovery_manager.services + healthy_nodes_count = 0 + registed_nodes_count = services.size + services.each do |s| + if s.available? + healthy_nodes_count += 1 end - - node.heartbeat(false) - return res # for test - ensure - sock.close end + + stats.merge( + 'healthy_nodes_count' => healthy_nodes_count, + 'registered_nodes_count' => registed_nodes_count, + ) end - def connect(node) - # TODO unix socket? - TCPSocket.new(node.resolved_host, node.port) + # MessagePack FixArray length is 3 + FORWARD_HEADER = [0x93].pack('C').freeze + def forward_header + FORWARD_HEADER end - class HeartbeatRequestTimer < Coolio::TimerWatcher - def initialize(interval, callback) - super(interval, true) - @callback = callback - end + private - def on_timer - @callback.call - rescue - # TODO log? 
+ def build_node(server) + name = server.name || "#{server.host}:#{server.port}" + log.info "adding forwarding server '#{name}'", host: server.host, port: server.port, weight: server.weight, plugin_id: plugin_id + + failure = Fluent::Plugin::ForwardOutput::FailureDetector.new(@heartbeat_interval, @hard_timeout, Time.now.to_i.to_f) + if @heartbeat_type == :none + NoneHeartbeatNode.new(self, server, failure: failure, connection_manager: @connection_manager, ack_handler: @ack_handler) + else + Node.new(self, server, failure: failure, connection_manager: @connection_manager, ack_handler: @ack_handler) end end - def on_timer - return if @finished - @nodes.each {|n| - if n.tick - rebuild_weight_array - end + def on_heartbeat_timer + need_rebuild = false + discovery_manager.services.each do |n| begin - #log.trace "sending heartbeat #{n.host}:#{n.port} on #{@heartbeat_type}" - if @heartbeat_type == :tcp - send_heartbeat_tcp(n) - else - @usock.send "\0", 0, Socket.pack_sockaddr_in(n.port, n.resolved_host) - end - rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR, Errno::ECONNREFUSED - # TODO log - log.debug "failed to send heartbeat packet to #{n.host}:#{n.port}", error: $!.to_s + log.trace "sending heartbeat", host: n.host, port: n.port, heartbeat_type: @heartbeat_type + n.usock = @usock if @usock + need_rebuild = n.send_heartbeat || need_rebuild + rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR, Errno::ECONNREFUSED, Errno::ETIMEDOUT => e + log.debug "failed to send heartbeat packet", host: n.host, port: n.port, heartbeat_type: @heartbeat_type, error: e + rescue => e + log.debug "unexpected error happen during heartbeat", host: n.host, port: n.port, heartbeat_type: @heartbeat_type, error: e end - } - end - class HeartbeatHandler < Coolio::IO - def initialize(io, callback) - super(io) - @io = io - @callback = callback + need_rebuild = n.tick || need_rebuild end - def on_readable - begin - msg, addr = @io.recvfrom(1024) - rescue Errno::EAGAIN, Errno::EWOULDBLOCK, 
Errno::EINTR - return - end - host = addr[3] - port = addr[1] - sockaddr = Socket.pack_sockaddr_in(port, host) - @callback.call(sockaddr, msg) - rescue - # TODO log? + if need_rebuild + discovery_manager.rebalance end end - def on_heartbeat(sockaddr, msg) - port, host = Socket.unpack_sockaddr_in(sockaddr) - if node = @nodes.find {|n| n.sockaddr == sockaddr } - #log.trace "heartbeat from '#{node.name}'", :host=>node.host, :port=>node.port + def on_udp_heatbeat_response_recv(data, sock) + sockaddr = Socket.pack_sockaddr_in(sock.remote_port, sock.remote_host) + if node = discovery_manager.services.find { |n| n.sockaddr == sockaddr } + # log.trace "heartbeat arrived", name: node.name, host: node.host, port: node.port if node.heartbeat - rebuild_weight_array + discovery_manager.rebalance end + else + log.warn("Unknown heartbeat response received from #{sock.remote_host}:#{sock.remote_port}. It may service out") end end - NodeConfig2 = Struct.new("NodeConfig2", :name, :host, :port, :weight, :standby, :failure, - :phi_threshold, :recover_sample_size, :expire_dns_cache, :phi_failure_detector, :dns_round_robin, :skip_network_error) + def on_purge_obsolete_socks + @connection_manager.purge_obsolete_socks + end + + def ack_select_interval + if @delayed_commit_timeout > 3 + 1 + else + @delayed_commit_timeout / 3.0 + end + end + + def ack_reader + select_interval = ack_select_interval + + while thread_current_running? + ack_check(select_interval) + end + end + + def ack_check(select_interval) + @ack_handler.collect_response(select_interval) do |chunk_id, node, sock, result| + @connection_manager.close(sock) + + case result + when AckHandler::Result::SUCCESS + commit_write(chunk_id) + when AckHandler::Result::FAILED + node.disable! 
+ rollback_write(chunk_id, update_retry: false) + when AckHandler::Result::CHUNKID_UNMATCHED + rollback_write(chunk_id, update_retry: false) + else + log.warn("BUG: invalid status #{result} #{chunk_id}") + + if chunk_id + rollback_write(chunk_id, update_retry: false) + end + end + end + end class Node - def initialize(log, conf) - @log = log - @conf = conf - @name = @conf.name - @host = @conf.host - @port = @conf.port - @weight = @conf.weight - @failure = @conf.failure + extend Forwardable + def_delegators :@server, :discovery_id, :host, :port, :name, :weight, :standby + + # @param connection_manager [Fluent::Plugin::ForwardOutput::ConnectionManager] + # @param ack_handler [Fluent::Plugin::ForwardOutput::AckHandler] + def initialize(sender, server, failure:, connection_manager:, ack_handler:) + @sender = sender + @log = sender.log + @compress = sender.compress + @server = server + + @name = server.name + @host = server.host + @port = server.port + @weight = server.weight + @standby = server.standby + @failure = failure @available = true + # @hostname is used for certificate verification & TLS SNI + host_is_hostname = !(IPAddr.new(@host) rescue false) + @hostname = case + when host_is_hostname then @host + when @name then @name + else nil + end + + @usock = nil + + @handshake = Fluent::Plugin::ForwardOutput::HandshakeProtocol.new( + log: @log, + hostname: sender.security && sender.security.self_hostname, + shared_key: server.shared_key || (sender.security && sender.security.shared_key) || '', + password: server.password || '', + username: server.username || '', + ) + + @unpacker = Fluent::MessagePackFactory.msgpack_unpacker + @resolved_host = nil @resolved_time = 0 - begin - resolved_host # check dns - rescue => e - if @conf.skip_network_error - log.warn "#{@name} got network error during setup. 
Resolve host later", :error => e, :error_class => e.class - else - raise - end - end - end + @resolved_once = false + + @connection_manager = connection_manager + @ack_handler = ack_handler + end + + attr_accessor :usock - attr_reader :conf - attr_reader :name, :host, :port, :weight - attr_reader :sockaddr # used by on_heartbeat - attr_reader :failure, :available # for test + attr_reader :state + attr_reader :sockaddr # used by on_udp_heatbeat_response_recv + attr_reader :failure # for test + + def validate_host_resolution! + resolved_host + end def available? @available @@ -488,41 +612,158 @@ def disable! end def standby? - @conf.standby + @standby + end + + def verify_connection + connect do |sock, ri| + ensure_established_connection(sock, ri) + end + end + + def establish_connection(sock, ri) + while ri.state != :established + begin + # TODO: On Ruby 2.2 or earlier, read_nonblock doesn't work expectedly. + # We need rewrite around here using new socket/server plugin helper. + buf = sock.read_nonblock(@sender.read_length) + if buf.empty? + sleep @sender.read_interval + next + end + @unpacker.feed_each(buf) do |data| + if @handshake.invoke(sock, ri, data) == :established + @log.debug "connection established", host: @host, port: @port + end + end + rescue IO::WaitReadable + # If the exception is Errno::EWOULDBLOCK or Errno::EAGAIN, it is extended by IO::WaitReadable. + # So IO::WaitReadable can be used to rescue the exceptions for retrying read_nonblock. + # https//docs.ruby-lang.org/en/2.3.0/IO.html#method-i-read_nonblock + sleep @sender.read_interval unless ri.state == :established + rescue SystemCallError => e + @log.warn "disconnected by error", host: @host, port: @port, error: e + disable! + break + rescue EOFError + @log.warn "disconnected", host: @host, port: @port + disable! + break + rescue HeloError => e + @log.warn "received invalid helo message from #{@name}" + disable! 
+ break + rescue PingpongError => e + @log.warn "connection refused to #{@name || @host}: #{e.message}" + disable! + break + end + end + end + + def send_data_actual(sock, tag, chunk) + option = { 'size' => chunk.size, 'compressed' => @compress } + option['chunk'] = Base64.encode64(chunk.unique_id) if @ack_handler + + # https://github.com/fluent/fluentd/wiki/Forward-Protocol-Specification-v1#packedforward-mode + # out_forward always uses str32 type for entries. + # str16 can store only 64kbytes, and it should be much smaller than buffer chunk size. + + tag = tag.dup.force_encoding(Encoding::UTF_8) + + sock.write @sender.forward_header # array, size=3 + sock.write tag.to_msgpack # 1. tag: String (str) + chunk.open(compressed: @compress) do |chunk_io| + entries = [0xdb, chunk_io.size].pack('CN') + sock.write entries.force_encoding(Encoding::UTF_8) # 2. entries: String (str32) + IO.copy_stream(chunk_io, sock) # writeRawBody(packed_es) + end + sock.write option.to_msgpack # 3. option: Hash(map) + + # TODO: use bin32 for non-utf8 content(entries) when old msgpack-ruby (0.5.x or earlier) not supported + end + + def send_data(tag, chunk) + ack = @ack_handler && @ack_handler.create_ack(chunk.unique_id, self) + connect(nil, ack: ack) do |sock, ri| + ensure_established_connection(sock, ri) + send_data_actual(sock, tag, chunk) + end + + heartbeat(false) + nil + end + + # FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack + # + # @return [Boolean] return true if it needs to rebuild nodes + def send_heartbeat + begin + dest_addr = resolved_host + @resolved_once = true + rescue ::SocketError => e + if !@resolved_once && @sender.ignore_network_errors_at_startup + @log.warn "failed to resolve node name in heartbeating", server: @name || @host, error: e + return false + end + raise + end + + case @sender.heartbeat_type + when :transport + connect(dest_addr) do |sock, ri| + ensure_established_connection(sock, ri) + + ## don't send any data to not cause a 
compatibility problem + # sock.write FORWARD_TCP_HEARTBEAT_DATA + + # successful tcp connection establishment is considered as valid heartbeat. + # When heartbeat is succeeded after detached, return true. It rebuilds weight array. + heartbeat(true) + end + when :udp + @usock.send "\0", 0, Socket.pack_sockaddr_in(@port, dest_addr) + # response is going to receive at on_udp_heatbeat_response_recv + false + when :none # :none doesn't use this class + raise "BUG: heartbeat_type none must not use Node" + else + raise "BUG: unknown heartbeat_type '#{@sender.heartbeat_type}'" + end end def resolved_host - case @conf.expire_dns_cache + case @sender.expire_dns_cache when 0 # cache is disabled - return resolve_dns! + resolve_dns! when nil # persistent cache - return @resolved_host ||= resolve_dns! + @resolved_host ||= resolve_dns! else - now = Engine.now + now = Fluent::EventTime.now rh = @resolved_host - if !rh || now - @resolved_time >= @conf.expire_dns_cache + if !rh || now - @resolved_time >= @sender.expire_dns_cache rh = @resolved_host = resolve_dns! @resolved_time = now end - return rh + rh end end def resolve_dns! addrinfo_list = Socket.getaddrinfo(@host, @port, nil, Socket::SOCK_STREAM) - addrinfo = @conf.dns_round_robin ? addrinfo_list.sample : addrinfo_list.first - @sockaddr = Socket.pack_sockaddr_in(addrinfo[1], addrinfo[3]) # used by on_heartbeat + addrinfo = @sender.dns_round_robin ? addrinfo_list.sample : addrinfo_list.first + @sockaddr = Socket.pack_sockaddr_in(addrinfo[1], addrinfo[3]) # used by on_udp_heatbeat_response_recv addrinfo[3] end private :resolve_dns! def tick now = Time.now.to_f - if !@available + unless available? if @failure.hard_timeout?(now) @failure.clear end @@ -531,41 +772,51 @@ def tick if @failure.hard_timeout?(now) @log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, hard_timeout: true - @available = false + disable! 
@resolved_host = nil # expire cached host @failure.clear return true end - if @conf.phi_failure_detector + if @sender.phi_failure_detector phi = @failure.phi(now) - #$log.trace "phi '#{@name}'", :host=>@host, :port=>@port, :phi=>phi - if phi > @conf.phi_threshold - @log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, phi: phi - @available = false + if phi > @sender.phi_threshold + @log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, phi: phi, phi_threshold: @sender.phi_threshold + disable! @resolved_host = nil # expire cached host @failure.clear return true end end - return false + false end def heartbeat(detect=true) now = Time.now.to_f @failure.add(now) - #@log.trace "heartbeat from '#{@name}'", :host=>@host, :port=>@port, :available=>@available, :sample_size=>@failure.sample_size - if detect && !@available && @failure.sample_size > @conf.recover_sample_size + if detect && !available? && @failure.sample_size > @sender.recover_sample_size @available = true @log.warn "recovered forwarding server '#{@name}'", host: @host, port: @port - return true + true else - return nil + nil end end - def to_msgpack(out = '') - [@host, @port, @weight, @available].to_msgpack(out) + private + + def ensure_established_connection(sock, request_info) + if request_info.state != :established + establish_connection(sock, request_info) + + if request_info.state != :established + raise ConnectionClosedError, "failed to establish connection with node #{@name}" + end + end + end + + def connect(host = nil, ack: false, &block) + @connection_manager.connect(host: host || resolved_host, port: port, hostname: @hostname, ack: ack, &block) end end @@ -583,96 +834,5 @@ def heartbeat(detect=true) true end end - - class FailureDetector - PHI_FACTOR = 1.0 / Math.log(10.0) - SAMPLE_SIZE = 1000 - - def initialize(heartbeat_interval, hard_timeout, init_last) - @heartbeat_interval = heartbeat_interval - @last = init_last - @hard_timeout = hard_timeout - - # 
microsec - @init_gap = (heartbeat_interval * 1e6).to_i - @window = [@init_gap] - end - - def hard_timeout?(now) - now - @last > @hard_timeout - end - - def add(now) - if @window.empty? - @window << @init_gap - @last = now - else - gap = now - @last - @window << (gap * 1e6).to_i - @window.shift if @window.length > SAMPLE_SIZE - @last = now - end - end - - def phi(now) - size = @window.size - return 0.0 if size == 0 - - # Calculate weighted moving average - mean_usec = 0 - fact = 0 - @window.each_with_index {|gap,i| - mean_usec += gap * (1+i) - fact += (1+i) - } - mean_usec = mean_usec / fact - - # Normalize arrive intervals into 1sec - mean = (mean_usec.to_f / 1e6) - @heartbeat_interval + 1 - - # Calculate phi of the phi accrual failure detector - t = now - @last - @heartbeat_interval + 1 - phi = PHI_FACTOR * t / mean - - return phi - end - - def sample_size - @window.size - end - - def clear - @window.clear - @last = 0 - end - end - - ## TODO - #class RPC - # def initialize(this) - # @this = this - # end - # - # def list_nodes - # @this.nodes - # end - # - # def list_fault_nodes - # list_nodes.select {|n| !n.available? } - # end - # - # def list_available_nodes - # list_nodes.select {|n| n.available? 
} - # end - # - # def add_node(name, host, port, weight) - # end - # - # def recover_node(host, port) - # end - # - # def remove_node(host, port) - # end - #end end end diff --git a/source/plugins/ruby/out_mdm.rb b/source/plugins/ruby/out_mdm.rb index 6238eb51a..7033e77a1 100644 --- a/source/plugins/ruby/out_mdm.rb +++ b/source/plugins/ruby/out_mdm.rb @@ -1,11 +1,12 @@ #!/usr/local/bin/ruby # frozen_string_literal: true -module Fluent - class OutputMDM < BufferedOutput - config_param :retry_mdm_post_wait_minutes, :integer +require 'fluent/plugin/output' - Plugin.register_output("out_mdm", self) +module Fluent::Plugin + class OutputMDM < Output + config_param :retry_mdm_post_wait_minutes, :integer + Fluent::Plugin.register_output("mdm", self) def initialize super @@ -57,8 +58,6 @@ def initialize end def configure(conf) - s = conf.add_element("secondary") - s["type"] = ChunkErrorHandler::SecondaryName super end @@ -270,6 +269,7 @@ def write(chunk) flush_mdm_exception_telemetry if (!@first_post_attempt_made || (Time.now > @last_post_attempt_time + retry_mdm_post_wait_minutes * 60)) && @can_send_data_to_mdm post_body = [] + chunk.extend Fluent::ChunkMessagePackEventStreamer chunk.msgpack_each { |(tag, record)| post_body.push(record.to_json) } @@ -352,72 +352,5 @@ def send_to_mdm(post_body) raise e end end - - private - - class ChunkErrorHandler - include Configurable - include PluginId - include PluginLoggerMixin - - SecondaryName = "__ChunkErrorHandler__" - - Plugin.register_output(SecondaryName, self) - - def initialize - @router = nil - end - - def secondary_init(primary) - @error_handlers = create_error_handlers @router - end - - def start - # NOP - end - - def shutdown - # NOP - end - - def router=(r) - @router = r - end - - def write(chunk) - chunk.msgpack_each { |(tag, record)| - @error_handlers[tag].emit(record) - } - end - - private - - def create_error_handlers(router) - nop_handler = NopErrorHandler.new - Hash.new() { |hash, tag| - etag = 
OMS::Common.create_error_tag tag - hash[tag] = router.match?(etag) ? - ErrorHandler.new(router, etag) : - nop_handler - } - end - - class ErrorHandler - def initialize(router, etag) - @router = router - @etag = etag - end - - def emit(record) - @router.emit(@etag, Fluent::Engine.now, record) - end - end - - class NopErrorHandler - def emit(record) - # NOP - end - end - end end # class OutputMDM end # module Fluent diff --git a/source/plugins/ruby/podinventory_to_mdm.rb b/source/plugins/ruby/podinventory_to_mdm.rb index d9cb71bd4..c24a91a87 100644 --- a/source/plugins/ruby/podinventory_to_mdm.rb +++ b/source/plugins/ruby/podinventory_to_mdm.rb @@ -279,16 +279,16 @@ def process_pod_inventory_record(record) begin records = [] - podUid = record["DataItems"][0]["PodUid"] + podUid = record["PodUid"] if @pod_uids.key?(podUid) return end @pod_uids[podUid] = true - podPhaseDimValue = record["DataItems"][0]["PodStatus"] - podNamespaceDimValue = record["DataItems"][0]["Namespace"] - podControllerNameDimValue = record["DataItems"][0]["ControllerName"] - podNodeDimValue = record["DataItems"][0]["Computer"] + podPhaseDimValue = record["PodStatus"] + podNamespaceDimValue = record["Namespace"] + podControllerNameDimValue = record["ControllerName"] + podNodeDimValue = record["Computer"] if podControllerNameDimValue.nil? || podControllerNameDimValue.empty? 
podControllerNameDimValue = "No Controller" diff --git a/source/plugins/utils/oms_common.rb b/source/plugins/utils/oms_common.rb new file mode 100644 index 000000000..1c3e549bf --- /dev/null +++ b/source/plugins/utils/oms_common.rb @@ -0,0 +1,1020 @@ +module OMS + + MSDockerCImprovHostnameFilePath = '/var/opt/microsoft/docker-cimprov/state/containerhostname' + IPV6_REGEX = '\h{4}:\h{4}:\h{4}:\h{4}:\h{4}:\h{4}:\h{4}:\h{4}' + IPV4_Approximate_REGEX = '\d+\.\d+\.\d+\.\d+' + + class RetryRequestException < Exception + # Throw this exception to tell the fluentd engine to retry and + # inform the output plugin that it is indeed retryable + end + + class Common + require 'json' + require 'yajl' + require 'net/http' + require 'net/https' + require 'time' + require 'zlib' + require 'digest' + require 'date' + require 'securerandom' + + require_relative 'omslog' + require_relative 'oms_configuration' + + @@OSFullName = nil + @@OSName = nil + @@OSVersion = nil + @@Hostname = nil + @@HostnameFilePath = MSDockerCImprovHostnameFilePath + @@FQDN = nil + @@InstalledDate = nil + @@AgentVersion = nil + @@CurrentTimeZone = nil + + @@tzMapping = { + 'Australia/Darwin' => 'AUS Central Standard Time', + 'Australia/Sydney' => 'AUS Eastern Standard Time', + 'Australia/Melbourne' => 'AUS Eastern Standard Time', + 'Asia/Kabul' => 'Afghanistan Standard Time', + 'America/Anchorage' => 'Alaskan Standard Time', + 'America/Juneau' => 'Alaskan Standard Time', + 'America/Metlakatla' => 'Alaskan Standard Time', + 'America/Nome' => 'Alaskan Standard Time', + 'America/Sitka' => 'Alaskan Standard Time', + 'America/Yakutat' => 'Alaskan Standard Time', + 'Asia/Riyadh' => 'Arab Standard Time', + 'Asia/Bahrain' => 'Arab Standard Time', + 'Asia/Kuwait' => 'Arab Standard Time', + 'Asia/Qatar' => 'Arab Standard Time', + 'Asia/Aden' => 'Arab Standard Time', + 'Asia/Dubai' => 'Arabian Standard Time', + 'Asia/Muscat' => 'Arabian Standard Time', + 'Etc/GMT-4' => 'Arabian Standard Time', + 'Asia/Baghdad' => 
'Arabic Standard Time', + 'America/Buenos_Aires' => 'Argentina Standard Time', + 'America/Argentina/La_Rioja' => 'Argentina Standard Time', + 'America/Argentina/Rio_Gallegos' => 'Argentina Standard Time', + 'America/Argentina/Salta' => 'Argentina Standard Time', + 'America/Argentina/San_Juan' => 'Argentina Standard Time', + 'America/Argentina/San_Luis' => 'Argentina Standard Time', + 'America/Argentina/Tucuman' => 'Argentina Standard Time', + 'America/Argentina/Ushuaia' => 'Argentina Standard Time', + 'America/Catamarca' => 'Argentina Standard Time', + 'America/Cordoba' => 'Argentina Standard Time', + 'America/Jujuy' => 'Argentina Standard Time', + 'America/Mendoza' => 'Argentina Standard Time', + 'America/Halifax' => 'Atlantic Standard Time', + 'Atlantic/Bermuda' => 'Atlantic Standard Time', + 'America/Glace_Bay' => 'Atlantic Standard Time', + 'America/Goose_Bay' => 'Atlantic Standard Time', + 'America/Moncton' => 'Atlantic Standard Time', + 'America/Thule' => 'Atlantic Standard Time', + 'Asia/Baku' => 'Azerbaijan Standard Time', + 'Atlantic/Azores' => 'Azores Standard Time', + 'America/Scoresbysund' => 'Azores Standard Time', + 'America/Bahia' => 'Bahia Standard Time', + 'Asia/Dhaka' => 'Bangladesh Standard Time', + 'Asia/Thimphu' => 'Bangladesh Standard Time', + 'Europe/Minsk' => 'Belarus Standard Time', + 'America/Regina' => 'Canada Central Standard Time', + 'America/Swift_Current' => 'Canada Central Standard Time', + 'Atlantic/Cape_Verde' => 'Cape Verde Standard Time', + 'Etc/GMT+1' => 'Cape Verde Standard Time', + 'Asia/Yerevan' => 'Caucasus Standard Time', + 'Australia/Adelaide' => 'Cen. Australia Standard Time', + 'Australia/Broken_Hill' => 'Cen. 
Australia Standard Time', + 'America/Guatemala' => 'Central America Standard Time', + 'America/Belize' => 'Central America Standard Time', + 'America/Costa_Rica' => 'Central America Standard Time', + 'Pacific/Galapagos' => 'Central America Standard Time', + 'America/Tegucigalpa' => 'Central America Standard Time', + 'America/Managua' => 'Central America Standard Time', + 'America/El_Salvador' => 'Central America Standard Time', + 'Etc/GMT+6' => 'Central America Standard Time', + 'Asia/Almaty' => 'Central Asia Standard Time', + 'Antarctica/Vostok' => 'Central Asia Standard Time', + 'Indian/Chagos' => 'Central Asia Standard Time', + 'Asia/Bishkek' => 'Central Asia Standard Time', + 'Asia/Qyzylorda' => 'Central Asia Standard Time', + 'Etc/GMT-6' => 'Central Asia Standard Time', + 'America/Cuiaba' => 'Central Brazilian Standard Time', + 'America/Campo_Grande' => 'Central Brazilian Standard Time', + 'Europe/Budapest' => 'Central Europe Standard Time', + 'Europe/Tirane' => 'Central Europe Standard Time', + 'Europe/Prague' => 'Central Europe Standard Time', + 'Europe/Podgorica' => 'Central Europe Standard Time', + 'Europe/Belgrade' => 'Central Europe Standard Time', + 'Europe/Ljubljana' => 'Central Europe Standard Time', + 'Europe/Bratislava' => 'Central Europe Standard Time', + 'Europe/Warsaw' => 'Central European Standard Time', + 'Europe/Sarajevo' => 'Central European Standard Time', + 'Europe/Zagreb' => 'Central European Standard Time', + 'Europe/Skopje' => 'Central European Standard Time', + 'Pacific/Guadalcanal' => 'Central Pacific Standard Time', + 'Antarctica/Macquarie' => 'Central Pacific Standard Time', + 'Pacific/Ponape' => 'Central Pacific Standard Time', + 'Pacific/Kosrae' => 'Central Pacific Standard Time', + 'Pacific/Noumea' => 'Central Pacific Standard Time', + 'Pacific/Norfolk' => 'Central Pacific Standard Time', + 'Pacific/Bougainville' => 'Central Pacific Standard Time', + 'Pacific/Efate' => 'Central Pacific Standard Time', + 'Etc/GMT-11' => 'Central 
Pacific Standard Time', + 'America/Chicago' => 'Central Standard Time', + 'America/Winnipeg' => 'Central Standard Time', + 'America/Rainy_River' => 'Central Standard Time', + 'America/Rankin_Inlet' => 'Central Standard Time', + 'America/Resolute' => 'Central Standard Time', + 'America/Matamoros' => 'Central Standard Time', + 'America/Indiana/Knox' => 'Central Standard Time', + 'America/Indiana/Tell_City' => 'Central Standard Time', + 'America/Menominee' => 'Central Standard Time', + 'America/North_Dakota/Beulah' => 'Central Standard Time', + 'America/North_Dakota/Center' => 'Central Standard Time', + 'America/North_Dakota/New_Salem' => 'Central Standard Time', + 'CST6CDT' => 'Central Standard Time', + 'America/Mexico_City' => 'Central Standard Time (Mexico)', + 'America/Bahia_Banderas' => 'Central Standard Time (Mexico)', + 'America/Merida' => 'Central Standard Time (Mexico)', + 'America/Monterrey' => 'Central Standard Time (Mexico)', + 'Asia/Shanghai' => 'China Standard Time', + 'Asia/Chongqing' => 'China Standard Time', + 'Asia/Harbin' => 'China Standard Time', + 'Asia/Kashgar' => 'China Standard Time', + 'Asia/Urumqi' => 'China Standard Time', + 'Asia/Hong_Kong' => 'China Standard Time', + 'Asia/Macau' => 'China Standard Time', + 'Etc/GMT+12' => 'Dateline Standard Time', + 'Africa/Nairobi' => 'E. Africa Standard Time', + 'Antarctica/Syowa' => 'E. Africa Standard Time', + 'Africa/Djibouti' => 'E. Africa Standard Time', + 'Africa/Asmera' => 'E. Africa Standard Time', + 'Africa/Addis_Ababa' => 'E. Africa Standard Time', + 'Indian/Comoro' => 'E. Africa Standard Time', + 'Indian/Antananarivo' => 'E. Africa Standard Time', + 'Africa/Khartoum' => 'E. Africa Standard Time', + 'Africa/Mogadishu' => 'E. Africa Standard Time', + 'Africa/Juba' => 'E. Africa Standard Time', + 'Africa/Dar_es_Salaam' => 'E. Africa Standard Time', + 'Africa/Kampala' => 'E. Africa Standard Time', + 'Indian/Mayotte' => 'E. Africa Standard Time', + 'Etc/GMT-3' => 'E. 
Africa Standard Time', + 'Australia/Brisbane' => 'E. Australia Standard Time', + 'Australia/Lindeman' => 'E. Australia Standard Time', + 'Europe/Chisinau' => 'E. Europe Standard Time', + 'America/Sao_Paulo' => 'E. South America Standard Time', + 'America/New_York' => 'Eastern Standard Time', + 'America/Nassau' => 'Eastern Standard Time', + 'America/Toronto' => 'Eastern Standard Time', + 'America/Iqaluit' => 'Eastern Standard Time', + 'America/Montreal' => 'Eastern Standard Time', + 'America/Nipigon' => 'Eastern Standard Time', + 'America/Pangnirtung' => 'Eastern Standard Time', + 'America/Thunder_Bay' => 'Eastern Standard Time', + 'America/Havana' => 'Eastern Standard Time', + 'America/Port-au-Prince' => 'Eastern Standard Time', + 'America/Detroit' => 'Eastern Standard Time', + 'America/Indiana/Petersburg' => 'Eastern Standard Time', + 'America/Indiana/Vincennes' => 'Eastern Standard Time', + 'America/Indiana/Winamac' => 'Eastern Standard Time', + 'America/Kentucky/Monticello' => 'Eastern Standard Time', + 'America/Louisville' => 'Eastern Standard Time', + 'EST5EDT' => 'Eastern Standard Time', + 'America/Cancun' => 'Eastern Standard Time (Mexico)', + 'Africa/Cairo' => 'Egypt Standard Time', + 'Asia/Gaza' => 'Egypt Standard Time', + 'Asia/Hebron' => 'Egypt Standard Time', + 'Asia/Yekaterinburg' => 'Ekaterinburg Standard Time', + 'Europe/Kiev' => 'FLE Standard Time', + 'Europe/Mariehamn' => 'FLE Standard Time', + 'Europe/Sofia' => 'FLE Standard Time', + 'Europe/Tallinn' => 'FLE Standard Time', + 'Europe/Helsinki' => 'FLE Standard Time', + 'Europe/Vilnius' => 'FLE Standard Time', + 'Europe/Riga' => 'FLE Standard Time', + 'Europe/Uzhgorod' => 'FLE Standard Time', + 'Europe/Zaporozhye' => 'FLE Standard Time', + 'Pacific/Fiji' => 'Fiji Standard Time', + 'Europe/London' => 'GMT Standard Time', + 'Atlantic/Canary' => 'GMT Standard Time', + 'Atlantic/Faeroe' => 'GMT Standard Time', + 'Europe/Guernsey' => 'GMT Standard Time', + 'Europe/Dublin' => 'GMT Standard Time', + 
'Europe/Isle_of_Man' => 'GMT Standard Time', + 'Europe/Jersey' => 'GMT Standard Time', + 'Europe/Lisbon' => 'GMT Standard Time', + 'Atlantic/Madeira' => 'GMT Standard Time', + 'Europe/Bucharest' => 'GTB Standard Time', + 'Asia/Nicosia' => 'GTB Standard Time', + 'Europe/Athens' => 'GTB Standard Time', + 'Asia/Tbilisi' => 'Georgian Standard Time', + 'America/Godthab' => 'Greenland Standard Time', + 'Atlantic/Reykjavik' => 'Greenwich Standard Time', + 'Africa/Ouagadougou' => 'Greenwich Standard Time', + 'Africa/Abidjan' => 'Greenwich Standard Time', + 'Africa/Accra' => 'Greenwich Standard Time', + 'Africa/Banjul' => 'Greenwich Standard Time', + 'Africa/Conakry' => 'Greenwich Standard Time', + 'Africa/Bissau' => 'Greenwich Standard Time', + 'Africa/Monrovia' => 'Greenwich Standard Time', + 'Africa/Bamako' => 'Greenwich Standard Time', + 'Africa/Nouakchott' => 'Greenwich Standard Time', + 'Atlantic/St_Helena' => 'Greenwich Standard Time', + 'Africa/Freetown' => 'Greenwich Standard Time', + 'Africa/Dakar' => 'Greenwich Standard Time', + 'Africa/Sao_Tome' => 'Greenwich Standard Time', + 'Africa/Lome' => 'Greenwich Standard Time', + 'Pacific/Honolulu' => 'Hawaiian Standard Time', + 'Pacific/Rarotonga' => 'Hawaiian Standard Time', + 'Pacific/Tahiti' => 'Hawaiian Standard Time', + 'Pacific/Johnston' => 'Hawaiian Standard Time', + 'Etc/GMT+10' => 'Hawaiian Standard Time', + 'Asia/Calcutta' => 'India Standard Time', + 'Asia/Tehran' => 'Iran Standard Time', + 'Asia/Jerusalem' => 'Israel Standard Time', + 'Asia/Amman' => 'Jordan Standard Time', + 'Europe/Kaliningrad' => 'Kaliningrad Standard Time', + 'Asia/Seoul' => 'Korea Standard Time', + 'Africa/Tripoli' => 'Libya Standard Time', + 'Pacific/Kiritimati' => 'Line Islands Standard Time', + 'Etc/GMT-14' => 'Line Islands Standard Time', + 'Asia/Magadan' => 'Magadan Standard Time', + 'Indian/Mauritius' => 'Mauritius Standard Time', + 'Indian/Reunion' => 'Mauritius Standard Time', + 'Indian/Mahe' => 'Mauritius Standard Time', + 
'Asia/Beirut' => 'Middle East Standard Time', + 'America/Montevideo' => 'Montevideo Standard Time', + 'Africa/Casablanca' => 'Morocco Standard Time', + 'Africa/El_Aaiun' => 'Morocco Standard Time', + 'America/Denver' => 'Mountain Standard Time', + 'America/Edmonton' => 'Mountain Standard Time', + 'America/Cambridge_Bay' => 'Mountain Standard Time', + 'America/Inuvik' => 'Mountain Standard Time', + 'America/Yellowknife' => 'Mountain Standard Time', + 'America/Ojinaga' => 'Mountain Standard Time', + 'America/Boise' => 'Mountain Standard Time', + 'MST7MDT' => 'Mountain Standard Time', + 'America/Chihuahua' => 'Mountain Standard Time (Mexico)', + 'America/Mazatlan' => 'Mountain Standard Time (Mexico)', + 'Asia/Rangoon' => 'Myanmar Standard Time', + 'Indian/Cocos' => 'Myanmar Standard Time', + 'Asia/Novosibirsk' => 'N. Central Asia Standard Time', + 'Asia/Omsk' => 'N. Central Asia Standard Time', + 'Africa/Windhoek' => 'Namibia Standard Time', + 'Asia/Katmandu' => 'Nepal Standard Time', + 'Pacific/Auckland' => 'New Zealand Standard Time', + 'Antarctica/McMurdo' => 'New Zealand Standard Time', + 'America/St_Johns' => 'Newfoundland Standard Time', + 'Asia/Irkutsk' => 'North Asia East Standard Time', + 'Asia/Krasnoyarsk' => 'North Asia Standard Time', + 'Asia/Novokuznetsk' => 'North Asia Standard Time', + 'Asia/Pyongyang' => 'North Korea Standard Time', + 'America/Santiago' => 'Pacific SA Standard Time', + 'Antarctica/Palmer' => 'Pacific SA Standard Time', + 'America/Los_Angeles' => 'Pacific Standard Time', + 'America/Vancouver' => 'Pacific Standard Time', + 'America/Dawson' => 'Pacific Standard Time', + 'America/Whitehorse' => 'Pacific Standard Time', + 'America/Tijuana' => 'Pacific Standard Time', + 'America/Santa_Isabel' => 'Pacific Standard Time', + 'PST8PDT' => 'Pacific Standard Time', + 'Asia/Karachi' => 'Pakistan Standard Time', + 'America/Asuncion' => 'Paraguay Standard Time', + 'Europe/Paris' => 'Romance Standard Time', + 'Europe/Brussels' => 'Romance Standard 
Time', + 'Europe/Copenhagen' => 'Romance Standard Time', + 'Europe/Madrid' => 'Romance Standard Time', + 'Africa/Ceuta' => 'Romance Standard Time', + 'Asia/Srednekolymsk' => 'Russia Time Zone 10', + 'Asia/Kamchatka' => 'Russia Time Zone 11', + 'Asia/Anadyr' => 'Russia Time Zone 11', + 'Europe/Samara' => 'Russia Time Zone 3', + 'Europe/Moscow' => 'Russian Standard Time', + 'Europe/Simferopol' => 'Russian Standard Time', + 'Europe/Volgograd' => 'Russian Standard Time', + 'America/Cayenne' => 'SA Eastern Standard Time', + 'Antarctica/Rothera' => 'SA Eastern Standard Time', + 'America/Fortaleza' => 'SA Eastern Standard Time', + 'America/Araguaina' => 'SA Eastern Standard Time', + 'America/Belem' => 'SA Eastern Standard Time', + 'America/Maceio' => 'SA Eastern Standard Time', + 'America/Recife' => 'SA Eastern Standard Time', + 'America/Santarem' => 'SA Eastern Standard Time', + 'Atlantic/Stanley' => 'SA Eastern Standard Time', + 'America/Paramaribo' => 'SA Eastern Standard Time', + 'Etc/GMT+3' => 'SA Eastern Standard Time', + 'America/Bogota' => 'SA Pacific Standard Time', + 'America/Rio_Branco' => 'SA Pacific Standard Time', + 'America/Eirunepe' => 'SA Pacific Standard Time', + 'America/Coral_Harbour' => 'SA Pacific Standard Time', + 'Pacific/Easter' => 'SA Pacific Standard Time', + 'America/Guayaquil' => 'SA Pacific Standard Time', + 'America/Jamaica' => 'SA Pacific Standard Time', + 'America/Cayman' => 'SA Pacific Standard Time', + 'America/Panama' => 'SA Pacific Standard Time', + 'America/Lima' => 'SA Pacific Standard Time', + 'Etc/GMT+5' => 'SA Pacific Standard Time', + 'America/La_Paz' => 'SA Western Standard Time', + 'America/Antigua' => 'SA Western Standard Time', + 'America/Anguilla' => 'SA Western Standard Time', + 'America/Aruba' => 'SA Western Standard Time', + 'America/Barbados' => 'SA Western Standard Time', + 'America/St_Barthelemy' => 'SA Western Standard Time', + 'America/Kralendijk' => 'SA Western Standard Time', + 'America/Manaus' => 'SA Western 
Standard Time', + 'America/Boa_Vista' => 'SA Western Standard Time', + 'America/Porto_Velho' => 'SA Western Standard Time', + 'America/Blanc-Sablon' => 'SA Western Standard Time', + 'America/Curacao' => 'SA Western Standard Time', + 'America/Dominica' => 'SA Western Standard Time', + 'America/Santo_Domingo' => 'SA Western Standard Time', + 'America/Grenada' => 'SA Western Standard Time', + 'America/Guadeloupe' => 'SA Western Standard Time', + 'America/Guyana' => 'SA Western Standard Time', + 'America/St_Kitts' => 'SA Western Standard Time', + 'America/St_Lucia' => 'SA Western Standard Time', + 'America/Marigot' => 'SA Western Standard Time', + 'America/Martinique' => 'SA Western Standard Time', + 'America/Montserrat' => 'SA Western Standard Time', + 'America/Puerto_Rico' => 'SA Western Standard Time', + 'America/Lower_Princes' => 'SA Western Standard Time', + 'America/Grand_Turk' => 'SA Western Standard Time', + 'America/Port_of_Spain' => 'SA Western Standard Time', + 'America/St_Vincent' => 'SA Western Standard Time', + 'America/Tortola' => 'SA Western Standard Time', + 'America/St_Thomas' => 'SA Western Standard Time', + 'Etc/GMT+4' => 'SA Western Standard Time', + 'Asia/Bangkok' => 'SE Asia Standard Time', + 'Antarctica/Davis' => 'SE Asia Standard Time', + 'Indian/Christmas' => 'SE Asia Standard Time', + 'Asia/Jakarta' => 'SE Asia Standard Time', + 'Asia/Pontianak' => 'SE Asia Standard Time', + 'Asia/Phnom_Penh' => 'SE Asia Standard Time', + 'Asia/Vientiane' => 'SE Asia Standard Time', + 'Asia/Hovd' => 'SE Asia Standard Time', + 'Asia/Saigon' => 'SE Asia Standard Time', + 'Etc/GMT-7' => 'SE Asia Standard Time', + 'Pacific/Apia' => 'Samoa Standard Time', + 'Asia/Singapore' => 'Singapore Standard Time', + 'Asia/Brunei' => 'Singapore Standard Time', + 'Asia/Makassar' => 'Singapore Standard Time', + 'Asia/Kuala_Lumpur' => 'Singapore Standard Time', + 'Asia/Kuching' => 'Singapore Standard Time', + 'Asia/Manila' => 'Singapore Standard Time', + 'Etc/GMT-8' => 
'Singapore Standard Time', + 'Africa/Johannesburg' => 'South Africa Standard Time', + 'Africa/Bujumbura' => 'South Africa Standard Time', + 'Africa/Gaborone' => 'South Africa Standard Time', + 'Africa/Lubumbashi' => 'South Africa Standard Time', + 'Africa/Maseru' => 'South Africa Standard Time', + 'Africa/Blantyre' => 'South Africa Standard Time', + 'Africa/Maputo' => 'South Africa Standard Time', + 'Africa/Kigali' => 'South Africa Standard Time', + 'Africa/Mbabane' => 'South Africa Standard Time', + 'Africa/Lusaka' => 'South Africa Standard Time', + 'Africa/Harare' => 'South Africa Standard Time', + 'Etc/GMT-2' => 'South Africa Standard Time', + 'Asia/Colombo' => 'Sri Lanka Standard Time', + 'Asia/Damascus' => 'Syria Standard Time', + 'Asia/Taipei' => 'Taipei Standard Time', + 'Australia/Hobart' => 'Tasmania Standard Time', + 'Australia/Currie' => 'Tasmania Standard Time', + 'Asia/Tokyo' => 'Tokyo Standard Time', + 'Asia/Jayapura' => 'Tokyo Standard Time', + 'Pacific/Palau' => 'Tokyo Standard Time', + 'Asia/Dili' => 'Tokyo Standard Time', + 'Etc/GMT-9' => 'Tokyo Standard Time', + 'Pacific/Tongatapu' => 'Tonga Standard Time', + 'Pacific/Enderbury' => 'Tonga Standard Time', + 'Pacific/Fakaofo' => 'Tonga Standard Time', + 'Etc/GMT-13' => 'Tonga Standard Time', + 'Europe/Istanbul' => 'Turkey Standard Time', + 'America/Indianapolis' => 'US Eastern Standard Time', + 'America/Indiana/Marengo' => 'US Eastern Standard Time', + 'America/Indiana/Vevay' => 'US Eastern Standard Time', + 'America/Phoenix' => 'US Mountain Standard Time', + 'America/Dawson_Creek' => 'US Mountain Standard Time', + 'America/Creston' => 'US Mountain Standard Time', + 'America/Fort_Nelson' => 'US Mountain Standard Time', + 'America/Hermosillo' => 'US Mountain Standard Time', + 'Etc/GMT+7' => 'US Mountain Standard Time', + 'Etc/GMT' => 'UTC', + 'Etc/UTC' => 'UTC', + 'America/Danmarkshavn' => 'UTC', + 'Etc/GMT-12' => 'UTC+12', + 'Pacific/Tarawa' => 'UTC+12', + 'Pacific/Majuro' => 'UTC+12', + 
'Pacific/Kwajalein' => 'UTC+12', + 'Pacific/Nauru' => 'UTC+12', + 'Pacific/Funafuti' => 'UTC+12', + 'Pacific/Wake' => 'UTC+12', + 'Pacific/Wallis' => 'UTC+12', + 'Etc/GMT+2' => 'UTC-02', + 'America/Noronha' => 'UTC-02', + 'Atlantic/South_Georgia' => 'UTC-02', + 'Etc/GMT+11' => 'UTC-11', + 'Pacific/Pago_Pago' => 'UTC-11', + 'Pacific/Niue' => 'UTC-11', + 'Pacific/Midway' => 'UTC-11', + 'Asia/Ulaanbaatar' => 'Ulaanbaatar Standard Time', + 'Asia/Choibalsan' => 'Ulaanbaatar Standard Time', + 'America/Caracas' => 'Venezuela Standard Time', + 'Asia/Vladivostok' => 'Vladivostok Standard Time', + 'Asia/Sakhalin' => 'Vladivostok Standard Time', + 'Asia/Ust-Nera' => 'Vladivostok Standard Time', + 'Australia/Perth' => 'W. Australia Standard Time', + 'Antarctica/Casey' => 'W. Australia Standard Time', + 'Africa/Lagos' => 'W. Central Africa Standard Time', + 'Africa/Luanda' => 'W. Central Africa Standard Time', + 'Africa/Porto-Novo' => 'W. Central Africa Standard Time', + 'Africa/Kinshasa' => 'W. Central Africa Standard Time', + 'Africa/Bangui' => 'W. Central Africa Standard Time', + 'Africa/Brazzaville' => 'W. Central Africa Standard Time', + 'Africa/Douala' => 'W. Central Africa Standard Time', + 'Africa/Algiers' => 'W. Central Africa Standard Time', + 'Africa/Libreville' => 'W. Central Africa Standard Time', + 'Africa/Malabo' => 'W. Central Africa Standard Time', + 'Africa/Niamey' => 'W. Central Africa Standard Time', + 'Africa/Ndjamena' => 'W. Central Africa Standard Time', + 'Africa/Tunis' => 'W. Central Africa Standard Time', + 'Etc/GMT-1' => 'W. Central Africa Standard Time', + 'Europe/Berlin' => 'W. Europe Standard Time', + 'Europe/Andorra' => 'W. Europe Standard Time', + 'Europe/Vienna' => 'W. Europe Standard Time', + 'Europe/Zurich' => 'W. Europe Standard Time', + 'Europe/Busingen' => 'W. Europe Standard Time', + 'Europe/Gibraltar' => 'W. Europe Standard Time', + 'Europe/Rome' => 'W. Europe Standard Time', + 'Europe/Vaduz' => 'W. 
Europe Standard Time',
+      'Europe/Luxembourg' => 'W. Europe Standard Time',
+      'Europe/Monaco' => 'W. Europe Standard Time',
+      'Europe/Malta' => 'W. Europe Standard Time',
+      'Europe/Amsterdam' => 'W. Europe Standard Time',
+      'Europe/Oslo' => 'W. Europe Standard Time',
+      'Europe/Stockholm' => 'W. Europe Standard Time',
+      'Arctic/Longyearbyen' => 'W. Europe Standard Time',
+      'Europe/San_Marino' => 'W. Europe Standard Time',
+      'Europe/Vatican' => 'W. Europe Standard Time',
+      'Asia/Tashkent' => 'West Asia Standard Time',
+      'Antarctica/Mawson' => 'West Asia Standard Time',
+      'Asia/Oral' => 'West Asia Standard Time',
+      'Asia/Aqtau' => 'West Asia Standard Time',
+      'Asia/Aqtobe' => 'West Asia Standard Time',
+      'Indian/Maldives' => 'West Asia Standard Time',
+      'Indian/Kerguelen' => 'West Asia Standard Time',
+      'Asia/Dushanbe' => 'West Asia Standard Time',
+      'Asia/Ashgabat' => 'West Asia Standard Time',
+      'Asia/Samarkand' => 'West Asia Standard Time',
+      'Etc/GMT-5' => 'West Asia Standard Time',
+      'Pacific/Port_Moresby' => 'West Pacific Standard Time',
+      'Antarctica/DumontDUrville' => 'West Pacific Standard Time',
+      'Pacific/Truk' => 'West Pacific Standard Time',
+      'Pacific/Guam' => 'West Pacific Standard Time',
+      'Pacific/Saipan' => 'West Pacific Standard Time',
+      'Etc/GMT-10' => 'West Pacific Standard Time',
+      'Asia/Yakutsk' => 'Yakutsk Standard Time',
+      'Asia/Chita' => 'Yakutsk Standard Time',
+      'Asia/Khandyga' => 'Yakutsk Standard Time'
+    }
+
+    # Filesystem locations used for time zone detection:
+    #   @@tzLocalTimePath - the local time zone file (may be a symlink)
+    #   @@tzBaseFolder    - prefix stripped from zone file paths to obtain the zone id
+    #   @@tzRightFolder   - optional sub-folder prefix that is also stripped from the zone id
+    @@tzLocalTimePath = '/etc/localtime'
+    @@tzBaseFolder = '/usr/share/zoneinfo/'
+    @@tzRightFolder = 'right/'
+
+    class << self
+
+      # Internal methods
+      # (left public for easy testing, though protected may be better later)
+
+      # Turn a raw hostname value into a string that is safe to use.
+      # parameters:
+      #   hnBuffer: String or nil. raw hostname text
+      # returns:
+      #   String. "" when hnBuffer is nil, otherwise hnBuffer with leading and
+      #   trailing whitespace removed
+      def clean_hostname_string(hnBuffer)
+        return "" if hnBuffer.nil? # So give the rest of the program a string to deal with.
+        hostname_buffer = hnBuffer.strip
+        return hostname_buffer
+      end
+
+      # Tell whether a usable hostname file has been configured.
+      # returns:
+      #   bool. true only when @@HostnameFilePath is set, contains at least one
+      #   word character, and refers to an existing path
+      def has_designated_hostnamefile?
+        return false if @@HostnameFilePath.nil?
+        return false unless @@HostnameFilePath =~ /\w/
+        return false unless File.exist?(@@HostnameFilePath)
+        return true
+      end
+
+      # Tell whether the text contains at least one dot with non-dot characters
+      # on both sides of it. The match is not anchored, so it may occur anywhere
+      # in the text.
+      # parameters:
+      #   hnBuffer: String. text to inspect
+      # returns:
+      #   bool.
+      def is_dot_separated_string?(hnBuffer)
+        return true if /[^.]+\.[^.]+/ =~ hnBuffer
+        return false
+      end
+
+      # Tell whether the text is acceptable as a single hostname label.
+      # parameters:
+      #   hnBuffer: String or nil. candidate hostname
+      # returns:
+      #   bool. false for nil, for text containing '.' or ':', and for text whose
+      #   byte size is outside 1..63; true otherwise
+      def is_hostname_compliant?(hnBuffer)
+        # RFC 2181:
+        # Size limit is 1 to 63 octets, so probably bytesize is appropriate method.
+        return false if hnBuffer.nil?
+        return false if /\./ =~ hnBuffer # Hostname by definition may not contain a dot.
+        return false if /:/ =~ hnBuffer # Hostname by definition may not contain a colon.
+        return false unless 1 <= hnBuffer.bytesize && hnBuffer.bytesize <= 63
+        return true
+      end
+
+      # Tell whether the text looks like a dotted-quad IPv4 address.
+      # The text must match IPV4_Approximate_REGEX (defined outside this section),
+      # split into exactly four parts, have a non-zero first part, and have every
+      # part in the range 0..254.
+      # NOTE(review): an octet of 255 is rejected on purpose (see the comment in the
+      # loop); confirm this is still wanted, e.g. for addresses such as 10.255.0.1.
+      # parameters:
+      #   hnBuffer: String. candidate address
+      # returns:
+      #   bool.
+      def is_like_ipv4_string?(hnBuffer)
+        return false unless /\A#{IPV4_Approximate_REGEX}\z/ =~ hnBuffer
+        qwa = hnBuffer.split('.')
+        return false unless qwa.length == 4
+        return false if qwa[0].to_i == 0
+        qwa.each do |quadwordstring|
+          bi = quadwordstring.to_i
+          # This may need more detail if 255 octets are sometimes allowed, but I don't think so.
+          return false unless 0 <= bi and bi < 255
+        end
+        return true
+      end
+
+      # Tell whether the whole text matches IPV6_REGEX (defined outside this section).
+      # parameters:
+      #   hnBuffer: String. candidate address
+      # returns:
+      #   bool.
+      def is_like_ipv6_string?(hnBuffer)
+        return true if /\A#{IPV6_REGEX}\z/ =~ hnBuffer
+        return false
+      end
+
+      # Store the name reported by Socket.gethostname (cleaned with
+      # clean_hostname_string) in @@Hostname.
+      # When the lookup raises, the error is logged once and @@Hostname is left
+      # untouched.
+      # returns:
+      #   nil, always. Callers must read @@Hostname instead of the return value.
+      def look_for_socket_class_host_address
+        hostname_buffer = nil
+
+        begin
+          hostname_buffer = Socket.gethostname
+        rescue => error
+          OMS::Log.error_once("Unable to get the Host Name using socket facility: #{error}")
+          return
+        end
+        @@Hostname = clean_hostname_string(hostname_buffer)
+
+        return # Thwart accidental return to force correct use.
+      end
+
+      # Store the content of the file at @@HostnameFilePath (cleaned with
+      # clean_hostname_string) in @@Hostname.
+      # returns:
+      #   nil, always. Callers must read @@Hostname instead of the return value.
+      def look_in_designated_hostnamefile
+        # Issue:
+        # When omsagent runs inside a container, gethostname returns the hostname of the container (random name)
+        # not the actual machine hostname.
+        # One way to solve this problem is to set the container hostname same as machine name, but this is not
+        # possible when host-machine is a private VM inside a cluster.
+        # Solution:
+        # Share/mount '/etc/hostname' as '/var/opt/microsoft/omsagent/state/containername' with container and
+        # omsagent will read hostname from shared file.
+        hostname_buffer = nil
+
+        unless File.readable?(@@HostnameFilePath)
+          OMS::Log.warn_once("File '#{@@HostnameFilePath}' exists but is not readable.")
+          return
+        end
+
+        begin
+          hostname_buffer = File.read(@@HostnameFilePath)
+        rescue => error
+          OMS::Log.warn_once("Unable to read the hostname from #{@@HostnameFilePath}: #{error}")
+        end
+        # When the read above failed, hostname_buffer is still nil here and
+        # @@Hostname therefore becomes "" (see clean_hostname_string).
+        @@Hostname = clean_hostname_string(hostname_buffer)
+        return # Thwart accidental return to force correct use.
+      end
+
+      # Check that the text is a compliant hostname, an IPv4-like string or an
+      # IPv6-like string.
+      # parameters:
+      #   hnBuffer: String or nil. value to validate
+      # returns:
+      #   nil when the value is accepted
+      # raises:
+      #   NameError when none of the three checks accepts the value (the same
+      #   message is also logged once as a warning)
+      def validate_hostname_equivalent(hnBuffer)
+        # RFC 1123 and 2181
+        # Note that for now we are limiting the earlier maximum of 63 for fqdn labels and thus
+        # hostnames UNTIL we are assured azure will allow 255, as specified in RFC 1123, or
+        # we are otherwise instructed.
+        rfcl = "RFCs 1123, 2181 with hostname range of {1,63} octets for non-root item."
+        return if is_hostname_compliant?(hnBuffer)
+        return if is_like_ipv4_string?(hnBuffer)
+        return if is_like_ipv6_string?(hnBuffer)
+        msg = "Hostname '#{hnBuffer}' not compliant (#{rfcl}). Not IP Address Either."
+        OMS::Log.warn_once(msg)
+        raise NameError, msg
+      end
+
+      # End of Internal methods
+
+      # get the unified timezone id by absolute file path of the timezone file
+      # parameters:
+      #   filepath: String. the absolute path of the file
+      # returns:
+      #   String. the path relative to @@tzBaseFolder, with a leading
+      #   @@tzRightFolder removed; 'Unknown' when filepath is not under
+      #   @@tzBaseFolder
+      def get_unified_timezoneid(filepath)
+        # remove the baseFolder path
+        tzID = filepath[@@tzBaseFolder.length..-1] if filepath.start_with?(@@tzBaseFolder)
+
+        return 'Unknown' if tzID.nil?
+
+        # if the rest starts with 'right/', remove it to unify the format
+        tzID = tzID[@@tzRightFolder.length..-1] if tzID.start_with?(@@tzRightFolder)
+
+        return tzID
+      end # end get_unified_timezoneid
+
+      # Work out the current time zone and cache it in @@CurrentTimeZone.
+      # returns:
+      #   String. the @@tzMapping value for the detected zone id when one exists,
+      #   otherwise the detected zone id itself ('Unknown' when nothing was detected)
+      def get_current_timezone
+        return @@CurrentTimeZone if !@@CurrentTimeZone.nil?
+
+        tzID = 'Unknown'
+
+        begin
+          # if /etc/localtime is a symlink, check the link file's path
+          if File.symlink?(@@tzLocalTimePath)
+            symlinkpath = File.absolute_path(File.readlink(@@tzLocalTimePath), File.dirname(@@tzLocalTimePath))
+            tzID = get_unified_timezoneid(symlinkpath)
+
+            # look for the entry in the timezone mapping
+            if @@tzMapping.has_key?(tzID)
+              @@CurrentTimeZone = @@tzMapping[tzID]
+              return @@CurrentTimeZone
+            end
+          end
+
+          # calculate the md5 of /etc/localtime
+          md5sum = Digest::MD5.file(@@tzLocalTimePath).hexdigest
+
+          # looks for a file in the /usr/share/zoneinfo/, which is identical to /etc/localtime. use the file name as the timezone
+          Dir.glob("#{@@tzBaseFolder}**/*") { |filepath|
+            # find all the files whose md5 is the same as the /etc/localtime
+            if File.file? filepath and Digest::MD5.file(filepath).hexdigest == md5sum
+              tzID = get_unified_timezoneid(filepath)
+
+              # look for the entry in the timezone mapping
+              if @@tzMapping.has_key?(tzID)
+                @@CurrentTimeZone = @@tzMapping[tzID]
+                return @@CurrentTimeZone
+              end
+            end
+          }
+        rescue => error
+          OMS::Log.error_once("Unable to get the current time zone: #{error}")
+        end
+
+        # assign the tzID if the corresponding Windows Time Zone is not found
+        @@CurrentTimeZone = tzID if @@CurrentTimeZone.nil?
+
+        return @@CurrentTimeZone
+      end # end get_current_timezone
+
+      # Read the OS full name from the release file and cache it in @@OSFullName.
+      # parameters:
+      #   conf_path: String. path of the release file holding an "OSFullName=" line
+      # returns:
+      #   String, or nil when the file does not exist, has no newline-terminated
+      #   "OSFullName=" line, or the value on that line is empty
+      def get_os_full_name(conf_path = "/etc/opt/microsoft/scx/conf/scx-release")
+        return @@OSFullName if !@@OSFullName.nil?
+
+        if File.file?(conf_path)
+          conf = File.read(conf_path)
+          os_full_name = conf[/OSFullName=(.*?)\n/, 1]
+          # An Integer is always truthy in Ruby, so the value has to be tested
+          # with empty? to keep an empty entry out of the cache.
+          if os_full_name and !os_full_name.empty?
+            @@OSFullName = os_full_name
+          end
+        end
+        return @@OSFullName
+      end
+
+      # Read the OS name from the release file and cache it in @@OSName.
+      # parameters:
+      #   conf_path: String. path of the release file holding an "OSName=" line
+      # returns:
+      #   String, or nil when no value could be read
+      def get_os_name(conf_path = "/etc/opt/microsoft/scx/conf/scx-release")
+        return @@OSName if !@@OSName.nil?
+
+        if File.file?(conf_path)
+          conf = File.read(conf_path)
+          os_name = conf[/OSName=(.*?)\n/, 1]
+          # An Integer is always truthy in Ruby, so the value has to be tested
+          # with empty? to keep an empty entry out of the cache.
+          if os_name and !os_name.empty?
+            @@OSName = os_name
+          end
+        end
+        return @@OSName
+      end
+
+      # Read the OS version from the release file and cache it in @@OSVersion.
+      # parameters:
+      #   conf_path: String. path of the release file holding an "OSVersion=" line
+      # returns:
+      #   String, or nil when the file does not exist, has no newline-terminated
+      #   "OSVersion=" line, or the value on that line is empty
+      def get_os_version(conf_path = "/etc/opt/microsoft/scx/conf/scx-release")
+        return @@OSVersion if !@@OSVersion.nil?
+
+        if File.file?(conf_path)
+          conf = File.read(conf_path)
+          os_version = conf[/OSVersion=(.*?)\n/, 1]
+          # See get_os_name: an empty value must not be cached.
+          if os_version and !os_version.empty?
+            @@OSVersion = os_version
+          end
+        end
+        return @@OSVersion
+      end
+
+      # Return the host name, looking it up when the cached value is not a
+      # compliant hostname or when the caller asks for a refresh.
+      # Lookup order: the designated hostname file (when one is configured), then
+      # the socket facility if the value is still not compliant.
+      # A value that fails validation is logged and returned anyway.
+      # NOTE(review): with ignoreOldValue set, no designated hostname file and an
+      # already compliant cached value, neither lookup runs and the old value is
+      # returned; confirm that this is intended.
+      # parameters:
+      #   ignoreOldValue: bool. request a new lookup even if a compliant value is cached
+      # returns:
+      #   String or nil. the content of @@Hostname
+      def get_hostname(ignoreOldValue = false)
+
+        if not is_hostname_compliant?(@@Hostname) or ignoreOldValue then
+
+          look_in_designated_hostnamefile if has_designated_hostnamefile?
+
+          look_for_socket_class_host_address unless is_hostname_compliant?(@@Hostname)
+        end
+
+        begin
+          validate_hostname_equivalent(@@Hostname)
+        rescue => error
+          OMS::Log.warn_once("Hostname '#{@@Hostname}' found, but did NOT validate as compliant. #{error}. Using anyway.")
+        end
+        return @@Hostname
+      end
+
+      # Resolve the fully qualified domain name of this host and cache it in @@FQDN.
+      # A failed lookup is logged once and leaves the cache empty, so the next
+      # call tries again.
+      # NOTE(review): Socket.gethostbyname is marked deprecated in recent Ruby
+      # versions; verify against the Ruby version this agent ships with.
+      # returns:
+      #   String, or nil when the lookup failed
+      def get_fully_qualified_domain_name
+        return @@FQDN unless @@FQDN.nil?
+
+        begin
+          fqdn = Socket.gethostbyname(Socket.gethostname)[0]
+        rescue => error
+          OMS::Log.error_once("Unable to get the FQDN: #{error}")
+        else
+          @@FQDN = fqdn
+        end
+        return @@FQDN
+      end
+
+      # Read the install date (the second line of the install info file) and
+      # cache it in @@InstalledDate. The text is kept only when Time.parse
+      # accepts it; otherwise an error is logged once.
+      # parameters:
+      #   conf_path: String. path of the install info file
+      # returns:
+      #   String, or nil when no valid date could be read
+      def get_installed_date(conf_path = "/etc/opt/microsoft/omsagent/sysconf/installinfo.txt")
+        return @@InstalledDate if !@@InstalledDate.nil?
+
+        if File.file?(conf_path)
+          conf = File.read(conf_path)
+          installed_date = conf[/(.*)\n(.*)/, 2]
+          # An empty second line is still handed to Time.parse so that it is
+          # reported as an invalid install date.
+          if installed_date
+            begin
+              Time.parse(installed_date)
+            rescue ArgumentError
+              OMS::Log.error_once("Invalid install date: #{installed_date}")
+            else
+              @@InstalledDate = installed_date
+            end
+          end
+        end
+        return @@InstalledDate
+      end
+
+      # Read the agent version from the install info file and cache it in
+      # @@AgentVersion.
+      # parameters:
+      #   conf_path: String. path of the install info file
+      # returns:
+      #   String, or nil when no version could be read
+      def get_agent_version(conf_path = "/etc/opt/microsoft/omsagent/sysconf/installinfo.txt")
+        return @@AgentVersion if !@@AgentVersion.nil?
+
+        if File.file?(conf_path)
+          conf = File.read(conf_path)
+          agent_version = conf[/([\d]+\.[\d]+\.[\d]+-[\d]+)\s.*\n/, 1]
+          # The pattern cannot capture an empty string, a nil check is enough.
+          if agent_version
+            @@AgentVersion = agent_version
+          end
+        end
+        return @@AgentVersion
+      end
+
+      # Format a point in time as an ISO 8601 UTC string with milliseconds.
+      # parameters:
+      #   time: any value accepted by Time.at
+      # returns:
+      #   String.
+      def format_time(time)
+        Time.at(time).utc.iso8601(3) # UTC with milliseconds
+      end
+
+      # Parse a date/time string and format it as "YYYY-MM-DDTHH:MM:SS.mmmZ".
+      # NOTE(review): the trailing "Z" is a literal in the format string and the
+      # parsed value is not converted to UTC here; confirm that callers only pass
+      # UTC timestamps.
+      # parameters:
+      #   time: String. text accepted by DateTime.parse
+      # returns:
+      #   String.
+      def format_time_str(time)
+        DateTime.parse(time).strftime("%FT%H:%M:%S.%3NZ")
+      end
+
+      # Build the error tag for the given tag.
+      # returns:
+      #   String. "ERROR::<tag>::"
+      def create_error_tag(tag)
+        "ERROR::#{tag}::"
+      end
+
+      # create an HTTP object which uses HTTPS
+      # parameters:
+      #   uri: URI. target; only host and port are used
+      #   proxy: Hash. optional proxy settings under :addr, :port, :user and :pass;
+      #          an empty hash means no proxy arguments are passed
+      # returns:
+      #   Net::HTTP. not yet started, with SSL enabled, peer verification on and a
+      #   30 second open timeout
+      def create_secure_http(uri, proxy={})
+        if proxy.empty?
+          http = Net::HTTP.new( uri.host, uri.port )
+        else
+          http = Net::HTTP.new( uri.host, uri.port,
+                                proxy[:addr], proxy[:port], proxy[:user], proxy[:pass])
+        end
+        http.use_ssl = true
+        http.verify_mode = OpenSSL::SSL::VERIFY_PEER
+        http.open_timeout = 30
+        return http
+      end # create_secure_http
+
+      # create an HTTP object to ODS
+      # Same as create_secure_http, with the client certificate and key taken
+      # from Configuration.
+      def create_ods_http(ods_uri, proxy={})
+        http = create_secure_http(ods_uri, proxy)
+        http.cert = Configuration.cert
+        http.key = Configuration.key
+        return http
+      end # create_ods_http
+
+      # create an HTTPRequest object to ODS
+      # parameters:
+      #   path: string. path of the request
+      #   record: Hash. body of the request
+      #   compress: bool. Whether the body of the request should be compressed
+      #   extra_headers: Hash. extra HTTP headers (not modified by this method)
+      #   serializer: method. serializer of the record
+      # returns:
+      #   HTTPRequest. request to ODS
+      def create_ods_request(path, record, compress, extra_headers=nil, serializer=method(:parse_json_record_encoding))
+        # Work on a copy so that the headers added below do not leak into the
+        # hash owned by the caller.
+        headers = extra_headers.nil? ? {} : extra_headers.dup
+
+        azure_resource_id = OMS::Configuration.azure_resource_id
+        if !azure_resource_id.to_s.empty?
+          headers[OMS::CaseSensitiveString.new("x-ms-AzureResourceId")] = azure_resource_id
+        end
+
+        azure_region = OMS::Configuration.azure_region if defined?(OMS::Configuration.azure_region)
+        if !azure_region.to_s.empty?
+          headers[OMS::CaseSensitiveString.new("x-ms-AzureRegion")] = azure_region
+        end
+
+        omscloud_id = OMS::Configuration.omscloud_id
+        if !omscloud_id.to_s.empty?
+          headers[OMS::CaseSensitiveString.new("x-ms-OMSCloudId")] = omscloud_id
+        end
+
+        uuid = OMS::Configuration.uuid
+        if !uuid.to_s.empty?
+          headers[OMS::CaseSensitiveString.new("x-ms-UUID")] = uuid
+        end
+
+        # A fresh id per request; start_request reports it when the request fails.
+        headers[OMS::CaseSensitiveString.new("X-Request-ID")] = SecureRandom.uuid
+
+        headers["Content-Type"] = "application/json"
+        if compress == true
+          headers["Content-Encoding"] = "deflate"
+        end
+
+        req = Net::HTTP::Post.new(path, headers)
+        json_msg = serializer.call(record)
+        if json_msg.nil?
+          # The record could not be serialized, there is nothing to send.
+          return nil
+        else
+          if compress == true
+            req.body = Zlib::Deflate.deflate(json_msg)
+          else
+            req.body = json_msg
+          end
+        end
+        return req
+      end # create_ods_request
+
+      # parses the json record with appropriate encoding
+      # When the first dump fails, every String "Message" found under
+      # record["DataItems"] is transcoded from iso-8859-1 to utf-8 in place and
+      # the dump is tried once more.
+      # parameters:
+      #   record: Hash. body of the request
+      # returns:
+      #   json representation of object,
+      #   nil if encoding cannot be applied
+      def parse_json_record_encoding(record)
+        msg = nil
+        begin
+          msg = Yajl.dump(record)
+        rescue => error
+          # failed encoding, encode to utf-8, iso-8859-1 and try again
+          begin
+            OMS::Log.warn_once("Yajl.dump() failed due to encoding, will try iso-8859-1 for #{record}: #{error}")
+
+            if !record["DataItems"].nil?
+              record["DataItems"].each do |item|
+                # Only strings can be transcoded. An item with a missing or
+                # non-string Message must not make the whole record fail here.
+                next unless item["Message"].is_a?(String)
+                item["Message"] = item["Message"].encode('utf-8', 'iso-8859-1')
+              end
+            end
+            msg = Yajl.dump(record)
+          rescue => error
+            # at this point we've given up, we don't recognize
+            # the encode, so return nil and log_warning for the
+            # record
+            OMS::Log.warn_once("Skipping due to failed encoding for #{record}: #{error}")
+          end
+        end
+        return msg
+      end
+
+      # dump the records into json string
+      # assume the records is an array of single layer hash
+      # return nil if we cannot dump it
+      # parameters:
+      #   records: hash[].
an array of single layer hash
+      # returns:
+      #   String. the JSON text, or nil when the records could not be dumped
+      # note: when the first dump fails with JSON::GeneratorError, the String
+      #   values of every hash are transcoded from iso-8859-1 to utf-8 in place
+      #   before the second attempt, so the caller's records are modified
+      def safe_dump_simple_hash_array(records)
+        msg = nil
+
+        begin
+          msg = JSON.dump(records)
+        rescue JSON::GeneratorError => error
+          OMS::Log.warn_once("Unable to dump to JSON string. #{error}")
+          begin
+            # failed to dump, encode to utf-8, iso-8859-1 and try again
+            # records is an array of hash
+            records.each do | hash |
+              # each element is a hash
+              hash.each do | key, value |
+                # the value should be of simple type
+                # encode the string to utf-8
+                if value.instance_of? String
+                  hash[key] = value.encode('utf-8', 'iso-8859-1')
+                end
+              end
+            end
+
+            msg = JSON.dump(records)
+          rescue => error
+            # at this point we've given up, we don't recognize the encode,
+            # so return nil and log_warning for the record
+            OMS::Log.warn_once("Skipping due to failed encoding for #{records}: #{error}")
+          end
+        rescue => error
+          # unexpected error when dumping the records into JSON string
+          # skip here and return nil
+          OMS::Log.warn_once("Skipping due to unexpected error for #{records}: #{error}")
+        end
+
+        return msg
+      end # safe_dump_simple_hash_array
+
+      # start a request
+      # parameters:
+      #   req: HTTPRequest. request
+      #   secure_http: HTTP. HTTPS
+      #   ignore404: bool. ignore the 404 error when it's true
+      #   return_entire_response: bool. If true, return the entire response object
+      # returns:
+      #   string. body of the response (or the whole response if return_entire_response is true)
+      # raises:
+      #   RetryRequestException when sending raises a StandardError, when no
+      #   response object comes back, or when the response is not a success
+      #   (except an ignored 404)
+      def start_request(req, secure_http, ignore404 = false, return_entire_response = false)
+        # Tries to send the passed in request
+        # Raises an exception if the request fails.
+        # This exception should only be caught by the fluentd engine so that it retries sending this
+        begin
+          res = nil
+          res = secure_http.start { |http| http.request(req) }
+        rescue => e # rescue all StandardErrors
+          # Server didn't respond
+          raise RetryRequestException, "Net::HTTP.#{req.method.capitalize} raises exception: #{e.class}, '#{e.message}'"
+        else
+          if res.nil?
+            raise RetryRequestException, "Failed to #{req.method} at #{req.to_s} (res=nil)"
+          end
+
+          # Any Net::HTTPSuccess response is handed back to the caller.
+          if res.is_a?(Net::HTTPSuccess)
+            if return_entire_response
+              return res
+            else
+              return res.body
+            end
+          end
+
+          # An ignored 404 yields an empty body instead of an exception.
+          if ignore404 and res.code == "404"
+            return ''
+          end
+
+          if res.code != "200"
+            # Retry all failure error codes...
+            res_summary = "(request-id=#{req["X-Request-ID"]}; class=#{res.class.name}; code=#{res.code}; message=#{res.message}; body=#{res.body};)"
+            OMS::Log.error_once("HTTP Error: #{res_summary}")
+            raise RetryRequestException, "HTTP error: #{res_summary}"
+          end
+
+        end # end begin
+      end # end start_request
+    end # Class methods
+
+  end # class Common
+
+  # Cache of hostname => IP address lookups. A background thread started in
+  # the constructor empties the whole cache once per refresh interval.
+  class IPcache
+
+    # parameters:
+    #   refresh_interval_seconds: number. wait time between two cache flushes
+    def initialize(refresh_interval_seconds)
+      @cache = {}
+      @cache_lock = Mutex.new
+      @refresh_interval_seconds = refresh_interval_seconds
+      @condition = ConditionVariable.new
+      @thread = Thread.new(&method(:refresh_cache))
+    end
+
+    # Return the cached address of the host, resolving and caching it on a miss.
+    # The lookup runs while the cache lock is held, and a failed lookup is
+    # cached as nil until the next flush.
+    # parameters:
+    #   hostname: String. name to resolve
+    # returns:
+    #   String, or nil when the name could not be resolved
+    def get_ip(hostname)
+      @cache_lock.synchronize {
+        if @cache.has_key?(hostname)
+          return @cache[hostname]
+        else
+          ip = get_ip_from_socket(hostname)
+          @cache[hostname] = ip
+          return ip
+        end
+      }
+    end
+
+    private
+
+    # Resolve the host with Socket::getaddrinfo and return the address field of
+    # the first result; nil when the lookup raises (logged once) or yields nothing.
+    def get_ip_from_socket(hostname)
+      begin
+        addrinfos = Socket::getaddrinfo(hostname, "echo", Socket::AF_UNSPEC)
+      rescue => error
+        OMS::Log.error_once("Unable to resolve the IP of '#{hostname}': #{error}")
+        return nil
+      end
+
+      if addrinfos.size >= 1
+        return addrinfos[0][3]
+      end
+
+      return nil
+    end
+
+    # Body of the background thread: loop forever, waiting on the condition
+    # variable for up to the refresh interval and then replacing the cache with
+    # an empty hash. Nothing in this class signals the condition variable, so
+    # the wait acts as a sleep that releases the lock.
+    def refresh_cache
+      while true
+        @cache_lock.synchronize {
+          @condition.wait(@cache_lock, @refresh_interval_seconds)
+          # Flush the cache completely to prevent it from growing indefinitely
+          @cache = {}
+        }
+      end
+    end
+
+  end
+
+  # String whose downcase, capitalize and to_s all return the receiver itself.
+  # create_ods_request uses it for header names such as "x-ms-AzureResourceId";
+  # presumably so that Net::HTTP sends the name with its original casing - TODO confirm.
+  class CaseSensitiveString < String
+    def downcase
+      self
+    end
+    def capitalize
+      self
+    end
+    def to_s
+      self
+    end
+  end
+
+end # module OMS
diff --git a/source/plugins/utils/oms_configuration.rb b/source/plugins/utils/oms_configuration.rb
new file mode 100644
index 000000000..d8682f35b
--- /dev/null
+++ 
b/source/plugins/utils/oms_configuration.rb @@ -0,0 +1,381 @@ +# frozen_string_literal: true + +module OMS + + class Configuration + require 'openssl' + require 'uri' + + require_relative 'omslog' + + @@ConfigurationLoaded = false + + @@Cert = nil + @@Key = nil + + @@AgentId = nil + @@WorkspaceId = nil + @@ODSEndpoint = nil + @@DiagnosticEndpoint = nil + @@GetBlobODSEndpoint = nil + @@NotifyBlobODSEndpoint = nil + @@OmsCloudId = nil + @@AgentGUID = nil + @@URLTLD = nil + @@LogFacility = nil + @@AzureResourceId = nil + @@AzureRegion = nil + @@AzureIMDSEndpoint = "http://169.254.169.254/metadata/instance?api-version=2017-12-01" + @@AzureResIDThreadLock = Mutex.new + @@ProxyConfig = nil + @@ProxyConfigFilePath = "/etc/opt/microsoft/omsagent/proxy.conf" + @@UUID = nil + @@TopologyInterval = nil + @@TelemetryInterval = nil + + class << self + + # test the onboard file existence + def test_onboard_file(file_name) + if !File.file?(file_name) + OMS::Log.error_once("Could not find #{file_name} Make sure to onboard.") + return false + end + + if !File.readable?(file_name) + OMS::Log.error_once("Could not read #{file_name} Check that the read permissions are set for the omsagent user") + return false + end + + return true + end + + def get_proxy_config(proxy_conf_path) + old_proxy_conf_path = '/etc/opt/microsoft/omsagent/conf/proxy.conf' + if !File.exist?(proxy_conf_path) and File.exist?(old_proxy_conf_path) + proxy_conf_path = old_proxy_conf_path + end + + begin + proxy_config = parse_proxy_config(File.read(proxy_conf_path)) + rescue SystemCallError # Error::ENOENT + return {} + end + + if proxy_config.nil? 
+ OMS::Log.error_once("Failed to parse the proxy configuration in '#{proxy_conf_path}'") + return {} + end + + return proxy_config + end + + def parse_proxy_config(proxy_conf_str) + # Remove the http(s) protocol + proxy_conf_str = proxy_conf_str.gsub(/^(https?:\/\/)?/, "") + + # Check for unsupported protocol + if proxy_conf_str[/^[a-z]+:\/\//] + return nil + end + + re = /^(?:(?[^:]+):(?[^@]+)@)?(?[^:@]+)(?::(?\d+))?$/ + matches = re.match(proxy_conf_str) + if matches.nil? or matches[:addr].nil? + return nil + end + # Convert nammed matches to a hash + Hash[ matches.names.map{ |name| name.to_sym}.zip( matches.captures ) ] + end + + def get_azure_region_from_imds() + begin + uri = URI.parse(@@AzureIMDSEndpoint) + http_get_req = Net::HTTP::Get.new(uri, initheader = {'Metadata' => 'true'}) + + http_req = Net::HTTP.new(uri.host, uri.port) + + http_req.open_timeout = 3 + http_req.read_timeout = 2 + + res = http_req.start() do |http| + http.request(http_get_req) + end + + imds_instance_json = JSON.parse(res.body) + + return nil if !imds_instance_json.has_key?("compute") || imds_instance_json['compute'].empty? #classic vm + + imds_instance_json_compute = imds_instance_json['compute'] + return nil unless imds_instance_json_compute.has_key?("location") + return nil if imds_instance_json_compute['location'].empty? + return imds_instance_json_compute['location'] + rescue => e + # this may be a container instance or a non-Azure VM + return nil + end + end + + def get_azure_resid_from_imds() + begin + uri = URI.parse(@@AzureIMDSEndpoint) + http_get_req = Net::HTTP::Get.new(uri, initheader = {'Metadata' => 'true'}) + + http_req = Net::HTTP.new(uri.host, uri.port) + + http_req.open_timeout = 3 + http_req.read_timeout = 2 + + res = http_req.start() do |http| + http.request(http_get_req) + end + + imds_instance_json = JSON.parse(res.body) + + return nil if !imds_instance_json.has_key?("compute") || imds_instance_json['compute'].empty? 
#classic vm + + imds_instance_json_compute = imds_instance_json['compute'] + + #guard from missing keys + return nil unless imds_instance_json_compute.has_key?("subscriptionId") && imds_instance_json_compute.has_key?("resourceGroupName") && imds_instance_json_compute.has_key?("name") && imds_instance_json_compute.has_key?("vmScaleSetName") + + #guard from blank values + return nil if imds_instance_json_compute['subscriptionId'].empty? || imds_instance_json_compute['resourceGroupName'].empty? || imds_instance_json_compute['name'].empty? + + azure_resource_id = '/subscriptions/' + imds_instance_json_compute['subscriptionId'] + '/resourceGroups/' + imds_instance_json_compute['resourceGroupName'] + '/providers/Microsoft.Compute/' + + if (imds_instance_json_compute['vmScaleSetName'].empty?) + azure_resource_id = azure_resource_id + 'virtualMachines/' + imds_instance_json_compute['name'] + else + azure_resource_id = azure_resource_id + 'virtualMachineScaleSets/' + imds_instance_json_compute['vmScaleSetName'] + '/virtualMachines/' + imds_instance_json_compute['name'] + end + + return azure_resource_id + + rescue => e + # this may be a container instance or a non-Azure VM + OMS::Log.warn_once("Could not fetch Azure Resource ID from IMDS, Reason: #{e}") + return nil + end + end + + def update_azure_resource_id() + retries=1 + max_retries=3 + + loop do + break if retries > max_retries + azure_resource_id = get_azure_resid_from_imds() + if azure_resource_id.nil? 
+ sleep (retries * 120) + retries += 1 + next + end + + @@AzureResourceId = azure_resource_id unless @@AzureResourceId == azure_resource_id + retries=1 #reset + sleep 60 + end + + OMS::Log.warn_once("Exceeded max attempts to fetch Azure Resource ID, killing the thread") + return #terminate + end + + # load the configuration from the configuration file, cert, and key path + def load_configuration(conf_path, cert_path, key_path) + return true if @@ConfigurationLoaded + return false if !test_onboard_file(conf_path) or !test_onboard_file(cert_path) or !test_onboard_file(key_path) + + @@ProxyConfig = get_proxy_config(@@ProxyConfigFilePath) + + endpoint_lines = IO.readlines(conf_path).select{ |line| line.start_with?("OMS_ENDPOINT")} + if endpoint_lines.size == 0 + OMS::Log.error_once("Could not find OMS_ENDPOINT setting in #{conf_path}") + return false + elsif endpoint_lines.size > 1 + OMS::Log.warn_once("Found more than one OMS_ENDPOINT setting in #{conf_path}, will use the first one.") + end + + begin + endpoint_url = endpoint_lines[0].split("=")[1].strip + @@ODSEndpoint = URI.parse( endpoint_url ) + @@GetBlobODSEndpoint = @@ODSEndpoint.clone + @@GetBlobODSEndpoint.path = '/ContainerService.svc/GetBlobUploadUri' + @@NotifyBlobODSEndpoint = @@ODSEndpoint.clone + @@NotifyBlobODSEndpoint.path = '/ContainerService.svc/PostBlobUploadNotification' + rescue => e + OMS::Log.error_once("Error parsing endpoint url. 
#{e}") + return false + end + + begin + diagnostic_endpoint_lines = IO.readlines(conf_path).select{ |line| line.start_with?("DIAGNOSTIC_ENDPOINT=")} + if diagnostic_endpoint_lines.size == 0 + # Endpoint to be inferred from @@ODSEndpoint + @@DiagnosticEndpoint = @@ODSEndpoint.clone + @@DiagnosticEndpoint.path = '/DiagnosticsDataService.svc/PostJsonDataItems' + else + if diagnostic_endpoint_lines.size > 1 + OMS::Log.warn_once("Found more than one DIAGNOSTIC_ENDPOINT setting in #{conf_path}, will use the first one.") + end + diagnostic_endpoint_url = diagnostic_endpoint_lines[0].split("=")[1].strip + @@DiagnosticEndpoint = URI.parse( diagnostic_endpoint_url ) + end + rescue => e + OMS::Log.error_once("Error obtaining diagnostic endpoint url. #{e}") + return false + end + + agentid_lines = IO.readlines(conf_path).select{ |line| line.start_with?("AGENT_GUID")} + if agentid_lines.size == 0 + OMS::Log.error_once("Could not find AGENT_GUID setting in #{conf_path}") + return false + elsif agentid_lines.size > 1 + OMS::Log.warn_once("Found more than one AGENT_GUID setting in #{conf_path}, will use the first one.") + end + + begin + @@AgentId = agentid_lines[0].split("=")[1].strip + rescue => e + OMS::Log.error_once("Error parsing agent id. #{e}") + return false + end + + File.open(conf_path).each_line do |line| + if line =~ /^WORKSPACE_ID/ + @@WorkspaceId = line.sub("WORKSPACE_ID=","").strip + end + if line =~ /AZURE_RESOURCE_ID/ + # We have contract with AKS team about how to pass AKS specific resource id. + # As per contract, AKS team before starting the agent will set environment variable + # 'customResourceId' + @@AzureResourceId = ENV['customResourceId'] + + # Only if environment variable is empty/nil load it from imds and refresh it periodically. + if @@AzureResourceId.nil? || @@AzureResourceId.empty? + @@AzureResourceId = line.sub("AZURE_RESOURCE_ID=","").strip + if @@AzureResourceId.include? 
"Microsoft.ContainerService" + OMS::Log.info_once("Azure resource id in configuration file is for AKS. It will be used") + else + Thread.new(&method(:update_azure_resource_id)) if @@AzureResIDThreadLock.try_lock + end + else + OMS::Log.info_once("There is non empty value set for overriden-resourceId environment variable. It will be used") + end + end + if line =~ /OMSCLOUD_ID/ + @@OmsCloudId = line.sub("OMSCLOUD_ID=","").strip + end + if line =~ /^AGENT_GUID/ + @@AgentGUID = line.sub("AGENT_GUID=","").strip + end + if line =~ /^URL_TLD/ + @@URLTLD = line.sub("URL_TLD=","").strip + end + if line =~ /^LOG_FACILITY/ + @@LogFacility = line.sub("LOG_FACILITY=","").strip + end + if line =~ /UUID/ + @@UUID = line.sub("UUID=","").strip + end + end + + begin + raw = File.read cert_path + @@Cert = OpenSSL::X509::Certificate.new raw + raw = File.read key_path + @@Key = OpenSSL::PKey::RSA.new raw + rescue => e + OMS::Log.error_once("Error loading certs: #{e}") + return false + end + + @@AzureRegion = get_azure_region_from_imds() + if @@AzureRegion.nil? || @@AzureRegion.empty? + OMS::Log.warn_once("Azure region value is not set. 
This must be onpremise machine") + @@AzureRegion = "OnPremise" + end + + @@ConfigurationLoaded = true + return true + end # load_configuration + + def set_request_intervals(topology_interval, telemetry_interval) + @@TopologyInterval = topology_interval + @@TelemetryInterval = telemetry_interval + OMS::Log.info_once("OMS agent management service topology request interval now #{@@TopologyInterval}") + OMS::Log.info_once("OMS agent management service telemetry request interval now #{@@TelemetryInterval}") + end + + def cert + @@Cert + end # getter cert + + def key + @@Key + end # getter key + + def workspace_id + @@WorkspaceId + end # getter workspace_id + + def agent_id + @@AgentId + end # getter agent_id + + def ods_endpoint + @@ODSEndpoint + end # getter ods_endpoint + + def diagnostic_endpoint + @@DiagnosticEndpoint + end # getter diagnostic_endpoint + + def get_blob_ods_endpoint + @@GetBlobODSEndpoint + end # getter get_blob_ods_endpoint + + def notify_blob_ods_endpoint + @@NotifyBlobODSEndpoint + end # getter notify_blob_ods_endpoint + + def azure_resource_id + @@AzureResourceId + end + + def omscloud_id + @@OmsCloudId + end + + def agent_guid + @@AgentGUID + end # getter agent_guid + + def url_tld + @@URLTLD + end # getter url_tld + + def log_facility + @@LogFacility + end # getter log_facility + + def uuid + @@UUID + end # getter for VM uuid + + def azure_region + @@AzureRegion + end + + def topology_interval + @@TopologyInterval + end + + def telemetry_interval + @@TelemetryInterval + end + + end # Class methods + + end # class Common +end # module OMS diff --git a/source/plugins/utils/omslog.rb b/source/plugins/utils/omslog.rb new file mode 100644 index 000000000..b65bf947c --- /dev/null +++ b/source/plugins/utils/omslog.rb @@ -0,0 +1,50 @@ +module OMS + class Log + require 'set' + require 'digest' + + @@error_proc = Proc.new {|message| $log.error message } + @@warn_proc = Proc.new {|message| $log.warn message } + @@info_proc = Proc.new {|message| $log.info 
message } + @@debug_proc = Proc.new {|message| $log.debug message } + + @@logged_hashes = Set.new + + class << self + def error_once(message, tag=nil) + log_once(@@error_proc, @@debug_proc, message, tag) + end + + def warn_once(message, tag=nil) + log_once(@@warn_proc, @@debug_proc, message, tag) + end + + def info_once(message, tag=nil) + log_once(@@info_proc, @@debug_proc, message, tag) + end + + def log_once(first_loglevel_proc, next_loglevel_proc, message, tag=nil) + # Will log a message once with the first procedure and subsequently with the second + # This allows repeated messages to be ignored by having the second logging function at a lower log level + # An optional tag can be used as the message key + + if tag == nil + tag = message + end + + md5_digest = Digest::MD5.new + tag_hash = md5_digest.update(tag).base64digest + res = @@logged_hashes.add?(tag_hash) + + if res == nil + # The hash was already in the set + next_loglevel_proc.call(message) + else + # First time we see this hash + first_loglevel_proc.call(message) + end + end + end # Class methods + + end # Class Log +end # Module OMS From 9edf9337736d69247dfc02cc5288c6a43e5cc4f8 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Thu, 13 May 2021 22:22:04 -0700 Subject: [PATCH 02/18] add odsdirect as fallback option --- build/linux/installer/conf/out_oms.conf | 4 ++-- source/plugins/go/src/oms.go | 26 ++++++++++++++----------- source/plugins/go/src/utils.go | 8 +++++++- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/build/linux/installer/conf/out_oms.conf b/build/linux/installer/conf/out_oms.conf index 2f286e947..21dc4c1ed 100644 --- a/build/linux/installer/conf/out_oms.conf +++ b/build/linux/installer/conf/out_oms.conf @@ -3,7 +3,7 @@ adx_cluster_uri_path=/etc/config/settings/adx/ADXCLUSTERURI adx_client_id_path=/etc/config/settings/adx/ADXCLIENTID adx_tenant_id_path=/etc/config/settings/adx/ADXTENANTID adx_client_secret_path=/etc/config/settings/adx/ADXCLIENTSECRET 
-cert_file_path=/etc/opt/microsoft/omsagent/certs/oms.crt -key_file_path=/etc/opt/microsoft/omsagent/certs/oms.key +cert_file_path=/etc/mdsd.d/oms/%s/oms.crt +key_file_path=/etc/mdsd.d/oms/%s/oms.key container_host_file_path=/var/opt/microsoft/docker-cimprov/state/containerhostname container_inventory_refresh_interval=60 diff --git a/source/plugins/go/src/oms.go b/source/plugins/go/src/oms.go index 32eef9a24..fd00fc04d 100644 --- a/source/plugins/go/src/oms.go +++ b/source/plugins/go/src/oms.go @@ -95,11 +95,14 @@ const MdsdContainerLogV2SourceName = "ContainerLogV2Source" const MdsdKubeMonAgentEventsSourceName = "KubeMonAgentEventsSource" const MdsdInsightsMetricsSourceName = "InsightsMetricsSource" -//container logs route (v2=flush to oneagent, adx= flush to adx ingestion, anything else flush to ODS[default]) +//container logs route (v2=flush to oneagent, adx= flush to adx ingestion, v1 for ODS Direct) const ContainerLogsV2Route = "v2" const ContainerLogsADXRoute = "adx" +//fallback option v1 route i.e. ODS direct if required in any case +const ContainerLogsV1Route = "v1" + //container logs schema (v2=ContainerLogsV2 table in LA, anything else ContainerLogs table in LA. 
This is applicable only if Container logs route is NOT ADX) const ContainerLogV2SchemaVersion = "v2" @@ -1594,18 +1597,13 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { Log(message) } - PluginConfiguration = pluginConfig - - if IsWindows == true { - Log("Creating HTTP Client since the OS Platform is Windows") - CreateHTTPClient() - } + PluginConfiguration = pluginConfig ContainerLogsRoute := strings.TrimSpace(strings.ToLower(os.Getenv("AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE"))) Log("AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE:%s", ContainerLogsRoute) - ContainerLogsRouteV2 = false //default is ODS - ContainerLogsRouteADX = false //default is LA + ContainerLogsRouteV2 = false + ContainerLogsRouteADX = false if strings.Compare(ContainerLogsRoute, ContainerLogsADXRoute) == 0 { //check if adx clusteruri, clientid & secret are set @@ -1638,8 +1636,11 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { Log("Routing container logs thru %s route...", ContainerLogsADXRoute) fmt.Fprintf(os.Stdout, "Routing container logs thru %s route...\n", ContainerLogsADXRoute) } - } else if strings.Compare(strings.ToLower(osType), "windows") != 0 { //for linux, oneagent will be default route + } else if strings.Compare(strings.ToLower(osType), "windows") != 0 { //for linux, oneagent will be default route ContainerLogsRouteV2 = true //default is mdsd route + if strings.Compare(ContainerLogsRoute, ContainerLogsV1Route) == 0 { + ContainerLogsRouteV2 = false //fallback option + } Log("Routing container logs thru %s route...", ContainerLogsV2Route) fmt.Fprintf(os.Stdout, "Routing container logs thru %s route... 
\n", ContainerLogsV2Route) } @@ -1648,7 +1649,10 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { CreateMDSDClient(ContainerType) } else if ContainerLogsRouteADX == true { CreateADXClient() - } + } else { // v1 or windows + Log("Creating HTTP Client since the OS Platform is Windows") + CreateHTTPClient() + } if IsWindows == false { Log("Creating MDSD clients for KubeMonAgentEvents & InsightsMetrics") diff --git a/source/plugins/go/src/utils.go b/source/plugins/go/src/utils.go index 6873ce847..696a7d089 100644 --- a/source/plugins/go/src/utils.go +++ b/source/plugins/go/src/utils.go @@ -63,7 +63,13 @@ func ReadConfiguration(filename string) (map[string]string, error) { // CreateHTTPClient used to create the client for sending post requests to OMSEndpoint func CreateHTTPClient() { - cert, err := tls.LoadX509KeyPair(PluginConfiguration["cert_file_path"], PluginConfiguration["key_file_path"]) + certFilePath := PluginConfiguration["cert_file_path"] + keyFilePath := PluginConfiguration["key_file_path"] + if IsWindows == false { + certFilePath = fmt.Sprintf(certFilePath, WorkspaceID) + keyFilePath := fmt.Sprintf(keyFilePath, WorkspaceID) + } + cert, err := tls.LoadX509KeyPair(certFilePath, keyFilePath) if err != nil { message := fmt.Sprintf("Error when loading cert %s", err.Error()) SendException(message) From bbecc2cfd7f2e17a5a26d40dfe270ba5bf3a7728 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Thu, 13 May 2021 22:57:21 -0700 Subject: [PATCH 03/18] cleanup --- source/plugins/go/src/oms.go | 40 +++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/source/plugins/go/src/oms.go b/source/plugins/go/src/oms.go index fd00fc04d..20071327d 100644 --- a/source/plugins/go/src/oms.go +++ b/source/plugins/go/src/oms.go @@ -592,8 +592,7 @@ func flushKubeMonAgentEventRecords() { Log(message) SendException(message) } else { - err := json.Unmarshal(jsonBytes, &stringMap) - if err != nil { + if err := 
json.Unmarshal(jsonBytes, &stringMap); err != nil { message := fmt.Sprintf("Error while UnMarhalling json bytes to stringmap: %s", err.Error()) Log(message) SendException(message) @@ -626,12 +625,23 @@ func flushKubeMonAgentEventRecords() { } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) var stringMap map[string]string - inrec, _ := json.Marshal(&laKubeMonAgentEventsRecord) - json.Unmarshal(inrec, &stringMap) - msgPackEntry := MsgPackEntry{ - Record: stringMap, - } - msgPackEntries = append(msgPackEntries, msgPackEntry) + jsonBytes, err := json.Marshal(&laKubeMonAgentEventsRecord) + if err != nil { + message := fmt.Sprintf("Error while Marshalling laKubeMonAgentEventsRecord to json bytes: %s", err.Error()) + Log(message) + SendException(message) + } else { + if err := json.Unmarshal(jsonBytes, &stringMap); err != nil { + message := fmt.Sprintf("Error while UnMarhalling json bytes to stringmap: %s", err.Error()) + Log(message) + SendException(message) + } else { + msgPackEntry := MsgPackEntry{ + Record: stringMap, + } + msgPackEntries = append(msgPackEntries, msgPackEntry) + } + } } } @@ -914,8 +924,7 @@ func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int ContainerLogTelemetryMutex.Lock() defer ContainerLogTelemetryMutex.Unlock() - ContainerLogsSendErrorsToMDSDFromFluent += 1 - + InsightsMetricsMDSDClientCreateErrors += 1 return output.FLB_RETRY } else { numTelegrafMetricsRecords := len(msgPackEntries) @@ -1165,8 +1174,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { numContainerLogRecords := 0 if len(msgPackEntries) > 0 && ContainerLogsRouteV2 == true { - //flush to mdsd - Log("Info::mdsd:: using mdsdsource name: %s", MdsdContainerLogTagName) + //flush to mdsd fluentForward := MsgPackForward{ Tag: MdsdContainerLogTagName, Entries: msgPackEntries, @@ -1466,7 +1474,7 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { Log("Container Type %s", 
ContainerType) osType := os.Getenv("OS_TYPE") - + IsWindows = false // Linux if strings.Compare(strings.ToLower(osType), "windows") != 0 { Log("Reading configuration for Linux from %s", pluginConfPath) @@ -1650,11 +1658,11 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { } else if ContainerLogsRouteADX == true { CreateADXClient() } else { // v1 or windows - Log("Creating HTTP Client since the OS Platform is Windows") + Log("Creating HTTP Client since either OS Platform is Windows or configmap configured with fallback option for ODS direct") CreateHTTPClient() - } + } - if IsWindows == false { + if IsWindows == false { // mdsd linux specific Log("Creating MDSD clients for KubeMonAgentEvents & InsightsMetrics") CreateMDSDClientKubeMon(ContainerType) CreateMDSDClientInsightsMetrics(ContainerType) From 836331afcc52049085d9097d4cf2a84ab256994e Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Thu, 13 May 2021 23:18:02 -0700 Subject: [PATCH 04/18] cleanup --- source/plugins/go/src/utils.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/plugins/go/src/utils.go b/source/plugins/go/src/utils.go index 696a7d089..9d60fb3d1 100644 --- a/source/plugins/go/src/utils.go +++ b/source/plugins/go/src/utils.go @@ -67,7 +67,7 @@ func CreateHTTPClient() { keyFilePath := PluginConfiguration["key_file_path"] if IsWindows == false { certFilePath = fmt.Sprintf(certFilePath, WorkspaceID) - keyFilePath := fmt.Sprintf(keyFilePath, WorkspaceID) + keyFilePath = fmt.Sprintf(keyFilePath, WorkspaceID) } cert, err := tls.LoadX509KeyPair(certFilePath, keyFilePath) if err != nil { From 017831006dc9c4f6bb01688184ca3f68470e10bf Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Fri, 14 May 2021 09:28:54 -0700 Subject: [PATCH 05/18] move customRegion to stage3 --- kubernetes/linux/main.sh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh index ac3974a59..e67839447 100644 
--- a/kubernetes/linux/main.sh +++ b/kubernetes/linux/main.sh @@ -19,12 +19,7 @@ else export customResourceId=$AKS_RESOURCE_ID echo "export customResourceId=$AKS_RESOURCE_ID" >> ~/.bashrc source ~/.bashrc - echo "customResourceId:$customResourceId" - - export customRegion=$AKS_REGION - echo "export customRegion=$AKS_REGION" >> ~/.bashrc - source ~/.bashrc - echo "customRegion:$customRegion" + echo "customResourceId:$customResourceId" fi #set agent config schema version From f29ec25c4f2e50bc312ab93aa5a45e2540bd9548 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Fri, 14 May 2021 19:35:10 -0700 Subject: [PATCH 06/18] updates related to containerlog route --- build/common/installer/scripts/tomlparser.rb | 16 +++++++++++----- kubernetes/linux/main.sh | 2 +- source/plugins/go/src/oms.go | 4 ++-- source/plugins/ruby/CAdvisorMetricsAPIClient.rb | 7 ++----- source/plugins/ruby/constants.rb | 2 +- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/build/common/installer/scripts/tomlparser.rb b/build/common/installer/scripts/tomlparser.rb index a0f3c2f0a..83e0c9206 100644 --- a/build/common/installer/scripts/tomlparser.rb +++ b/build/common/installer/scripts/tomlparser.rb @@ -25,8 +25,10 @@ @enrichContainerLogs = false @containerLogSchemaVersion = "" @collectAllKubeEvents = false -@containerLogsRoute = "" - +@containerLogsRoute = "v2" # default for linux +if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0 + @containerLogsRoute = "v1" # default is v1 for windows until windows agent integrates windows ama +end # Use parser to parse the configmap toml file to a ruby structure def parseConfigMap begin @@ -162,8 +164,12 @@ def populateSettingValuesFromConfigMap(parsedConfig) #Get container logs route setting begin if !parsedConfig[:log_collection_settings][:route_container_logs].nil? && !parsedConfig[:log_collection_settings][:route_container_logs][:version].nil? 
- @containerLogsRoute = parsedConfig[:log_collection_settings][:route_container_logs][:version] - puts "config::Using config map setting for container logs route" + if !parsedConfig[:log_collection_settings][:route_container_logs][:version].empty? + @containerLogsRoute = parsedConfig[:log_collection_settings][:route_container_logs][:version] + puts "config::Using config map setting for container logs route" + else + puts "config::Ignoring config map settings and using default value since provided container logs route value is empty" + end end rescue => errorStr ConfigParseErrorLogger.logError("Exception while reading config map settings for container logs route - #{errorStr}, using defaults, please check config map for errors") @@ -256,7 +262,7 @@ def get_command_windows(env_variable_name, env_variable_value) file.write(commands) commands = get_command_windows('AZMON_CLUSTER_COLLECT_ALL_KUBE_EVENTS', @collectAllKubeEvents) file.write(commands) - commands = get_command_windows('AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE', @containerLogsRoute) + commands = get_command_windows('AZMON_CONTAINER_LOGS_ROUTE', @containerLogsRoute) file.write(commands) commands = get_command_windows('AZMON_CONTAINER_LOG_SCHEMA_VERSION', @containerLogSchemaVersion) file.write(commands) diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh index e67839447..aac035728 100644 --- a/kubernetes/linux/main.sh +++ b/kubernetes/linux/main.sh @@ -392,7 +392,7 @@ DOCKER_CIMPROV_VERSION=$(dpkg -l | grep docker-cimprov | awk '{print $3}') echo "DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION" export DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION echo "export DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION" >> ~/.bashrc -echo "*** activating oneagent in legacy auth mode ***" +echo "*** activating oneagent in legacy auth mode ***" CIWORKSPACE_id="$(cat /etc/omsagent-secret/WSID)" #use the file path as its secure than env CIWORKSPACE_keyFile="/etc/omsagent-secret/KEY" diff --git 
a/source/plugins/go/src/oms.go b/source/plugins/go/src/oms.go index 20071327d..0dba90e16 100644 --- a/source/plugins/go/src/oms.go +++ b/source/plugins/go/src/oms.go @@ -1607,8 +1607,8 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { PluginConfiguration = pluginConfig - ContainerLogsRoute := strings.TrimSpace(strings.ToLower(os.Getenv("AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE"))) - Log("AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE:%s", ContainerLogsRoute) + ContainerLogsRoute := strings.TrimSpace(strings.ToLower(os.Getenv("AZMON_CONTAINER_LOGS_ROUTE"))) + Log("AZMON_CONTAINER_LOGS_ROUTE:%s", ContainerLogsRoute) ContainerLogsRouteV2 = false ContainerLogsRouteADX = false diff --git a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb index 93b0eaa39..f02459aef 100644 --- a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb +++ b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb @@ -252,11 +252,8 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met telemetryProps["dsPromUrl"] = @dsPromUrlCount end #telemetry about containerlog Routing for daemonset - if File.exist?(Constants::AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2_FILENAME) - telemetryProps["containerLogsRoute"] = "v2" - elsif (!@containerLogsRoute.nil? && !@containerLogsRoute.empty?) - telemetryProps["containerLogsRoute"] = @containerLogsRoute - end + telemetryProps["containerLogsRoute"] = @containerLogsRoute + #telemetry about health model if (!@hmEnabled.nil? && !@hmEnabled.empty?) 
telemetryProps["hmEnabled"] = @hmEnabled diff --git a/source/plugins/ruby/constants.rb b/source/plugins/ruby/constants.rb index 906019b95..c037c99f6 100644 --- a/source/plugins/ruby/constants.rb +++ b/source/plugins/ruby/constants.rb @@ -103,5 +103,5 @@ class Constants #Pod Statuses POD_STATUS_TERMINATING = "Terminating" - AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2_FILENAME = "/opt/AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2" + end From f3fbf3c6b8c73cd5876a9d545949c549ba741416 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Fri, 14 May 2021 19:54:13 -0700 Subject: [PATCH 07/18] make xml eventschema consistent --- kubernetes/linux/mdsd.xml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/kubernetes/linux/mdsd.xml b/kubernetes/linux/mdsd.xml index d99549d3f..de14240aa 100644 --- a/kubernetes/linux/mdsd.xml +++ b/kubernetes/linux/mdsd.xml @@ -293,57 +293,57 @@ - + - + - + - + - + - + - + - + - + - + - + From 673807f8b1b4dda87b5ea304ffa2886fa0cdec1e Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sun, 16 May 2021 17:14:58 -0700 Subject: [PATCH 08/18] add buffer settings --- build/linux/installer/conf/container.conf | 34 +++++++ build/linux/installer/conf/kube.conf | 118 +++++++++++++++++++++- 2 files changed, 151 insertions(+), 1 deletion(-) diff --git a/build/linux/installer/conf/container.conf b/build/linux/installer/conf/container.conf index 90f923c8d..15f02f539 100644 --- a/build/linux/installer/conf/container.conf +++ b/build/linux/installer/conf/container.conf @@ -63,6 +63,17 @@ host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + @type file + path /var/opt/microsoft/docker-cimprov/state/containerinventory*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + keepalive true @@ -74,6 +85,17 @@ host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + @type file + path 
/var/opt/microsoft/docker-cimprov/state/cadvisorperf*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + keepalive true @@ -130,5 +152,17 @@ host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + @type file + path /var/opt/microsoft/docker-cimprov/state/insightsmetrics*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length 20 + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + keepalive true diff --git a/build/linux/installer/conf/kube.conf b/build/linux/installer/conf/kube.conf index 1990e8b6f..03180f355 100644 --- a/build/linux/installer/conf/kube.conf +++ b/build/linux/installer/conf/kube.conf @@ -95,6 +95,17 @@ host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + @type file + path /var/opt/microsoft/docker-cimprov/state/kubepod*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + keepalive true @@ -106,6 +117,18 @@ host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + @type file + path /var/opt/microsoft/docker-cimprov/state/kubepv*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length 20 + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + keepalive true @@ -118,6 +141,18 @@ host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + @type file + path /var/opt/microsoft/docker-cimprov/state/insightsmetrics*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length 20 + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + keepalive true @@ -129,6 +164,18 @@ host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + @type file + path 
/var/opt/microsoft/docker-cimprov/state/kubeevents*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length 20 + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + keepalive true @@ -140,6 +187,18 @@ host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + @type file + path /var/opt/microsoft/docker-cimprov/state/kubeservices*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length 20 + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 2 + keepalive true @@ -151,6 +210,18 @@ host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + @type file + path /var/opt/microsoft/docker-cimprov/state/kubenode*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length 20 + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + keepalive true @@ -162,6 +233,18 @@ host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + @type file + path /var/opt/microsoft/docker-cimprov/state/containernodeinventory*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length 20 + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 3 + keepalive true @@ -173,7 +256,18 @@ host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" - keepalive true + + @type file + path /var/opt/microsoft/docker-cimprov/state/containerinventory*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + + keepalive true #perf @@ -184,6 +278,17 @@ host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + @type file + path /var/opt/microsoft/docker-cimprov/state/perf*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + flush_interval 20s + retry_max_times 10 + retry_wait 5s + 
retry_max_interval 5m + flush_thread_count 5 + keepalive true @@ -229,5 +334,16 @@ host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + @type file + path /var/opt/microsoft/docker-cimprov/state/kubehealth*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + keepalive true From 877ae9e08c9058512e94b44af7d0b5600e85cd77 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 17 May 2021 11:08:09 -0700 Subject: [PATCH 09/18] address HTTPServerException deprecation in ruby 2.6 --- source/plugins/ruby/out_mdm.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/plugins/ruby/out_mdm.rb b/source/plugins/ruby/out_mdm.rb index 7033e77a1..d4508a857 100644 --- a/source/plugins/ruby/out_mdm.rb +++ b/source/plugins/ruby/out_mdm.rb @@ -320,7 +320,7 @@ def send_to_mdm(post_body) ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMSendSuccessful", {}) @last_telemetry_sent_time = Time.now end - rescue Net::HTTPServerException => e + rescue Net::HTTPClientException => e # see https://docs.ruby-lang.org/en/2.6.0/NEWS.html about deprecating HTTPServerException and adding HTTPClientException if !response.nil? && !response.body.nil? #body will have actual error @log.info "Failed to Post Metrics to MDM : #{e} Response.body: #{response.body}" else @@ -334,10 +334,10 @@ def send_to_mdm(post_body) # Not raising exception, as that will cause retries to happen elsif !response.code.empty? 
&& response.code.start_with?("4") # Log 400 errors and continue - @log.info "Non-retryable HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}" + @log.info "Non-retryable HTTPClientException when POSTing Metrics to MDM #{e} Response: #{response}" else # raise if the response code is non-400 - @log.info "HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}" + @log.info "HTTPClientException when POSTing Metrics to MDM #{e} Response: #{response}" raise e end # Adding exceptions to hash to aggregate and send telemetry for all 400 error codes From 90e9eebf588b00434c40c39ba424193936b1e576 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 17 May 2021 15:37:04 -0700 Subject: [PATCH 10/18] update to official mdsd version --- kubernetes/linux/setup.sh | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kubernetes/linux/setup.sh b/kubernetes/linux/setup.sh index a50c310a7..3c5063375 100644 --- a/kubernetes/linux/setup.sh +++ b/kubernetes/linux/setup.sh @@ -9,12 +9,8 @@ sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \ dpkg-reconfigure --frontend=noninteractive locales && \ update-locale LANG=en_US.UTF-8 -#install oneagent - Official bits (10/18) -# wget https://github.com/microsoft/Docker-Provider/releases/download/10182020-oneagent/azure-mdsd_1.5.126-build.master.99_x86_64.deb -# use official build which has all the changes for the release - -# private mdsd build -wget https://github.com/microsoft/Docker-Provider/raw/gangams/ci-aad-auth-msi/oneagent-dev/azure-mdsd_1.11.0-build.develop.1999_x86_64.deb +#install oneagent - Official bits (05/17/2021) +wget https://github.com/microsoft/Docker-Provider/releases/download/05172021-oneagent/azure-mdsd_1.10.0-build.master.212_x86_64.deb /usr/bin/dpkg -i $TMPDIR/azure-mdsd*.deb cp -f $TMPDIR/mdsd.xml /etc/mdsd.d From 7ecdbaba9d11510c789696cc5e9bfd1d2fa40143 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 17 May 2021 
16:10:34 -0700 Subject: [PATCH 11/18] fix log message issue --- build/common/installer/scripts/tomlparser.rb | 2 +- source/plugins/go/src/oms.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/build/common/installer/scripts/tomlparser.rb b/build/common/installer/scripts/tomlparser.rb index 83e0c9206..b173ecfe3 100644 --- a/build/common/installer/scripts/tomlparser.rb +++ b/build/common/installer/scripts/tomlparser.rb @@ -166,7 +166,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) if !parsedConfig[:log_collection_settings][:route_container_logs].nil? && !parsedConfig[:log_collection_settings][:route_container_logs][:version].nil? if !parsedConfig[:log_collection_settings][:route_container_logs][:version].empty? @containerLogsRoute = parsedConfig[:log_collection_settings][:route_container_logs][:version] - puts "config::Using config map setting for container logs route" + puts "config::Using config map setting for container logs route: #{@containerLogsRoute}" else puts "config::Ignoring config map settings and using default value since provided container logs route value is empty" end diff --git a/source/plugins/go/src/oms.go b/source/plugins/go/src/oms.go index 0dba90e16..7b25a3290 100644 --- a/source/plugins/go/src/oms.go +++ b/source/plugins/go/src/oms.go @@ -1647,10 +1647,10 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { } else if strings.Compare(strings.ToLower(osType), "windows") != 0 { //for linux, oneagent will be default route ContainerLogsRouteV2 = true //default is mdsd route if strings.Compare(ContainerLogsRoute, ContainerLogsV1Route) == 0 { - ContainerLogsRouteV2 = false //fallback option + ContainerLogsRouteV2 = false //fallback option when hiddensetting set } - Log("Routing container logs thru %s route...", ContainerLogsV2Route) - fmt.Fprintf(os.Stdout, "Routing container logs thru %s route... 
\n", ContainerLogsV2Route) + Log("Routing container logs thru %s route...", ContainerLogsRoute) + fmt.Fprintf(os.Stdout, "Routing container logs thru %s route... \n", ContainerLogsRoute) } if ContainerLogsRouteV2 == true { From 5cd47d7f6ea4f237249464e834285e1181eba72c Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 18 May 2021 21:43:10 -0700 Subject: [PATCH 12/18] fix pr feedback --- build/linux/installer/conf/container.conf | 9 ++++++ build/linux/installer/conf/kube.conf | 30 +++++++++++++++++++ kubernetes/linux/setup.sh | 2 +- .../ruby/filter_health_model_builder.rb | 3 +- source/plugins/ruby/in_cadvisor_perf.rb | 6 ++-- source/plugins/ruby/in_containerinventory.rb | 3 +- source/plugins/ruby/in_kube_events.rb | 5 ++-- source/plugins/ruby/in_kube_health.rb | 5 ++-- source/plugins/ruby/in_kube_nodes.rb | 11 +++---- source/plugins/ruby/in_kube_podinventory.rb | 13 ++++---- source/plugins/ruby/in_kube_pvinventory.rb | 5 ++-- .../plugins/ruby/in_kubestate_deployments.rb | 4 +-- source/plugins/ruby/in_kubestate_hpa.rb | 4 +-- source/plugins/ruby/out_mdm.rb | 2 +- 14 files changed, 74 insertions(+), 28 deletions(-) diff --git a/build/linux/installer/conf/container.conf b/build/linux/installer/conf/container.conf index 15f02f539..05822eccb 100644 --- a/build/linux/installer/conf/container.conf +++ b/build/linux/installer/conf/container.conf @@ -59,6 +59,9 @@ @type forward @log_level debug + send_timeout 30 + connect_timeout 30 + heartbeat_type none host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" @@ -81,6 +84,9 @@ @type forward @log_level debug + send_timeout 30 + connect_timeout 30 + heartbeat_type none host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" @@ -148,6 +154,9 @@ @type forward @log_level debug + send_timeout 30 + connect_timeout 30 + heartbeat_type none host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" diff --git a/build/linux/installer/conf/kube.conf b/build/linux/installer/conf/kube.conf index 03180f355..6d0f2d903 100644 --- 
a/build/linux/installer/conf/kube.conf +++ b/build/linux/installer/conf/kube.conf @@ -91,6 +91,9 @@ @type forward @log_level debug + send_timeout 30 + connect_timeout 30 + heartbeat_type none host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" @@ -113,6 +116,9 @@ @type forward @log_level debug + send_timeout 30 + connect_timeout 30 + heartbeat_type none host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" @@ -137,6 +143,9 @@ @type forward @log_level debug + send_timeout 30 + connect_timeout 30 + heartbeat_type none host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" @@ -160,6 +169,9 @@ @type forward @log_level debug + send_timeout 30 + connect_timeout 30 + heartbeat_type none host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" @@ -183,6 +195,9 @@ @type forward @log_level debug + send_timeout 30 + connect_timeout 30 + heartbeat_type none host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" @@ -206,6 +221,9 @@ @type forward @log_level debug + send_timeout 30 + connect_timeout 30 + heartbeat_type none host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" @@ -229,6 +247,9 @@ @type forward @log_level debug + send_timeout 30 + connect_timeout 30 + heartbeat_type none host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" @@ -252,6 +273,9 @@ @type forward @log_level debug + send_timeout 30 + connect_timeout 30 + heartbeat_type none host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" @@ -274,6 +298,9 @@ @type forward @log_level debug + send_timeout 30 + connect_timeout 30 + heartbeat_type none host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" @@ -330,6 +357,9 @@ @type forward @log_level debug + send_timeout 30 + connect_timeout 30 + heartbeat_type none host 0.0.0.0 port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" diff --git a/kubernetes/linux/setup.sh b/kubernetes/linux/setup.sh index 3c5063375..3d00e4c57 100644 --- a/kubernetes/linux/setup.sh +++ b/kubernetes/linux/setup.sh @@ -10,7 +10,7 @@ sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \ 
update-locale LANG=en_US.UTF-8 #install oneagent - Official bits (05/17/2021) -wget https://github.com/microsoft/Docker-Provider/releases/download/05172021-oneagent/azure-mdsd_1.10.0-build.master.212_x86_64.deb +wget https://github.com/microsoft/Docker-Provider/releases/download/05172021-oneagent/azure-mdsd_1.10.1-build.master.213_x86_64.deb /usr/bin/dpkg -i $TMPDIR/azure-mdsd*.deb cp -f $TMPDIR/mdsd.xml /etc/mdsd.d diff --git a/source/plugins/ruby/filter_health_model_builder.rb b/source/plugins/ruby/filter_health_model_builder.rb index 67dfff2ca..d491f17c2 100644 --- a/source/plugins/ruby/filter_health_model_builder.rb +++ b/source/plugins/ruby/filter_health_model_builder.rb @@ -226,6 +226,7 @@ def filter_stream(tag, es) # for each key in monitor.keys, # get the state from health_monitor_state # generate the record to send + emit_time = Fluent::Engine.now all_monitors.keys.each{|key| record = @provider.get_record(all_monitors[key], state) if record[HealthMonitorRecordFields::MONITOR_ID] == MonitorId::CLUSTER @@ -245,7 +246,7 @@ def filter_stream(tag, es) end end end - new_es.add(Fluent::Engine.now, record) + new_es.add(emit_time, record) } #emit the stream diff --git a/source/plugins/ruby/in_cadvisor_perf.rb b/source/plugins/ruby/in_cadvisor_perf.rb index cfad9e393..781042cea 100644 --- a/source/plugins/ruby/in_cadvisor_perf.rb +++ b/source/plugins/ruby/in_cadvisor_perf.rb @@ -53,7 +53,7 @@ def shutdown def enumerate() currentTime = Time.now - time = currentTime.to_f + time = Fluent::Engine.now batchTime = currentTime.utc.iso8601 @@istestvar = ENV["ISTEST"] begin @@ -61,7 +61,7 @@ def enumerate() insightsMetricsEventStream = Fluent::MultiEventStream.new metricData = CAdvisorMetricsAPIClient.getMetrics(winNode: nil, metricTime: batchTime ) metricData.each do |record| - eventStream.add(Fluent::Engine.now, record) if record + eventStream.add(time, record) if record end router.emit_stream(@tag, eventStream) if eventStream @@ -80,7 +80,7 @@ def enumerate() 
containerGPUusageInsightsMetricsDataItems.concat(CAdvisorMetricsAPIClient.getInsightsMetrics(winNode: nil, metricTime: batchTime)) containerGPUusageInsightsMetricsDataItems.each do |insightsMetricsRecord| - insightsMetricsEventStream.add(Fluent::Engine.now, insightsMetricsRecord) if insightsMetricsRecord + insightsMetricsEventStream.add(time, insightsMetricsRecord) if insightsMetricsRecord end router.emit_stream(@insightsmetricstag, insightsMetricsEventStream) if insightsMetricsEventStream diff --git a/source/plugins/ruby/in_containerinventory.rb b/source/plugins/ruby/in_containerinventory.rb index f4f05f9ce..eebf422d6 100644 --- a/source/plugins/ruby/in_containerinventory.rb +++ b/source/plugins/ruby/in_containerinventory.rb @@ -52,6 +52,7 @@ def shutdown def enumerate currentTime = Time.now batchTime = currentTime.utc.iso8601 + emitTime = Fluent::Engine.now containerInventory = Array.new eventStream = Fluent::MultiEventStream.new hostName = "" @@ -93,7 +94,7 @@ def enumerate end end containerInventory.each do |record| - eventStream.add(Fluent::Engine.now, record) if record + eventStream.add(emitTime, record) if record end router.emit_stream(@tag, eventStream) if eventStream @@istestvar = ENV["ISTEST"] diff --git a/source/plugins/ruby/in_kube_events.rb b/source/plugins/ruby/in_kube_events.rb index 6bad35096..6f65dab92 100644 --- a/source/plugins/ruby/in_kube_events.rb +++ b/source/plugins/ruby/in_kube_events.rb @@ -131,7 +131,8 @@ def enumerate end # end enumerate def parse_and_emit_records(events, eventQueryState, newEventQueryState, batchTime = Time.utc.iso8601) - currentTime = Time.now + currentTime = Time.now + emitTime = Fluent::Engine.now @@istestvar = ENV["ISTEST"] begin eventStream = Fluent::MultiEventStream.new @@ -166,7 +167,7 @@ def parse_and_emit_records(events, eventQueryState, newEventQueryState, batchTim record["Computer"] = nodeName record["ClusterName"] = KubernetesApiClient.getClusterName record["ClusterId"] = KubernetesApiClient.getClusterId - 
eventStream.add(Fluent::Engine.now, record) if record + eventStream.add(emitTime, record) if record @eventsCount += 1 end router.emit_stream(@tag, eventStream) if eventStream diff --git a/source/plugins/ruby/in_kube_health.rb b/source/plugins/ruby/in_kube_health.rb index 10da8d33d..db981c53e 100644 --- a/source/plugins/ruby/in_kube_health.rb +++ b/source/plugins/ruby/in_kube_health.rb @@ -89,7 +89,8 @@ def enumerate return Fluent::MultiEventStream.new end begin - currentTime = Time.now + currentTime = Time.now + emitTime = Fluent::Engine.now batchTime = currentTime.utc.iso8601 health_monitor_records = [] eventStream = Fluent::MultiEventStream.new @@ -159,7 +160,7 @@ def enumerate end health_monitor_records.each do |record| - eventStream.add(Fluent::Engine.now, record) + eventStream.add(emitTime, record) end router.emit_stream(@tag, eventStream) if eventStream rescue => errorStr diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index 8a5f99f43..ffc11de55 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -161,7 +161,8 @@ def enumerate def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) begin - currentTime = Time.now + currentTime = Time.now + emitTime = Fluent::Engine.now telemetrySent = false eventStream = Fluent::MultiEventStream.new containerNodeInventoryEventStream = Fluent::MultiEventStream.new @@ -172,7 +173,7 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) nodeInventory["items"].each do |item| # node inventory nodeInventoryRecord = getNodeInventoryRecord(item, batchTime) - eventStream.add(Fluent::Engine.now, nodeInventoryRecord) if nodeInventoryRecord + eventStream.add(emitTime, nodeInventoryRecord) if nodeInventoryRecord if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && eventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE $log.info("in_kube_node::parse_and_emit_records: number of node inventory records emitted 
#{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") router.emit_stream(@tag, eventStream) if eventStream @@ -186,7 +187,7 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) # container node inventory containerNodeInventoryRecord = getContainerNodeInventoryRecord(item, batchTime) - containerNodeInventoryEventStream.add(Fluent::Engine.now, containerNodeInventoryRecord) if containerNodeInventoryRecord + containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryRecord) if containerNodeInventoryRecord if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && containerNodeInventoryEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE $log.info("in_kube_node::parse_and_emit_records: number of container node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") @@ -235,7 +236,7 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) end end nodeMetricRecords.each do |metricRecord| - kubePerfEventStream.add(Fluent::Engine.now, metricRecord) if metricRecord + kubePerfEventStream.add(emitTime, metricRecord) if metricRecord end if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && kubePerfEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE $log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") @@ -265,7 +266,7 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord) end nodeGPUInsightsMetricsRecords.each do |insightsMetricsRecord| - insightsMetricsEventStream.add(Fluent::Engine.now, insightsMetricsRecord) if insightsMetricsRecord + insightsMetricsEventStream.add(emitTime, insightsMetricsRecord) if insightsMetricsRecord end if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && insightsMetricsEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE $log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted 
#{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index 40b3934d3..5598602cd 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -197,7 +197,8 @@ def enumerate(podList = nil) end def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batchTime = Time.utc.iso8601) - currentTime = Time.now + currentTime = Time.now + emitTime = Fluent::Engine.now #batchTime = currentTime.utc.iso8601 eventStream = Fluent::MultiEventStream.new containerInventoryStream = Fluent::MultiEventStream.new @@ -213,7 +214,7 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc podInventoryRecords = getPodInventoryRecords(item, serviceRecords, batchTime) podInventoryRecords.each do |record| if !record.nil? - eventStream.add(Fluent::Engine.now, record) if record + eventStream.add(emitTime, record) if record @inventoryToMdmConvertor.process_pod_inventory_record(record) end end @@ -232,7 +233,7 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc @winContainerCount += containerInventoryRecords.length containerInventoryRecords.each do |cirecord| if !cirecord.nil? 
- containerInventoryStream.add(Fluent::Engine.now, cirecord) if cirecord + containerInventoryStream.add(emitTime, cirecord) if cirecord end end end @@ -255,7 +256,7 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(item, "limits", "memory", "memoryLimitBytes", batchTime)) containerMetricDataItems.each do |record| - kubePerfEventStream.add(Fluent::Engine.now, record) if record + kubePerfEventStream.add(emitTime, record) if record end if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && kubePerfEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE @@ -274,7 +275,7 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(item, "requests", "amd.com/gpu", "containerGpuRequests", batchTime)) containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(item, "limits", "amd.com/gpu", "containerGpuLimits", batchTime)) containerGPUInsightsMetricsDataItems.each do |insightsMetricsRecord| - insightsMetricsEventStream.add(Fluent::Engine.now, insightsMetricsRecord) if insightsMetricsRecord + insightsMetricsEventStream.add(emitTime, insightsMetricsRecord) if insightsMetricsRecord end if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && insightsMetricsEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE @@ -341,7 +342,7 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc # adding before emit to reduce memory foot print kubeServiceRecord["ClusterId"] = KubernetesApiClient.getClusterId kubeServiceRecord["ClusterName"] = KubernetesApiClient.getClusterName - kubeServicesEventStream.add(Fluent::Engine.now, kubeServiceRecord) if kubeServiceRecord + kubeServicesEventStream.add(emitTime, kubeServiceRecord) if kubeServiceRecord if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && 
kubeServicesEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE $log.info("in_kube_podinventory::parse_and_emit_records: number of service records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") router.emit_stream(@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream diff --git a/source/plugins/ruby/in_kube_pvinventory.rb b/source/plugins/ruby/in_kube_pvinventory.rb index fbd86787e..6b1c6f758 100644 --- a/source/plugins/ruby/in_kube_pvinventory.rb +++ b/source/plugins/ruby/in_kube_pvinventory.rb @@ -110,7 +110,8 @@ def enumerate end # end enumerate def parse_and_emit_records(pvInventory, batchTime = Time.utc.iso8601) - currentTime = Time.now + currentTime = Time.now + emitTime = Fluent::Engine.now eventStream = Fluent::MultiEventStream.new @@istestvar = ENV["ISTEST"] begin @@ -152,7 +153,7 @@ def parse_and_emit_records(pvInventory, batchTime = Time.utc.iso8601) records.each do |record| if !record.nil? - eventStream.add(Fluent::Engine.now, record) + eventStream.add(emitTime, record) end end diff --git a/source/plugins/ruby/in_kubestate_deployments.rb b/source/plugins/ruby/in_kubestate_deployments.rb index d17830cd5..182c3ffc1 100644 --- a/source/plugins/ruby/in_kubestate_deployments.rb +++ b/source/plugins/ruby/in_kubestate_deployments.rb @@ -185,9 +185,9 @@ def parse_and_emit_records(deployments, batchTime = Time.utc.iso8601) metricItems.push(metricItem) end - time = Time.now.to_f + time = Fluent::Engine.now metricItems.each do |insightsMetricsRecord| - insightsMetricsEventStream.add(Fluent::Engine.now, insightsMetricsRecord) if insightsMetricsRecord + insightsMetricsEventStream.add(time, insightsMetricsRecord) if insightsMetricsRecord end router.emit_stream(@tag, insightsMetricsEventStream) if insightsMetricsEventStream diff --git a/source/plugins/ruby/in_kubestate_hpa.rb b/source/plugins/ruby/in_kubestate_hpa.rb index 833d1a0ae..8f60bfb72 100644 --- a/source/plugins/ruby/in_kubestate_hpa.rb +++ 
b/source/plugins/ruby/in_kubestate_hpa.rb @@ -185,9 +185,9 @@ def parse_and_emit_records(hpas, batchTime = Time.utc.iso8601) metricItems.push(metricItem) end - time = Time.now.to_f + time = Fluent::Engine.now metricItems.each do |insightsMetricsRecord| - insightsMetricsEventStream.add(Fluent::Engine.now, insightsMetricsRecord) if insightsMetricsRecord + insightsMetricsEventStream.add(time, insightsMetricsRecord) if insightsMetricsRecord end router.emit_stream(@tag, insightsMetricsEventStream) if insightsMetricsEventStream diff --git a/source/plugins/ruby/out_mdm.rb b/source/plugins/ruby/out_mdm.rb index d4508a857..c03e63f5d 100644 --- a/source/plugins/ruby/out_mdm.rb +++ b/source/plugins/ruby/out_mdm.rb @@ -337,7 +337,7 @@ def send_to_mdm(post_body) @log.info "Non-retryable HTTPClientException when POSTing Metrics to MDM #{e} Response: #{response}" else # raise if the response code is non-400 - @log.info "HTTPClientException when POSTing Metrics to MDM #{e} Response: #{response}" + @log.info "HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}" raise e end # Adding exceptions to hash to aggregate and send telemetry for all 400 error codes From a4dff092425d2472e6b683b09c8c93d95a4b2162 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Wed, 19 May 2021 10:27:12 -0700 Subject: [PATCH 13/18] get rid of unused code from omscommon --- .../installer/datafiles/base_container.data | 1 - source/plugins/utils/oms_common.rb | 881 +----------------- source/plugins/utils/oms_configuration.rb | 381 -------- 3 files changed, 2 insertions(+), 1261 deletions(-) delete mode 100644 source/plugins/utils/oms_configuration.rb diff --git a/build/linux/installer/datafiles/base_container.data b/build/linux/installer/datafiles/base_container.data index 788e7bbcf..b9f889dba 100644 --- a/build/linux/installer/datafiles/base_container.data +++ b/build/linux/installer/datafiles/base_container.data @@ -146,7 +146,6 @@ MAINTAINER: 'Microsoft Corporation' 
/etc/fluent/plugin/omslog.rb; source/plugins/utils/omslog.rb; 644; root; root /etc/fluent/plugin/oms_common.rb; source/plugins/utils/oms_common.rb; 644; root; root -/etc/fluent/plugin/oms_configuration.rb; source/plugins/utils/oms_configuration.rb; 644; root; root /etc/fluent/kube.conf; build/linux/installer/conf/kube.conf; 644; root; root /etc/fluent/container.conf; build/linux/installer/conf/container.conf; 644; root; root diff --git a/source/plugins/utils/oms_common.rb b/source/plugins/utils/oms_common.rb index 1c3e549bf..c10cb8638 100644 --- a/source/plugins/utils/oms_common.rb +++ b/source/plugins/utils/oms_common.rb @@ -10,477 +10,12 @@ class RetryRequestException < Exception end class Common - require 'json' - require 'yajl' - require 'net/http' - require 'net/https' - require 'time' - require 'zlib' - require 'digest' - require 'date' - require 'securerandom' - + require 'socket' require_relative 'omslog' - require_relative 'oms_configuration' - - @@OSFullName = nil - @@OSName = nil - @@OSVersion = nil + @@Hostname = nil @@HostnameFilePath = MSDockerCImprovHostnameFilePath - @@FQDN = nil - @@InstalledDate = nil - @@AgentVersion = nil - @@CurrentTimeZone = nil - @@tzMapping = { - 'Australia/Darwin' => 'AUS Central Standard Time', - 'Australia/Sydney' => 'AUS Eastern Standard Time', - 'Australia/Melbourne' => 'AUS Eastern Standard Time', - 'Asia/Kabul' => 'Afghanistan Standard Time', - 'America/Anchorage' => 'Alaskan Standard Time', - 'America/Juneau' => 'Alaskan Standard Time', - 'America/Metlakatla' => 'Alaskan Standard Time', - 'America/Nome' => 'Alaskan Standard Time', - 'America/Sitka' => 'Alaskan Standard Time', - 'America/Yakutat' => 'Alaskan Standard Time', - 'Asia/Riyadh' => 'Arab Standard Time', - 'Asia/Bahrain' => 'Arab Standard Time', - 'Asia/Kuwait' => 'Arab Standard Time', - 'Asia/Qatar' => 'Arab Standard Time', - 'Asia/Aden' => 'Arab Standard Time', - 'Asia/Dubai' => 'Arabian Standard Time', - 'Asia/Muscat' => 'Arabian Standard Time', - 
'Etc/GMT-4' => 'Arabian Standard Time', - 'Asia/Baghdad' => 'Arabic Standard Time', - 'America/Buenos_Aires' => 'Argentina Standard Time', - 'America/Argentina/La_Rioja' => 'Argentina Standard Time', - 'America/Argentina/Rio_Gallegos' => 'Argentina Standard Time', - 'America/Argentina/Salta' => 'Argentina Standard Time', - 'America/Argentina/San_Juan' => 'Argentina Standard Time', - 'America/Argentina/San_Luis' => 'Argentina Standard Time', - 'America/Argentina/Tucuman' => 'Argentina Standard Time', - 'America/Argentina/Ushuaia' => 'Argentina Standard Time', - 'America/Catamarca' => 'Argentina Standard Time', - 'America/Cordoba' => 'Argentina Standard Time', - 'America/Jujuy' => 'Argentina Standard Time', - 'America/Mendoza' => 'Argentina Standard Time', - 'America/Halifax' => 'Atlantic Standard Time', - 'Atlantic/Bermuda' => 'Atlantic Standard Time', - 'America/Glace_Bay' => 'Atlantic Standard Time', - 'America/Goose_Bay' => 'Atlantic Standard Time', - 'America/Moncton' => 'Atlantic Standard Time', - 'America/Thule' => 'Atlantic Standard Time', - 'Asia/Baku' => 'Azerbaijan Standard Time', - 'Atlantic/Azores' => 'Azores Standard Time', - 'America/Scoresbysund' => 'Azores Standard Time', - 'America/Bahia' => 'Bahia Standard Time', - 'Asia/Dhaka' => 'Bangladesh Standard Time', - 'Asia/Thimphu' => 'Bangladesh Standard Time', - 'Europe/Minsk' => 'Belarus Standard Time', - 'America/Regina' => 'Canada Central Standard Time', - 'America/Swift_Current' => 'Canada Central Standard Time', - 'Atlantic/Cape_Verde' => 'Cape Verde Standard Time', - 'Etc/GMT+1' => 'Cape Verde Standard Time', - 'Asia/Yerevan' => 'Caucasus Standard Time', - 'Australia/Adelaide' => 'Cen. Australia Standard Time', - 'Australia/Broken_Hill' => 'Cen. 
Australia Standard Time', - 'America/Guatemala' => 'Central America Standard Time', - 'America/Belize' => 'Central America Standard Time', - 'America/Costa_Rica' => 'Central America Standard Time', - 'Pacific/Galapagos' => 'Central America Standard Time', - 'America/Tegucigalpa' => 'Central America Standard Time', - 'America/Managua' => 'Central America Standard Time', - 'America/El_Salvador' => 'Central America Standard Time', - 'Etc/GMT+6' => 'Central America Standard Time', - 'Asia/Almaty' => 'Central Asia Standard Time', - 'Antarctica/Vostok' => 'Central Asia Standard Time', - 'Indian/Chagos' => 'Central Asia Standard Time', - 'Asia/Bishkek' => 'Central Asia Standard Time', - 'Asia/Qyzylorda' => 'Central Asia Standard Time', - 'Etc/GMT-6' => 'Central Asia Standard Time', - 'America/Cuiaba' => 'Central Brazilian Standard Time', - 'America/Campo_Grande' => 'Central Brazilian Standard Time', - 'Europe/Budapest' => 'Central Europe Standard Time', - 'Europe/Tirane' => 'Central Europe Standard Time', - 'Europe/Prague' => 'Central Europe Standard Time', - 'Europe/Podgorica' => 'Central Europe Standard Time', - 'Europe/Belgrade' => 'Central Europe Standard Time', - 'Europe/Ljubljana' => 'Central Europe Standard Time', - 'Europe/Bratislava' => 'Central Europe Standard Time', - 'Europe/Warsaw' => 'Central European Standard Time', - 'Europe/Sarajevo' => 'Central European Standard Time', - 'Europe/Zagreb' => 'Central European Standard Time', - 'Europe/Skopje' => 'Central European Standard Time', - 'Pacific/Guadalcanal' => 'Central Pacific Standard Time', - 'Antarctica/Macquarie' => 'Central Pacific Standard Time', - 'Pacific/Ponape' => 'Central Pacific Standard Time', - 'Pacific/Kosrae' => 'Central Pacific Standard Time', - 'Pacific/Noumea' => 'Central Pacific Standard Time', - 'Pacific/Norfolk' => 'Central Pacific Standard Time', - 'Pacific/Bougainville' => 'Central Pacific Standard Time', - 'Pacific/Efate' => 'Central Pacific Standard Time', - 'Etc/GMT-11' => 'Central 
Pacific Standard Time', - 'America/Chicago' => 'Central Standard Time', - 'America/Winnipeg' => 'Central Standard Time', - 'America/Rainy_River' => 'Central Standard Time', - 'America/Rankin_Inlet' => 'Central Standard Time', - 'America/Resolute' => 'Central Standard Time', - 'America/Matamoros' => 'Central Standard Time', - 'America/Indiana/Knox' => 'Central Standard Time', - 'America/Indiana/Tell_City' => 'Central Standard Time', - 'America/Menominee' => 'Central Standard Time', - 'America/North_Dakota/Beulah' => 'Central Standard Time', - 'America/North_Dakota/Center' => 'Central Standard Time', - 'America/North_Dakota/New_Salem' => 'Central Standard Time', - 'CST6CDT' => 'Central Standard Time', - 'America/Mexico_City' => 'Central Standard Time (Mexico)', - 'America/Bahia_Banderas' => 'Central Standard Time (Mexico)', - 'America/Merida' => 'Central Standard Time (Mexico)', - 'America/Monterrey' => 'Central Standard Time (Mexico)', - 'Asia/Shanghai' => 'China Standard Time', - 'Asia/Chongqing' => 'China Standard Time', - 'Asia/Harbin' => 'China Standard Time', - 'Asia/Kashgar' => 'China Standard Time', - 'Asia/Urumqi' => 'China Standard Time', - 'Asia/Hong_Kong' => 'China Standard Time', - 'Asia/Macau' => 'China Standard Time', - 'Etc/GMT+12' => 'Dateline Standard Time', - 'Africa/Nairobi' => 'E. Africa Standard Time', - 'Antarctica/Syowa' => 'E. Africa Standard Time', - 'Africa/Djibouti' => 'E. Africa Standard Time', - 'Africa/Asmera' => 'E. Africa Standard Time', - 'Africa/Addis_Ababa' => 'E. Africa Standard Time', - 'Indian/Comoro' => 'E. Africa Standard Time', - 'Indian/Antananarivo' => 'E. Africa Standard Time', - 'Africa/Khartoum' => 'E. Africa Standard Time', - 'Africa/Mogadishu' => 'E. Africa Standard Time', - 'Africa/Juba' => 'E. Africa Standard Time', - 'Africa/Dar_es_Salaam' => 'E. Africa Standard Time', - 'Africa/Kampala' => 'E. Africa Standard Time', - 'Indian/Mayotte' => 'E. Africa Standard Time', - 'Etc/GMT-3' => 'E. 
Africa Standard Time', - 'Australia/Brisbane' => 'E. Australia Standard Time', - 'Australia/Lindeman' => 'E. Australia Standard Time', - 'Europe/Chisinau' => 'E. Europe Standard Time', - 'America/Sao_Paulo' => 'E. South America Standard Time', - 'America/New_York' => 'Eastern Standard Time', - 'America/Nassau' => 'Eastern Standard Time', - 'America/Toronto' => 'Eastern Standard Time', - 'America/Iqaluit' => 'Eastern Standard Time', - 'America/Montreal' => 'Eastern Standard Time', - 'America/Nipigon' => 'Eastern Standard Time', - 'America/Pangnirtung' => 'Eastern Standard Time', - 'America/Thunder_Bay' => 'Eastern Standard Time', - 'America/Havana' => 'Eastern Standard Time', - 'America/Port-au-Prince' => 'Eastern Standard Time', - 'America/Detroit' => 'Eastern Standard Time', - 'America/Indiana/Petersburg' => 'Eastern Standard Time', - 'America/Indiana/Vincennes' => 'Eastern Standard Time', - 'America/Indiana/Winamac' => 'Eastern Standard Time', - 'America/Kentucky/Monticello' => 'Eastern Standard Time', - 'America/Louisville' => 'Eastern Standard Time', - 'EST5EDT' => 'Eastern Standard Time', - 'America/Cancun' => 'Eastern Standard Time (Mexico)', - 'Africa/Cairo' => 'Egypt Standard Time', - 'Asia/Gaza' => 'Egypt Standard Time', - 'Asia/Hebron' => 'Egypt Standard Time', - 'Asia/Yekaterinburg' => 'Ekaterinburg Standard Time', - 'Europe/Kiev' => 'FLE Standard Time', - 'Europe/Mariehamn' => 'FLE Standard Time', - 'Europe/Sofia' => 'FLE Standard Time', - 'Europe/Tallinn' => 'FLE Standard Time', - 'Europe/Helsinki' => 'FLE Standard Time', - 'Europe/Vilnius' => 'FLE Standard Time', - 'Europe/Riga' => 'FLE Standard Time', - 'Europe/Uzhgorod' => 'FLE Standard Time', - 'Europe/Zaporozhye' => 'FLE Standard Time', - 'Pacific/Fiji' => 'Fiji Standard Time', - 'Europe/London' => 'GMT Standard Time', - 'Atlantic/Canary' => 'GMT Standard Time', - 'Atlantic/Faeroe' => 'GMT Standard Time', - 'Europe/Guernsey' => 'GMT Standard Time', - 'Europe/Dublin' => 'GMT Standard Time', - 
'Europe/Isle_of_Man' => 'GMT Standard Time', - 'Europe/Jersey' => 'GMT Standard Time', - 'Europe/Lisbon' => 'GMT Standard Time', - 'Atlantic/Madeira' => 'GMT Standard Time', - 'Europe/Bucharest' => 'GTB Standard Time', - 'Asia/Nicosia' => 'GTB Standard Time', - 'Europe/Athens' => 'GTB Standard Time', - 'Asia/Tbilisi' => 'Georgian Standard Time', - 'America/Godthab' => 'Greenland Standard Time', - 'Atlantic/Reykjavik' => 'Greenwich Standard Time', - 'Africa/Ouagadougou' => 'Greenwich Standard Time', - 'Africa/Abidjan' => 'Greenwich Standard Time', - 'Africa/Accra' => 'Greenwich Standard Time', - 'Africa/Banjul' => 'Greenwich Standard Time', - 'Africa/Conakry' => 'Greenwich Standard Time', - 'Africa/Bissau' => 'Greenwich Standard Time', - 'Africa/Monrovia' => 'Greenwich Standard Time', - 'Africa/Bamako' => 'Greenwich Standard Time', - 'Africa/Nouakchott' => 'Greenwich Standard Time', - 'Atlantic/St_Helena' => 'Greenwich Standard Time', - 'Africa/Freetown' => 'Greenwich Standard Time', - 'Africa/Dakar' => 'Greenwich Standard Time', - 'Africa/Sao_Tome' => 'Greenwich Standard Time', - 'Africa/Lome' => 'Greenwich Standard Time', - 'Pacific/Honolulu' => 'Hawaiian Standard Time', - 'Pacific/Rarotonga' => 'Hawaiian Standard Time', - 'Pacific/Tahiti' => 'Hawaiian Standard Time', - 'Pacific/Johnston' => 'Hawaiian Standard Time', - 'Etc/GMT+10' => 'Hawaiian Standard Time', - 'Asia/Calcutta' => 'India Standard Time', - 'Asia/Tehran' => 'Iran Standard Time', - 'Asia/Jerusalem' => 'Israel Standard Time', - 'Asia/Amman' => 'Jordan Standard Time', - 'Europe/Kaliningrad' => 'Kaliningrad Standard Time', - 'Asia/Seoul' => 'Korea Standard Time', - 'Africa/Tripoli' => 'Libya Standard Time', - 'Pacific/Kiritimati' => 'Line Islands Standard Time', - 'Etc/GMT-14' => 'Line Islands Standard Time', - 'Asia/Magadan' => 'Magadan Standard Time', - 'Indian/Mauritius' => 'Mauritius Standard Time', - 'Indian/Reunion' => 'Mauritius Standard Time', - 'Indian/Mahe' => 'Mauritius Standard Time', - 
'Asia/Beirut' => 'Middle East Standard Time', - 'America/Montevideo' => 'Montevideo Standard Time', - 'Africa/Casablanca' => 'Morocco Standard Time', - 'Africa/El_Aaiun' => 'Morocco Standard Time', - 'America/Denver' => 'Mountain Standard Time', - 'America/Edmonton' => 'Mountain Standard Time', - 'America/Cambridge_Bay' => 'Mountain Standard Time', - 'America/Inuvik' => 'Mountain Standard Time', - 'America/Yellowknife' => 'Mountain Standard Time', - 'America/Ojinaga' => 'Mountain Standard Time', - 'America/Boise' => 'Mountain Standard Time', - 'MST7MDT' => 'Mountain Standard Time', - 'America/Chihuahua' => 'Mountain Standard Time (Mexico)', - 'America/Mazatlan' => 'Mountain Standard Time (Mexico)', - 'Asia/Rangoon' => 'Myanmar Standard Time', - 'Indian/Cocos' => 'Myanmar Standard Time', - 'Asia/Novosibirsk' => 'N. Central Asia Standard Time', - 'Asia/Omsk' => 'N. Central Asia Standard Time', - 'Africa/Windhoek' => 'Namibia Standard Time', - 'Asia/Katmandu' => 'Nepal Standard Time', - 'Pacific/Auckland' => 'New Zealand Standard Time', - 'Antarctica/McMurdo' => 'New Zealand Standard Time', - 'America/St_Johns' => 'Newfoundland Standard Time', - 'Asia/Irkutsk' => 'North Asia East Standard Time', - 'Asia/Krasnoyarsk' => 'North Asia Standard Time', - 'Asia/Novokuznetsk' => 'North Asia Standard Time', - 'Asia/Pyongyang' => 'North Korea Standard Time', - 'America/Santiago' => 'Pacific SA Standard Time', - 'Antarctica/Palmer' => 'Pacific SA Standard Time', - 'America/Los_Angeles' => 'Pacific Standard Time', - 'America/Vancouver' => 'Pacific Standard Time', - 'America/Dawson' => 'Pacific Standard Time', - 'America/Whitehorse' => 'Pacific Standard Time', - 'America/Tijuana' => 'Pacific Standard Time', - 'America/Santa_Isabel' => 'Pacific Standard Time', - 'PST8PDT' => 'Pacific Standard Time', - 'Asia/Karachi' => 'Pakistan Standard Time', - 'America/Asuncion' => 'Paraguay Standard Time', - 'Europe/Paris' => 'Romance Standard Time', - 'Europe/Brussels' => 'Romance Standard 
Time', - 'Europe/Copenhagen' => 'Romance Standard Time', - 'Europe/Madrid' => 'Romance Standard Time', - 'Africa/Ceuta' => 'Romance Standard Time', - 'Asia/Srednekolymsk' => 'Russia Time Zone 10', - 'Asia/Kamchatka' => 'Russia Time Zone 11', - 'Asia/Anadyr' => 'Russia Time Zone 11', - 'Europe/Samara' => 'Russia Time Zone 3', - 'Europe/Moscow' => 'Russian Standard Time', - 'Europe/Simferopol' => 'Russian Standard Time', - 'Europe/Volgograd' => 'Russian Standard Time', - 'America/Cayenne' => 'SA Eastern Standard Time', - 'Antarctica/Rothera' => 'SA Eastern Standard Time', - 'America/Fortaleza' => 'SA Eastern Standard Time', - 'America/Araguaina' => 'SA Eastern Standard Time', - 'America/Belem' => 'SA Eastern Standard Time', - 'America/Maceio' => 'SA Eastern Standard Time', - 'America/Recife' => 'SA Eastern Standard Time', - 'America/Santarem' => 'SA Eastern Standard Time', - 'Atlantic/Stanley' => 'SA Eastern Standard Time', - 'America/Paramaribo' => 'SA Eastern Standard Time', - 'Etc/GMT+3' => 'SA Eastern Standard Time', - 'America/Bogota' => 'SA Pacific Standard Time', - 'America/Rio_Branco' => 'SA Pacific Standard Time', - 'America/Eirunepe' => 'SA Pacific Standard Time', - 'America/Coral_Harbour' => 'SA Pacific Standard Time', - 'Pacific/Easter' => 'SA Pacific Standard Time', - 'America/Guayaquil' => 'SA Pacific Standard Time', - 'America/Jamaica' => 'SA Pacific Standard Time', - 'America/Cayman' => 'SA Pacific Standard Time', - 'America/Panama' => 'SA Pacific Standard Time', - 'America/Lima' => 'SA Pacific Standard Time', - 'Etc/GMT+5' => 'SA Pacific Standard Time', - 'America/La_Paz' => 'SA Western Standard Time', - 'America/Antigua' => 'SA Western Standard Time', - 'America/Anguilla' => 'SA Western Standard Time', - 'America/Aruba' => 'SA Western Standard Time', - 'America/Barbados' => 'SA Western Standard Time', - 'America/St_Barthelemy' => 'SA Western Standard Time', - 'America/Kralendijk' => 'SA Western Standard Time', - 'America/Manaus' => 'SA Western 
Standard Time', - 'America/Boa_Vista' => 'SA Western Standard Time', - 'America/Porto_Velho' => 'SA Western Standard Time', - 'America/Blanc-Sablon' => 'SA Western Standard Time', - 'America/Curacao' => 'SA Western Standard Time', - 'America/Dominica' => 'SA Western Standard Time', - 'America/Santo_Domingo' => 'SA Western Standard Time', - 'America/Grenada' => 'SA Western Standard Time', - 'America/Guadeloupe' => 'SA Western Standard Time', - 'America/Guyana' => 'SA Western Standard Time', - 'America/St_Kitts' => 'SA Western Standard Time', - 'America/St_Lucia' => 'SA Western Standard Time', - 'America/Marigot' => 'SA Western Standard Time', - 'America/Martinique' => 'SA Western Standard Time', - 'America/Montserrat' => 'SA Western Standard Time', - 'America/Puerto_Rico' => 'SA Western Standard Time', - 'America/Lower_Princes' => 'SA Western Standard Time', - 'America/Grand_Turk' => 'SA Western Standard Time', - 'America/Port_of_Spain' => 'SA Western Standard Time', - 'America/St_Vincent' => 'SA Western Standard Time', - 'America/Tortola' => 'SA Western Standard Time', - 'America/St_Thomas' => 'SA Western Standard Time', - 'Etc/GMT+4' => 'SA Western Standard Time', - 'Asia/Bangkok' => 'SE Asia Standard Time', - 'Antarctica/Davis' => 'SE Asia Standard Time', - 'Indian/Christmas' => 'SE Asia Standard Time', - 'Asia/Jakarta' => 'SE Asia Standard Time', - 'Asia/Pontianak' => 'SE Asia Standard Time', - 'Asia/Phnom_Penh' => 'SE Asia Standard Time', - 'Asia/Vientiane' => 'SE Asia Standard Time', - 'Asia/Hovd' => 'SE Asia Standard Time', - 'Asia/Saigon' => 'SE Asia Standard Time', - 'Etc/GMT-7' => 'SE Asia Standard Time', - 'Pacific/Apia' => 'Samoa Standard Time', - 'Asia/Singapore' => 'Singapore Standard Time', - 'Asia/Brunei' => 'Singapore Standard Time', - 'Asia/Makassar' => 'Singapore Standard Time', - 'Asia/Kuala_Lumpur' => 'Singapore Standard Time', - 'Asia/Kuching' => 'Singapore Standard Time', - 'Asia/Manila' => 'Singapore Standard Time', - 'Etc/GMT-8' => 
'Singapore Standard Time', - 'Africa/Johannesburg' => 'South Africa Standard Time', - 'Africa/Bujumbura' => 'South Africa Standard Time', - 'Africa/Gaborone' => 'South Africa Standard Time', - 'Africa/Lubumbashi' => 'South Africa Standard Time', - 'Africa/Maseru' => 'South Africa Standard Time', - 'Africa/Blantyre' => 'South Africa Standard Time', - 'Africa/Maputo' => 'South Africa Standard Time', - 'Africa/Kigali' => 'South Africa Standard Time', - 'Africa/Mbabane' => 'South Africa Standard Time', - 'Africa/Lusaka' => 'South Africa Standard Time', - 'Africa/Harare' => 'South Africa Standard Time', - 'Etc/GMT-2' => 'South Africa Standard Time', - 'Asia/Colombo' => 'Sri Lanka Standard Time', - 'Asia/Damascus' => 'Syria Standard Time', - 'Asia/Taipei' => 'Taipei Standard Time', - 'Australia/Hobart' => 'Tasmania Standard Time', - 'Australia/Currie' => 'Tasmania Standard Time', - 'Asia/Tokyo' => 'Tokyo Standard Time', - 'Asia/Jayapura' => 'Tokyo Standard Time', - 'Pacific/Palau' => 'Tokyo Standard Time', - 'Asia/Dili' => 'Tokyo Standard Time', - 'Etc/GMT-9' => 'Tokyo Standard Time', - 'Pacific/Tongatapu' => 'Tonga Standard Time', - 'Pacific/Enderbury' => 'Tonga Standard Time', - 'Pacific/Fakaofo' => 'Tonga Standard Time', - 'Etc/GMT-13' => 'Tonga Standard Time', - 'Europe/Istanbul' => 'Turkey Standard Time', - 'America/Indianapolis' => 'US Eastern Standard Time', - 'America/Indiana/Marengo' => 'US Eastern Standard Time', - 'America/Indiana/Vevay' => 'US Eastern Standard Time', - 'America/Phoenix' => 'US Mountain Standard Time', - 'America/Dawson_Creek' => 'US Mountain Standard Time', - 'America/Creston' => 'US Mountain Standard Time', - 'America/Fort_Nelson' => 'US Mountain Standard Time', - 'America/Hermosillo' => 'US Mountain Standard Time', - 'Etc/GMT+7' => 'US Mountain Standard Time', - 'Etc/GMT' => 'UTC', - 'Etc/UTC' => 'UTC', - 'America/Danmarkshavn' => 'UTC', - 'Etc/GMT-12' => 'UTC+12', - 'Pacific/Tarawa' => 'UTC+12', - 'Pacific/Majuro' => 'UTC+12', - 
'Pacific/Kwajalein' => 'UTC+12', - 'Pacific/Nauru' => 'UTC+12', - 'Pacific/Funafuti' => 'UTC+12', - 'Pacific/Wake' => 'UTC+12', - 'Pacific/Wallis' => 'UTC+12', - 'Etc/GMT+2' => 'UTC-02', - 'America/Noronha' => 'UTC-02', - 'Atlantic/South_Georgia' => 'UTC-02', - 'Etc/GMT+11' => 'UTC-11', - 'Pacific/Pago_Pago' => 'UTC-11', - 'Pacific/Niue' => 'UTC-11', - 'Pacific/Midway' => 'UTC-11', - 'Asia/Ulaanbaatar' => 'Ulaanbaatar Standard Time', - 'Asia/Choibalsan' => 'Ulaanbaatar Standard Time', - 'America/Caracas' => 'Venezuela Standard Time', - 'Asia/Vladivostok' => 'Vladivostok Standard Time', - 'Asia/Sakhalin' => 'Vladivostok Standard Time', - 'Asia/Ust-Nera' => 'Vladivostok Standard Time', - 'Australia/Perth' => 'W. Australia Standard Time', - 'Antarctica/Casey' => 'W. Australia Standard Time', - 'Africa/Lagos' => 'W. Central Africa Standard Time', - 'Africa/Luanda' => 'W. Central Africa Standard Time', - 'Africa/Porto-Novo' => 'W. Central Africa Standard Time', - 'Africa/Kinshasa' => 'W. Central Africa Standard Time', - 'Africa/Bangui' => 'W. Central Africa Standard Time', - 'Africa/Brazzaville' => 'W. Central Africa Standard Time', - 'Africa/Douala' => 'W. Central Africa Standard Time', - 'Africa/Algiers' => 'W. Central Africa Standard Time', - 'Africa/Libreville' => 'W. Central Africa Standard Time', - 'Africa/Malabo' => 'W. Central Africa Standard Time', - 'Africa/Niamey' => 'W. Central Africa Standard Time', - 'Africa/Ndjamena' => 'W. Central Africa Standard Time', - 'Africa/Tunis' => 'W. Central Africa Standard Time', - 'Etc/GMT-1' => 'W. Central Africa Standard Time', - 'Europe/Berlin' => 'W. Europe Standard Time', - 'Europe/Andorra' => 'W. Europe Standard Time', - 'Europe/Vienna' => 'W. Europe Standard Time', - 'Europe/Zurich' => 'W. Europe Standard Time', - 'Europe/Busingen' => 'W. Europe Standard Time', - 'Europe/Gibraltar' => 'W. Europe Standard Time', - 'Europe/Rome' => 'W. Europe Standard Time', - 'Europe/Vaduz' => 'W. 
Europe Standard Time', - 'Europe/Luxembourg' => 'W. Europe Standard Time', - 'Europe/Monaco' => 'W. Europe Standard Time', - 'Europe/Malta' => 'W. Europe Standard Time', - 'Europe/Amsterdam' => 'W. Europe Standard Time', - 'Europe/Oslo' => 'W. Europe Standard Time', - 'Europe/Stockholm' => 'W. Europe Standard Time', - 'Arctic/Longyearbyen' => 'W. Europe Standard Time', - 'Europe/San_Marino' => 'W. Europe Standard Time', - 'Europe/Vatican' => 'W. Europe Standard Time', - 'Asia/Tashkent' => 'West Asia Standard Time', - 'Antarctica/Mawson' => 'West Asia Standard Time', - 'Asia/Oral' => 'West Asia Standard Time', - 'Asia/Aqtau' => 'West Asia Standard Time', - 'Asia/Aqtobe' => 'West Asia Standard Time', - 'Indian/Maldives' => 'West Asia Standard Time', - 'Indian/Kerguelen' => 'West Asia Standard Time', - 'Asia/Dushanbe' => 'West Asia Standard Time', - 'Asia/Ashgabat' => 'West Asia Standard Time', - 'Asia/Samarkand' => 'West Asia Standard Time', - 'Etc/GMT-5' => 'West Asia Standard Time', - 'Pacific/Port_Moresby' => 'West Pacific Standard Time', - 'Antarctica/DumontDUrville' => 'West Pacific Standard Time', - 'Pacific/Truk' => 'West Pacific Standard Time', - 'Pacific/Guam' => 'West Pacific Standard Time', - 'Pacific/Saipan' => 'West Pacific Standard Time', - 'Etc/GMT-10' => 'West Pacific Standard Time', - 'Asia/Yakutsk' => 'Yakutsk Standard Time', - 'Asia/Chita' => 'Yakutsk Standard Time', - 'Asia/Khandyga' => 'Yakutsk Standard Time' - } - - @@tzLocalTimePath = '/etc/localtime' - @@tzBaseFolder = '/usr/share/zoneinfo/' - @@tzRightFolder = 'right/' class << self @@ -588,105 +123,7 @@ def validate_hostname_equivalent(hnBuffer) # End of Internal methods - # get the unified timezone id by absolute file path of the timezone file - # file path: the absolute path of the file - def get_unified_timezoneid(filepath) - # remove the baseFolder path - tzID = filepath[@@tzBaseFolder.length..-1] if filepath.start_with?(@@tzBaseFolder) - - return 'Unknown' if tzID.nil? 
- - # if the rest starts with 'right/', remove it to unify the format - tzID = tzID[@@tzRightFolder.length..-1] if tzID.start_with?(@@tzRightFolder) - - return tzID - end # end get_unified_timezoneid - - def get_current_timezone - return @@CurrentTimeZone if !@@CurrentTimeZone.nil? - - tzID = 'Unknown' - - begin - # if /etc/localtime is a symlink, check the link file's path - if File.symlink?(@@tzLocalTimePath) - symlinkpath = File.absolute_path(File.readlink(@@tzLocalTimePath), File.dirname(@@tzLocalTimePath)) - tzID = get_unified_timezoneid(symlinkpath) - - # look for the entry in the timezone mapping - if @@tzMapping.has_key?(tzID) - @@CurrentTimeZone = @@tzMapping[tzID] - return @@CurrentTimeZone - end - end - - # calculate the md5 of /etc/locatime - md5sum = Digest::MD5.file(@@tzLocalTimePath).hexdigest - - # looks for a file in the /usr/share/zoneinfo/, which is identical to /etc/localtime. use the file name as the timezone - Dir.glob("#{@@tzBaseFolder}**/*") { |filepath| - # find all the files whose md5 is the same as the /etc/localtime - if File.file? filepath and Digest::MD5.file(filepath).hexdigest == md5sum - tzID = get_unified_timezoneid(filepath) - - # look for the entry in the timezone mapping - if @@tzMapping.has_key?(tzID) - @@CurrentTimeZone = @@tzMapping[tzID] - return @@CurrentTimeZone - end - end - } - rescue => error - OMS::Log.error_once("Unable to get the current time zone: #{error}") - end - - # assign the tzID if the corresponding Windows Time Zone is not found - @@CurrentTimeZone = tzID if @@CurrentTimeZone.nil? - - return @@CurrentTimeZone - end # end get_current_timezone - - def get_os_full_name(conf_path = "/etc/opt/microsoft/scx/conf/scx-release") - return @@OSFullName if !@@OSFullName.nil? 
- - if File.file?(conf_path) - conf = File.read(conf_path) - os_full_name = conf[/OSFullName=(.*?)\n/, 1] - if os_full_name and os_full_name.size - @@OSFullName = os_full_name - end - end - return @@OSFullName - end - - def get_os_name(conf_path = "/etc/opt/microsoft/scx/conf/scx-release") - return @@OSName if !@@OSName.nil? - - if File.file?(conf_path) - conf = File.read(conf_path) - os_name = conf[/OSName=(.*?)\n/, 1] - if os_name and os_name.size - @@OSName = os_name - end - end - return @@OSName - end - - def get_os_version(conf_path = "/etc/opt/microsoft/scx/conf/scx-release") - return @@OSVersion if !@@OSVersion.nil? - - if File.file?(conf_path) - conf = File.read(conf_path) - os_version = conf[/OSVersion=(.*?)\n/, 1] - if os_version and os_version.size - @@OSVersion = os_version - end - end - return @@OSVersion - end - def get_hostname(ignoreOldValue = false) - if not is_hostname_compliant?(@@Hostname) or ignoreOldValue then look_in_designated_hostnamefile if has_designated_hostnamefile? @@ -701,320 +138,6 @@ def get_hostname(ignoreOldValue = false) end return @@Hostname end - - def get_fully_qualified_domain_name - return @@FQDN unless @@FQDN.nil? - - begin - fqdn = Socket.gethostbyname(Socket.gethostname)[0] - rescue => error - OMS::Log.error_once("Unable to get the FQDN: #{error}") - else - @@FQDN = fqdn - end - return @@FQDN - end - - def get_installed_date(conf_path = "/etc/opt/microsoft/omsagent/sysconf/installinfo.txt") - return @@InstalledDate if !@@InstalledDate.nil? 
- - if File.file?(conf_path) - conf = File.read(conf_path) - installed_date = conf[/(.*)\n(.*)/, 2] - if installed_date and installed_date.size - begin - Time.parse(installed_date) - rescue ArgumentError - OMS::Log.error_once("Invalid install date: #{installed_date}") - else - @@InstalledDate = installed_date - end - end - end - return @@InstalledDate - end - - def get_agent_version(conf_path = "/etc/opt/microsoft/omsagent/sysconf/installinfo.txt") - return @@AgentVersion if !@@AgentVersion.nil? - - if File.file?(conf_path) - conf = File.read(conf_path) - agent_version = conf[/([\d]+\.[\d]+\.[\d]+-[\d]+)\s.*\n/, 1] - if agent_version and agent_version.size - @@AgentVersion = agent_version - end - end - return @@AgentVersion - end - - def format_time(time) - Time.at(time).utc.iso8601(3) # UTC with milliseconds - end - - def format_time_str(time) - DateTime.parse(time).strftime("%FT%H:%M:%S.%3NZ") - end - - def create_error_tag(tag) - "ERROR::#{tag}::" - end - - # create an HTTP object which uses HTTPS - def create_secure_http(uri, proxy={}) - if proxy.empty? - http = Net::HTTP.new( uri.host, uri.port ) - else - http = Net::HTTP.new( uri.host, uri.port, - proxy[:addr], proxy[:port], proxy[:user], proxy[:pass]) - end - http.use_ssl = true - http.verify_mode = OpenSSL::SSL::VERIFY_PEER - http.open_timeout = 30 - return http - end # create_secure_http - - # create an HTTP object to ODS - def create_ods_http(ods_uri, proxy={}) - http = create_secure_http(ods_uri, proxy) - http.cert = Configuration.cert - http.key = Configuration.key - return http - end # create_ods_http - - # create an HTTPRequest object to ODS - # parameters: - # path: string. path of the request - # record: Hash. body of the request - # compress: bool. Whether the body of the request should be compressed - # extra_header: Hash. extra HTTP headers - # serializer: method. serializer of the record - # returns: - # HTTPRequest. 
request to ODS - def create_ods_request(path, record, compress, extra_headers=nil, serializer=method(:parse_json_record_encoding)) - headers = extra_headers.nil? ? {} : extra_headers - - azure_resource_id = OMS::Configuration.azure_resource_id - if !azure_resource_id.to_s.empty? - headers[OMS::CaseSensitiveString.new("x-ms-AzureResourceId")] = azure_resource_id - end - - azure_region = OMS::Configuration.azure_region if defined?(OMS::Configuration.azure_region) - if !azure_region.to_s.empty? - headers[OMS::CaseSensitiveString.new("x-ms-AzureRegion")] = azure_region - end - - omscloud_id = OMS::Configuration.omscloud_id - if !omscloud_id.to_s.empty? - headers[OMS::CaseSensitiveString.new("x-ms-OMSCloudId")] = omscloud_id - end - - uuid = OMS::Configuration.uuid - if !uuid.to_s.empty? - headers[OMS::CaseSensitiveString.new("x-ms-UUID")] = uuid - end - - headers[OMS::CaseSensitiveString.new("X-Request-ID")] = SecureRandom.uuid - - headers["Content-Type"] = "application/json" - if compress == true - headers["Content-Encoding"] = "deflate" - end - - req = Net::HTTP::Post.new(path, headers) - json_msg = serializer.call(record) - if json_msg.nil? - return nil - else - if compress == true - req.body = Zlib::Deflate.deflate(json_msg) - else - req.body = json_msg - end - end - return req - end # create_ods_request - - # parses the json record with appropriate encoding - # parameters: - # record: Hash. body of the request - # returns: - # json represention of object, - # nil if encoding cannot be applied - def parse_json_record_encoding(record) - msg = nil - begin - msg = Yajl.dump(record) - rescue => error - # failed encoding, encode to utf-8, iso-8859-1 and try again - begin - OMS::Log.warn_once("Yajl.dump() failed due to encoding, will try iso-8859-1 for #{record}: #{error}") - - if !record["DataItems"].nil? 
- record["DataItems"].each do |item| - item["Message"] = item["Message"].encode('utf-8', 'iso-8859-1') - end - end - msg = Yajl.dump(record) - rescue => error - # at this point we've given up up, we don't recognize - # the encode, so return nil and log_warning for the - # record - OMS::Log.warn_once("Skipping due to failed encoding for #{record}: #{error}") - end - end - return msg - end - - # dump the records into json string - # assume the records is an array of single layer hash - # return nil if we cannot dump it - # parameters: - # records: hash[]. an array of single layer hash - def safe_dump_simple_hash_array(records) - msg = nil - - begin - msg = JSON.dump(records) - rescue JSON::GeneratorError => error - OMS::Log.warn_once("Unable to dump to JSON string. #{error}") - begin - # failed to dump, encode to utf-8, iso-8859-1 and try again - # records is an array of hash - records.each do | hash | - # the value is a hash - hash.each do | key, value | - # the value should be of simple type - # encode the string to utf-8 - if value.instance_of? String - hash[key] = value.encode('utf-8', 'iso-8859-1') - end - end - end - - msg = JSON.dump(records) - rescue => error - # at this point we've given up, we don't recognize the encode, - # so return nil and log_warning for the record - OMS::Log.warn_once("Skipping due to failed encoding for #{records}: #{error}") - end - rescue => error - # unexpected error when dumpping the records into JSON string - # skip here and return nil - OMS::Log.warn_once("Skipping due to unexpected error for #{records}: #{error}") - end - - return msg - end # safe_dump_simple_hash_array - - # start a request - # parameters: - # req: HTTPRequest. request - # secure_http: HTTP. HTTPS - # ignore404: bool. ignore the 404 error when it's true - # return_entire_response: bool. If true, return the entire response object - # returns: - # string. 
body of the response (or the whole response if return_entire_response is true) - def start_request(req, secure_http, ignore404 = false, return_entire_response = false) - # Tries to send the passed in request - # Raises an exception if the request fails. - # This exception should only be caught by the fluentd engine so that it retries sending this - begin - res = nil - res = secure_http.start { |http| http.request(req) } - rescue => e # rescue all StandardErrors - # Server didn't respond - raise RetryRequestException, "Net::HTTP.#{req.method.capitalize} raises exception: #{e.class}, '#{e.message}'" - else - if res.nil? - raise RetryRequestException, "Failed to #{req.method} at #{req.to_s} (res=nil)" - end - - if res.is_a?(Net::HTTPSuccess) - if return_entire_response - return res - else - return res.body - end - end - - if ignore404 and res.code == "404" - return '' - end - - if res.code != "200" - # Retry all failure error codes... - res_summary = "(request-id=#{req["X-Request-ID"]}; class=#{res.class.name}; code=#{res.code}; message=#{res.message}; body=#{res.body};)" - OMS::Log.error_once("HTTP Error: #{res_summary}") - raise RetryRequestException, "HTTP error: #{res_summary}" - end - - end # end begin - end # end start_request end # Class methods - end # class Common - - class IPcache - - def initialize(refresh_interval_seconds) - @cache = {} - @cache_lock = Mutex.new - @refresh_interval_seconds = refresh_interval_seconds - @condition = ConditionVariable.new - @thread = Thread.new(&method(:refresh_cache)) - end - - def get_ip(hostname) - @cache_lock.synchronize { - if @cache.has_key?(hostname) - return @cache[hostname] - else - ip = get_ip_from_socket(hostname) - @cache[hostname] = ip - return ip - end - } - end - - private - - def get_ip_from_socket(hostname) - begin - addrinfos = Socket::getaddrinfo(hostname, "echo", Socket::AF_UNSPEC) - rescue => error - OMS::Log.error_once("Unable to resolve the IP of '#{hostname}': #{error}") - return nil - end - - if 
addrinfos.size >= 1 - return addrinfos[0][3] - end - - return nil - end - - def refresh_cache - while true - @cache_lock.synchronize { - @condition.wait(@cache_lock, @refresh_interval_seconds) - # Flush the cache completely to prevent it from growing indefinitly - @cache = {} - } - end - end - - end - - class CaseSensitiveString < String - def downcase - self - end - def capitalize - self - end - def to_s - self - end - end - end # module OMS diff --git a/source/plugins/utils/oms_configuration.rb b/source/plugins/utils/oms_configuration.rb deleted file mode 100644 index d8682f35b..000000000 --- a/source/plugins/utils/oms_configuration.rb +++ /dev/null @@ -1,381 +0,0 @@ -# frozen_string_literal: true - -module OMS - - class Configuration - require 'openssl' - require 'uri' - - require_relative 'omslog' - - @@ConfigurationLoaded = false - - @@Cert = nil - @@Key = nil - - @@AgentId = nil - @@WorkspaceId = nil - @@ODSEndpoint = nil - @@DiagnosticEndpoint = nil - @@GetBlobODSEndpoint = nil - @@NotifyBlobODSEndpoint = nil - @@OmsCloudId = nil - @@AgentGUID = nil - @@URLTLD = nil - @@LogFacility = nil - @@AzureResourceId = nil - @@AzureRegion = nil - @@AzureIMDSEndpoint = "http://169.254.169.254/metadata/instance?api-version=2017-12-01" - @@AzureResIDThreadLock = Mutex.new - @@ProxyConfig = nil - @@ProxyConfigFilePath = "/etc/opt/microsoft/omsagent/proxy.conf" - @@UUID = nil - @@TopologyInterval = nil - @@TelemetryInterval = nil - - class << self - - # test the onboard file existence - def test_onboard_file(file_name) - if !File.file?(file_name) - OMS::Log.error_once("Could not find #{file_name} Make sure to onboard.") - return false - end - - if !File.readable?(file_name) - OMS::Log.error_once("Could not read #{file_name} Check that the read permissions are set for the omsagent user") - return false - end - - return true - end - - def get_proxy_config(proxy_conf_path) - old_proxy_conf_path = '/etc/opt/microsoft/omsagent/conf/proxy.conf' - if !File.exist?(proxy_conf_path) 
and File.exist?(old_proxy_conf_path) - proxy_conf_path = old_proxy_conf_path - end - - begin - proxy_config = parse_proxy_config(File.read(proxy_conf_path)) - rescue SystemCallError # Error::ENOENT - return {} - end - - if proxy_config.nil? - OMS::Log.error_once("Failed to parse the proxy configuration in '#{proxy_conf_path}'") - return {} - end - - return proxy_config - end - - def parse_proxy_config(proxy_conf_str) - # Remove the http(s) protocol - proxy_conf_str = proxy_conf_str.gsub(/^(https?:\/\/)?/, "") - - # Check for unsupported protocol - if proxy_conf_str[/^[a-z]+:\/\//] - return nil - end - - re = /^(?:(?[^:]+):(?[^@]+)@)?(?[^:@]+)(?::(?\d+))?$/ - matches = re.match(proxy_conf_str) - if matches.nil? or matches[:addr].nil? - return nil - end - # Convert nammed matches to a hash - Hash[ matches.names.map{ |name| name.to_sym}.zip( matches.captures ) ] - end - - def get_azure_region_from_imds() - begin - uri = URI.parse(@@AzureIMDSEndpoint) - http_get_req = Net::HTTP::Get.new(uri, initheader = {'Metadata' => 'true'}) - - http_req = Net::HTTP.new(uri.host, uri.port) - - http_req.open_timeout = 3 - http_req.read_timeout = 2 - - res = http_req.start() do |http| - http.request(http_get_req) - end - - imds_instance_json = JSON.parse(res.body) - - return nil if !imds_instance_json.has_key?("compute") || imds_instance_json['compute'].empty? #classic vm - - imds_instance_json_compute = imds_instance_json['compute'] - return nil unless imds_instance_json_compute.has_key?("location") - return nil if imds_instance_json_compute['location'].empty? 
- return imds_instance_json_compute['location'] - rescue => e - # this may be a container instance or a non-Azure VM - return nil - end - end - - def get_azure_resid_from_imds() - begin - uri = URI.parse(@@AzureIMDSEndpoint) - http_get_req = Net::HTTP::Get.new(uri, initheader = {'Metadata' => 'true'}) - - http_req = Net::HTTP.new(uri.host, uri.port) - - http_req.open_timeout = 3 - http_req.read_timeout = 2 - - res = http_req.start() do |http| - http.request(http_get_req) - end - - imds_instance_json = JSON.parse(res.body) - - return nil if !imds_instance_json.has_key?("compute") || imds_instance_json['compute'].empty? #classic vm - - imds_instance_json_compute = imds_instance_json['compute'] - - #guard from missing keys - return nil unless imds_instance_json_compute.has_key?("subscriptionId") && imds_instance_json_compute.has_key?("resourceGroupName") && imds_instance_json_compute.has_key?("name") && imds_instance_json_compute.has_key?("vmScaleSetName") - - #guard from blank values - return nil if imds_instance_json_compute['subscriptionId'].empty? || imds_instance_json_compute['resourceGroupName'].empty? || imds_instance_json_compute['name'].empty? - - azure_resource_id = '/subscriptions/' + imds_instance_json_compute['subscriptionId'] + '/resourceGroups/' + imds_instance_json_compute['resourceGroupName'] + '/providers/Microsoft.Compute/' - - if (imds_instance_json_compute['vmScaleSetName'].empty?) 
- azure_resource_id = azure_resource_id + 'virtualMachines/' + imds_instance_json_compute['name'] - else - azure_resource_id = azure_resource_id + 'virtualMachineScaleSets/' + imds_instance_json_compute['vmScaleSetName'] + '/virtualMachines/' + imds_instance_json_compute['name'] - end - - return azure_resource_id - - rescue => e - # this may be a container instance or a non-Azure VM - OMS::Log.warn_once("Could not fetch Azure Resource ID from IMDS, Reason: #{e}") - return nil - end - end - - def update_azure_resource_id() - retries=1 - max_retries=3 - - loop do - break if retries > max_retries - azure_resource_id = get_azure_resid_from_imds() - if azure_resource_id.nil? - sleep (retries * 120) - retries += 1 - next - end - - @@AzureResourceId = azure_resource_id unless @@AzureResourceId == azure_resource_id - retries=1 #reset - sleep 60 - end - - OMS::Log.warn_once("Exceeded max attempts to fetch Azure Resource ID, killing the thread") - return #terminate - end - - # load the configuration from the configuration file, cert, and key path - def load_configuration(conf_path, cert_path, key_path) - return true if @@ConfigurationLoaded - return false if !test_onboard_file(conf_path) or !test_onboard_file(cert_path) or !test_onboard_file(key_path) - - @@ProxyConfig = get_proxy_config(@@ProxyConfigFilePath) - - endpoint_lines = IO.readlines(conf_path).select{ |line| line.start_with?("OMS_ENDPOINT")} - if endpoint_lines.size == 0 - OMS::Log.error_once("Could not find OMS_ENDPOINT setting in #{conf_path}") - return false - elsif endpoint_lines.size > 1 - OMS::Log.warn_once("Found more than one OMS_ENDPOINT setting in #{conf_path}, will use the first one.") - end - - begin - endpoint_url = endpoint_lines[0].split("=")[1].strip - @@ODSEndpoint = URI.parse( endpoint_url ) - @@GetBlobODSEndpoint = @@ODSEndpoint.clone - @@GetBlobODSEndpoint.path = '/ContainerService.svc/GetBlobUploadUri' - @@NotifyBlobODSEndpoint = @@ODSEndpoint.clone - @@NotifyBlobODSEndpoint.path = 
'/ContainerService.svc/PostBlobUploadNotification' - rescue => e - OMS::Log.error_once("Error parsing endpoint url. #{e}") - return false - end - - begin - diagnostic_endpoint_lines = IO.readlines(conf_path).select{ |line| line.start_with?("DIAGNOSTIC_ENDPOINT=")} - if diagnostic_endpoint_lines.size == 0 - # Endpoint to be inferred from @@ODSEndpoint - @@DiagnosticEndpoint = @@ODSEndpoint.clone - @@DiagnosticEndpoint.path = '/DiagnosticsDataService.svc/PostJsonDataItems' - else - if diagnostic_endpoint_lines.size > 1 - OMS::Log.warn_once("Found more than one DIAGNOSTIC_ENDPOINT setting in #{conf_path}, will use the first one.") - end - diagnostic_endpoint_url = diagnostic_endpoint_lines[0].split("=")[1].strip - @@DiagnosticEndpoint = URI.parse( diagnostic_endpoint_url ) - end - rescue => e - OMS::Log.error_once("Error obtaining diagnostic endpoint url. #{e}") - return false - end - - agentid_lines = IO.readlines(conf_path).select{ |line| line.start_with?("AGENT_GUID")} - if agentid_lines.size == 0 - OMS::Log.error_once("Could not find AGENT_GUID setting in #{conf_path}") - return false - elsif agentid_lines.size > 1 - OMS::Log.warn_once("Found more than one AGENT_GUID setting in #{conf_path}, will use the first one.") - end - - begin - @@AgentId = agentid_lines[0].split("=")[1].strip - rescue => e - OMS::Log.error_once("Error parsing agent id. #{e}") - return false - end - - File.open(conf_path).each_line do |line| - if line =~ /^WORKSPACE_ID/ - @@WorkspaceId = line.sub("WORKSPACE_ID=","").strip - end - if line =~ /AZURE_RESOURCE_ID/ - # We have contract with AKS team about how to pass AKS specific resource id. - # As per contract, AKS team before starting the agent will set environment variable - # 'customResourceId' - @@AzureResourceId = ENV['customResourceId'] - - # Only if environment variable is empty/nil load it from imds and refresh it periodically. - if @@AzureResourceId.nil? || @@AzureResourceId.empty? 
- @@AzureResourceId = line.sub("AZURE_RESOURCE_ID=","").strip - if @@AzureResourceId.include? "Microsoft.ContainerService" - OMS::Log.info_once("Azure resource id in configuration file is for AKS. It will be used") - else - Thread.new(&method(:update_azure_resource_id)) if @@AzureResIDThreadLock.try_lock - end - else - OMS::Log.info_once("There is non empty value set for overriden-resourceId environment variable. It will be used") - end - end - if line =~ /OMSCLOUD_ID/ - @@OmsCloudId = line.sub("OMSCLOUD_ID=","").strip - end - if line =~ /^AGENT_GUID/ - @@AgentGUID = line.sub("AGENT_GUID=","").strip - end - if line =~ /^URL_TLD/ - @@URLTLD = line.sub("URL_TLD=","").strip - end - if line =~ /^LOG_FACILITY/ - @@LogFacility = line.sub("LOG_FACILITY=","").strip - end - if line =~ /UUID/ - @@UUID = line.sub("UUID=","").strip - end - end - - begin - raw = File.read cert_path - @@Cert = OpenSSL::X509::Certificate.new raw - raw = File.read key_path - @@Key = OpenSSL::PKey::RSA.new raw - rescue => e - OMS::Log.error_once("Error loading certs: #{e}") - return false - end - - @@AzureRegion = get_azure_region_from_imds() - if @@AzureRegion.nil? || @@AzureRegion.empty? - OMS::Log.warn_once("Azure region value is not set. 
This must be onpremise machine") - @@AzureRegion = "OnPremise" - end - - @@ConfigurationLoaded = true - return true - end # load_configuration - - def set_request_intervals(topology_interval, telemetry_interval) - @@TopologyInterval = topology_interval - @@TelemetryInterval = telemetry_interval - OMS::Log.info_once("OMS agent management service topology request interval now #{@@TopologyInterval}") - OMS::Log.info_once("OMS agent management service telemetry request interval now #{@@TelemetryInterval}") - end - - def cert - @@Cert - end # getter cert - - def key - @@Key - end # getter key - - def workspace_id - @@WorkspaceId - end # getter workspace_id - - def agent_id - @@AgentId - end # getter agent_id - - def ods_endpoint - @@ODSEndpoint - end # getter ods_endpoint - - def diagnostic_endpoint - @@DiagnosticEndpoint - end # getter diagnostic_endpoint - - def get_blob_ods_endpoint - @@GetBlobODSEndpoint - end # getter get_blob_ods_endpoint - - def notify_blob_ods_endpoint - @@NotifyBlobODSEndpoint - end # getter notify_blob_ods_endpoint - - def azure_resource_id - @@AzureResourceId - end - - def omscloud_id - @@OmsCloudId - end - - def agent_guid - @@AgentGUID - end # getter agent_guid - - def url_tld - @@URLTLD - end # getter url_tld - - def log_facility - @@LogFacility - end # getter log_facility - - def uuid - @@UUID - end # getter for VM uuid - - def azure_region - @@AzureRegion - end - - def topology_interval - @@TopologyInterval - end - - def telemetry_interval - @@TelemetryInterval - end - - end # Class methods - - end # class Common -end # module OMS From 2c8afa4064c4faf440433b44447ed7021f94cb6c Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Wed, 19 May 2021 13:14:31 -0700 Subject: [PATCH 14/18] fix pr feedback --- source/plugins/go/src/oms.go | 21 ++++-- source/plugins/go/src/telemetry.go | 13 ++++ source/plugins/go/src/utils.go | 115 ++++++++++++----------------- 3 files changed, 76 insertions(+), 73 deletions(-) diff --git 
a/source/plugins/go/src/oms.go b/source/plugins/go/src/oms.go index 7b25a3290..25f364c55 100644 --- a/source/plugins/go/src/oms.go +++ b/source/plugins/go/src/oms.go @@ -339,6 +339,15 @@ const ( PromScrapingError ) +// DataType to be used as enum per data type socket client creation +type DataType int +const ( + // DataType to be used as enum per data type socket client creation + ContainerLogV2 DataType = iota + KubeMonAgentEvents + InsightsMetrics +) + func createLogger() *log.Logger { var logfile *os.File @@ -698,7 +707,7 @@ func flushKubeMonAgentEventRecords() { msgpBytes := convertMsgPackEntriesToMsgpBytes(MdsdKubeMonAgentEventsTagName, msgPackEntries) if MdsdKubeMonMsgpUnixSocketClient == nil { Log("Error::mdsd::mdsd connection for KubeMonAgentEvents does not exist. re-connecting ...") - CreateMDSDClientKubeMon(ContainerType) + CreateMDSDClient(KubeMonAgentEvents, ContainerType) if MdsdKubeMonMsgpUnixSocketClient == nil { Log("Error::mdsd::Unable to create mdsd client for KubeMonAgentEvents. Please check error log.") ContainerLogTelemetryMutex.Lock() @@ -899,7 +908,7 @@ func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int msgpBytes := convertMsgPackEntriesToMsgpBytes(MdsdInsightsMetricsTagName, msgPackEntries) if MdsdInsightsMetricsMsgpUnixSocketClient == nil { Log("Error::mdsd::mdsd connection does not exist. re-connecting ...") - CreateMDSDClientInsightsMetrics(ContainerType) + CreateMDSDClient(InsightsMetrics, ContainerType) if MdsdInsightsMetricsMsgpUnixSocketClient == nil { Log("Error::mdsd::Unable to create mdsd client for insights metrics. Please check error log.") ContainerLogTelemetryMutex.Lock() @@ -1203,7 +1212,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { if MdsdMsgpUnixSocketClient == nil { Log("Error::mdsd::mdsd connection does not exist. 
re-connecting ...") - CreateMDSDClient(ContainerType) + CreateMDSDClient(ContainerLogV2, ContainerType) if MdsdMsgpUnixSocketClient == nil { Log("Error::mdsd::Unable to create mdsd client. Please check error log.") @@ -1654,7 +1663,7 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { } if ContainerLogsRouteV2 == true { - CreateMDSDClient(ContainerType) + CreateMDSDClient(ContainerLogV2, ContainerType) } else if ContainerLogsRouteADX == true { CreateADXClient() } else { // v1 or windows @@ -1664,8 +1673,8 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { if IsWindows == false { // mdsd linux specific Log("Creating MDSD clients for KubeMonAgentEvents & InsightsMetrics") - CreateMDSDClientKubeMon(ContainerType) - CreateMDSDClientInsightsMetrics(ContainerType) + CreateMDSDClient(KubeMonAgentEvents, ContainerType) + CreateMDSDClient(InsightsMetrics, ContainerType) } ContainerLogSchemaVersion := strings.TrimSpace(strings.ToLower(os.Getenv("AZMON_CONTAINER_LOG_SCHEMA_VERSION"))) diff --git a/source/plugins/go/src/telemetry.go b/source/plugins/go/src/telemetry.go index 36a8e4d10..458adce00 100644 --- a/source/plugins/go/src/telemetry.go +++ b/source/plugins/go/src/telemetry.go @@ -78,6 +78,8 @@ const ( metricNameNumberofSend429ErrorsTelegrafMetrics = "TelegrafMetricsSend429ErrorCount" metricNameErrorCountContainerLogsSendErrorsToMDSDFromFluent = "ContainerLogs2MdsdSendErrorCount" metricNameErrorCountContainerLogsMDSDClientCreateError = "ContainerLogsMdsdClientCreateErrorCount" + metricNameErrorCountInsightsMetricsMDSDClientCreateError = "InsightsMetricsMDSDClientCreateErrorsCount" + metricNameErrorCountKubeMonEventsMDSDClientCreateError = "KubeMonEventsMDSDClientCreateErrorsCount" metricNameErrorCountContainerLogsSendErrorsToADXFromFluent = "ContainerLogs2ADXSendErrorCount" metricNameErrorCountContainerLogsADXClientCreateError = "ContainerLogsADXClientCreateErrorCount" @@ -116,6 +118,8 @@ func 
SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { containerLogsMDSDClientCreateErrors := ContainerLogsMDSDClientCreateErrors containerLogsSendErrorsToADXFromFluent := ContainerLogsSendErrorsToADXFromFluent containerLogsADXClientCreateErrors := ContainerLogsADXClientCreateErrors + insightsMetricsMDSDClientCreateErrors := InsightsMetricsMDSDClientCreateErrors + kubeMonEventsMDSDClientCreateErrors := KubeMonEventsMDSDClientCreateErrors osmNamespaceCount := OSMNamespaceCount promMonitorPods := PromMonitorPods promMonitorPodsNamespaceLength := PromMonitorPodsNamespaceLength @@ -136,6 +140,8 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { ContainerLogsMDSDClientCreateErrors = 0.0 ContainerLogsSendErrorsToADXFromFluent = 0.0 ContainerLogsADXClientCreateErrors = 0.0 + InsightsMetricsMDSDClientCreateErrors = 0.0 + KubeMonEventsMDSDClientCreateErrors = 0.0 ContainerLogTelemetryMutex.Unlock() if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "daemonset") == 0 { @@ -190,6 +196,13 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { if containerLogsADXClientCreateErrors > 0.0 { TelemetryClient.Track(appinsights.NewMetricTelemetry(metricNameErrorCountContainerLogsADXClientCreateError, containerLogsADXClientCreateErrors)) } + if insightsMetricsMDSDClientCreateErrors > 0.0 { + TelemetryClient.Track(appinsights.NewMetricTelemetry(metricNameErrorCountInsightsMetricsMDSDClientCreateError, insightsMetricsMDSDClientCreateErrors)) + } + if containerLogsMDSDClientCreateErrors > 0.0 { + TelemetryClient.Track(appinsights.NewMetricTelemetry(metricNameErrorCountKubeMonEventsMDSDClientCreateError, kubeMonEventsMDSDClientCreateErrors)) + } + start = time.Now() } } diff --git a/source/plugins/go/src/utils.go b/source/plugins/go/src/utils.go index 9d60fb3d1..e95ae5b49 100644 --- a/source/plugins/go/src/utils.go +++ b/source/plugins/go/src/utils.go @@ -117,77 +117,58 @@ func ToString(s interface{}) string { 
} //mdsdSocketClient to write msgp messages -func CreateMDSDClient(containerType string) { - if MdsdMsgpUnixSocketClient != nil { - MdsdMsgpUnixSocketClient.Close() - MdsdMsgpUnixSocketClient = nil - } - /*conn, err := fluent.New(fluent.Config{FluentNetwork:"unix", - FluentSocketPath:"/var/run/mdsd/default_fluent.socket", - WriteTimeout: 5 * time.Second, - RequestAck: true}) */ +func CreateMDSDClient(dataType DataType, containerType string) { mdsdfluentSocket := "/var/run/mdsd/default_fluent.socket" if containerType != "" && strings.Compare(strings.ToLower(containerType), "prometheussidecar") == 0 { mdsdfluentSocket = fmt.Sprintf("/var/run/mdsd-%s/default_fluent.socket", containerType) } - conn, err := net.DialTimeout("unix", - mdsdfluentSocket, 10*time.Second) - if err != nil { - Log("Error::mdsd::Unable to open MDSD msgp socket connection %s", err.Error()) - //log.Fatalf("Unable to open MDSD msgp socket connection %s", err.Error()) - } else { - Log("Successfully created MDSD msgp socket connection: %s", mdsdfluentSocket) - MdsdMsgpUnixSocketClient = conn - } -} - -//mdsdSocketClient to write msgp messages for KubeMonAgent Events -func CreateMDSDClientKubeMon(containerType string) { - if MdsdKubeMonMsgpUnixSocketClient != nil { - MdsdKubeMonMsgpUnixSocketClient.Close() - MdsdKubeMonMsgpUnixSocketClient = nil - } - /*conn, err := fluent.New(fluent.Config{FluentNetwork:"unix", - FluentSocketPath:"/var/run/mdsd/default_fluent.socket", - WriteTimeout: 5 * time.Second, - RequestAck: true}) */ - mdsdfluentSocket := "/var/run/mdsd/default_fluent.socket" - if containerType != "" && strings.Compare(strings.ToLower(containerType), "prometheussidecar") == 0 { - mdsdfluentSocket = fmt.Sprintf("/var/run/mdsd-%s/default_fluent.socket",containerType) - } - conn, err := net.DialTimeout("unix", - mdsdfluentSocket, 10*time.Second) - if err != nil { - Log("Error::mdsd::Unable to open MDSD msgp socket connection for KubeMon events %s", err.Error()) - //log.Fatalf("Unable to open MDSD 
msgp socket connection %s", err.Error()) - } else { - Log("Successfully created MDSD msgp socket connection for KubeMon events:%s", mdsdfluentSocket) - MdsdKubeMonMsgpUnixSocketClient = conn - } -} - -//mdsdSocketClient to write msgp messages for KubeMonAgent Events -func CreateMDSDClientInsightsMetrics(containerType string) { - if MdsdInsightsMetricsMsgpUnixSocketClient != nil { - MdsdInsightsMetricsMsgpUnixSocketClient.Close() - MdsdInsightsMetricsMsgpUnixSocketClient = nil - } - /*conn, err := fluent.New(fluent.Config{FluentNetwork:"unix", - FluentSocketPath:"/var/run/mdsd/default_fluent.socket", - WriteTimeout: 5 * time.Second, - RequestAck: true}) */ - mdsdfluentSocket := "/var/run/mdsd/default_fluent.socket" - if containerType != "" && strings.Compare(strings.ToLower(containerType), "prometheussidecar") == 0 { - mdsdfluentSocket = fmt.Sprintf("/var/run/mdsd-%s/default_fluent.socket",containerType) - } - conn, err := net.DialTimeout("unix", - mdsdfluentSocket, 10*time.Second) - if err != nil { - Log("Error::mdsd::Unable to open MDSD msgp socket connectionfor insights metrics %s", err.Error()) - //log.Fatalf("Unable to open MDSD msgp socket connection %s", err.Error()) - } else { - Log("Successfully created MDSD msgp socket connection for Insights metrics %s", mdsdfluentSocket) - MdsdInsightsMetricsMsgpUnixSocketClient = conn + switch dataType { + case ContainerLogV2: + if MdsdMsgpUnixSocketClient != nil { + MdsdMsgpUnixSocketClient.Close() + MdsdMsgpUnixSocketClient = nil + } + /*conn, err := fluent.New(fluent.Config{FluentNetwork:"unix", + FluentSocketPath:"/var/run/mdsd/default_fluent.socket", + WriteTimeout: 5 * time.Second, + RequestAck: true}) */ + conn, err := net.DialTimeout("unix", + mdsdfluentSocket, 10*time.Second) + if err != nil { + Log("Error::mdsd::Unable to open MDSD msgp socket connection for ContainerLogV2 %s", err.Error()) + //log.Fatalf("Unable to open MDSD msgp socket connection %s", err.Error()) + } else { + Log("Successfully created MDSD 
msgp socket connection for ContainerLogV2: %s", mdsdfluentSocket, ) + MdsdMsgpUnixSocketClient = conn + } + case KubeMonAgentEvents: + if MdsdKubeMonMsgpUnixSocketClient != nil { + MdsdKubeMonMsgpUnixSocketClient.Close() + MdsdKubeMonMsgpUnixSocketClient = nil + } + conn, err := net.DialTimeout("unix", + mdsdfluentSocket, 10*time.Second) + if err != nil { + Log("Error::mdsd::Unable to open MDSD msgp socket connection for KubeMon events %s", err.Error()) + //log.Fatalf("Unable to open MDSD msgp socket connection %s", err.Error()) + } else { + Log("Successfully created MDSD msgp socket connection for KubeMon events:%s", mdsdfluentSocket) + MdsdKubeMonMsgpUnixSocketClient = conn + } + case InsightsMetrics: + if MdsdInsightsMetricsMsgpUnixSocketClient != nil { + MdsdInsightsMetricsMsgpUnixSocketClient.Close() + MdsdInsightsMetricsMsgpUnixSocketClient = nil + } + conn, err := net.DialTimeout("unix", + mdsdfluentSocket, 10*time.Second) + if err != nil { + Log("Error::mdsd::Unable to open MDSD msgp socket connection for insights metrics %s", err.Error()) + //log.Fatalf("Unable to open MDSD msgp socket connection %s", err.Error()) + } else { + Log("Successfully created MDSD msgp socket connection for Insights metrics %s", mdsdfluentSocket) + MdsdInsightsMetricsMsgpUnixSocketClient = conn + } } } From 7b9498994ff8f807733b0f9466cf368be738b753 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Wed, 19 May 2021 13:44:01 -0700 Subject: [PATCH 15/18] fix pr feedback --- source/plugins/go/src/telemetry.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/plugins/go/src/telemetry.go b/source/plugins/go/src/telemetry.go index 458adce00..4750b4624 100644 --- a/source/plugins/go/src/telemetry.go +++ b/source/plugins/go/src/telemetry.go @@ -199,7 +199,7 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { if insightsMetricsMDSDClientCreateErrors > 0.0 { 
TelemetryClient.Track(appinsights.NewMetricTelemetry(metricNameErrorCountInsightsMetricsMDSDClientCreateError, insightsMetricsMDSDClientCreateErrors)) } - if containerLogsMDSDClientCreateErrors > 0.0 { + if kubeMonEventsMDSDClientCreateErrors > 0.0 { TelemetryClient.Track(appinsights.NewMetricTelemetry(metricNameErrorCountKubeMonEventsMDSDClientCreateError, kubeMonEventsMDSDClientCreateErrors)) } From b2fb9d8d95a1623455d457b1dc0ad989e99984a5 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Wed, 19 May 2021 15:45:51 -0700 Subject: [PATCH 16/18] clean up --- source/plugins/go/src/utils.go | 4 ++-- source/plugins/ruby/KubernetesApiClient.rb | 7 +------ source/plugins/ruby/in_kube_pvinventory.rb | 2 +- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/source/plugins/go/src/utils.go b/source/plugins/go/src/utils.go index e95ae5b49..3fe5c6d0e 100644 --- a/source/plugins/go/src/utils.go +++ b/source/plugins/go/src/utils.go @@ -138,7 +138,7 @@ func CreateMDSDClient(dataType DataType, containerType string) { Log("Error::mdsd::Unable to open MDSD msgp socket connection for ContainerLogV2 %s", err.Error()) //log.Fatalf("Unable to open MDSD msgp socket connection %s", err.Error()) } else { - Log("Successfully created MDSD msgp socket connection for ContainerLogV2: %s", mdsdfluentSocket, ) + Log("Successfully created MDSD msgp socket connection for ContainerLogV2: %s", mdsdfluentSocket) MdsdMsgpUnixSocketClient = conn } case KubeMonAgentEvents: @@ -249,4 +249,4 @@ func convertMsgPackEntriesToMsgpBytes(fluentForwardTag string, msgPackEntries [] } return msgpBytes -} \ No newline at end of file +} diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb index a791b5ddf..3720bf6dc 100644 --- a/source/plugins/ruby/KubernetesApiClient.rb +++ b/source/plugins/ruby/KubernetesApiClient.rb @@ -439,9 +439,6 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle if (!container["resources"].nil? 
&& !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?) metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect]) - # metricItem = {} - # metricItem["DataItems"] = [] - metricProps = {} metricProps["Timestamp"] = metricTime metricProps["Host"] = nodeName @@ -465,9 +462,7 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle if (metricCategory == "limits" && @@NodeMetrics.has_key?(nodeMetricsHashKey)) metricValue = @@NodeMetrics[nodeMetricsHashKey] #@Log.info("Limits not set for container #{clusterId + "/" + podUid + "/" + containerName} using node level limits: #{nodeMetricsHashKey}=#{metricValue} ") - # metricItem = {} - # metricItem["DataItems"] = [] - + metricProps = {} metricProps["Timestamp"] = metricTime metricProps["Host"] = nodeName diff --git a/source/plugins/ruby/in_kube_pvinventory.rb b/source/plugins/ruby/in_kube_pvinventory.rb index 6b1c6f758..40eebac8a 100644 --- a/source/plugins/ruby/in_kube_pvinventory.rb +++ b/source/plugins/ruby/in_kube_pvinventory.rb @@ -254,4 +254,4 @@ def run_periodic @mutex.unlock end end # Kube_PVInventory_Input -end # module \ No newline at end of file +end # module From a1c7658fa49b87a56ab08091a83dfa9fa322de7b Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Wed, 19 May 2021 15:58:18 -0700 Subject: [PATCH 17/18] clean up --- source/plugins/ruby/health/health_monitor_utils.rb | 2 +- source/plugins/ruby/out_mdm.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/plugins/ruby/health/health_monitor_utils.rb b/source/plugins/ruby/health/health_monitor_utils.rb index f6e23de8d..58f2ecc36 100644 --- a/source/plugins/ruby/health/health_monitor_utils.rb +++ b/source/plugins/ruby/health/health_monitor_utils.rb @@ -286,7 +286,7 @@ def build_metrics_hash(metrics_to_collect) def get_health_monitor_config 
health_monitor_config = {} begin - file = File.open('/opt/microsoft/omsagent/plugin/healthmonitorconfig.json', "r") + file = File.open('/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json', "r") if !file.nil? fileContents = file.read health_monitor_config = JSON.parse(fileContents) diff --git a/source/plugins/ruby/out_mdm.rb b/source/plugins/ruby/out_mdm.rb index c03e63f5d..8e80fb753 100644 --- a/source/plugins/ruby/out_mdm.rb +++ b/source/plugins/ruby/out_mdm.rb @@ -203,7 +203,7 @@ def get_access_token end def write_status_file(success, message) - fn = "/var/opt/microsoft/omsagent/log/MDMIngestion.status" + fn = "/var/opt/microsoft/docker-cimprov/log/MDMIngestion.status" status = '{ "operation": "MDMIngestion", "success": "%s", "message": "%s" }' % [success, message] begin File.open(fn, "w") { |file| file.write(status) } From d1de1675075281856ea1fd24511af124cd9db906 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Thu, 20 May 2021 14:42:38 -0700 Subject: [PATCH 18/18] fix missing conf --- build/linux/installer/conf/container.conf | 2 ++ build/linux/installer/conf/kube.conf | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/build/linux/installer/conf/container.conf b/build/linux/installer/conf/container.conf index 05822eccb..093c9ef12 100644 --- a/build/linux/installer/conf/container.conf +++ b/build/linux/installer/conf/container.conf @@ -71,6 +71,7 @@ path /var/opt/microsoft/docker-cimprov/state/containerinventory*.buffer overflow_action drop_oldest_chunk chunk_limit_size 4m + queue_limit_length 20 flush_interval 20s retry_max_times 10 retry_wait 5s @@ -96,6 +97,7 @@ path /var/opt/microsoft/docker-cimprov/state/cadvisorperf*.buffer overflow_action drop_oldest_chunk chunk_limit_size 4m + queue_limit_length 20 flush_interval 20s retry_max_times 10 retry_wait 5s diff --git a/build/linux/installer/conf/kube.conf b/build/linux/installer/conf/kube.conf index 6d0f2d903..a1c8bf928 100644 --- a/build/linux/installer/conf/kube.conf +++ 
b/build/linux/installer/conf/kube.conf @@ -103,6 +103,7 @@ path /var/opt/microsoft/docker-cimprov/state/kubepod*.buffer overflow_action drop_oldest_chunk chunk_limit_size 4m + queue_limit_length 20 flush_interval 20s retry_max_times 10 retry_wait 5s @@ -285,6 +286,7 @@ path /var/opt/microsoft/docker-cimprov/state/containerinventory*.buffer overflow_action drop_oldest_chunk chunk_limit_size 4m + queue_limit_length 20 flush_interval 20s retry_max_times 10 retry_wait 5s @@ -310,6 +312,7 @@ path /var/opt/microsoft/docker-cimprov/state/perf*.buffer overflow_action drop_oldest_chunk chunk_limit_size 4m + queue_limit_length 20 flush_interval 20s retry_max_times 10 retry_wait 5s @@ -327,6 +330,7 @@ path /var/opt/microsoft/docker-cimprov/state/out_mdm_*.buffer overflow_action drop_oldest_chunk chunk_limit_size 4m + queue_limit_length 20 flush_interval 20s retry_max_times 10 retry_wait 5s @@ -344,6 +348,7 @@ path /var/opt/microsoft/docker-cimprov/state/out_mdm_cdvisorperf*.buffer overflow_action drop_oldest_chunk chunk_limit_size 4m + queue_limit_length 20 flush_interval 20s retry_max_times 10 retry_wait 5s @@ -369,6 +374,7 @@ path /var/opt/microsoft/docker-cimprov/state/kubehealth*.buffer overflow_action drop_oldest_chunk chunk_limit_size 4m + queue_limit_length 20 flush_interval 20s retry_max_times 10 retry_wait 5s