diff --git a/build/common/installer/scripts/tomlparser.rb b/build/common/installer/scripts/tomlparser.rb
index a0f3c2f0a..b173ecfe3 100644
--- a/build/common/installer/scripts/tomlparser.rb
+++ b/build/common/installer/scripts/tomlparser.rb
@@ -25,8 +25,10 @@
@enrichContainerLogs = false
@containerLogSchemaVersion = ""
@collectAllKubeEvents = false
-@containerLogsRoute = ""
-
+@containerLogsRoute = "v2" # default for linux
+if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0
+ @containerLogsRoute = "v1" # default is v1 for windows until windows agent integrates windows ama
+end
# Use parser to parse the configmap toml file to a ruby structure
def parseConfigMap
begin
@@ -162,8 +164,12 @@ def populateSettingValuesFromConfigMap(parsedConfig)
#Get container logs route setting
begin
if !parsedConfig[:log_collection_settings][:route_container_logs].nil? && !parsedConfig[:log_collection_settings][:route_container_logs][:version].nil?
- @containerLogsRoute = parsedConfig[:log_collection_settings][:route_container_logs][:version]
- puts "config::Using config map setting for container logs route"
+ if !parsedConfig[:log_collection_settings][:route_container_logs][:version].empty?
+ @containerLogsRoute = parsedConfig[:log_collection_settings][:route_container_logs][:version]
+ puts "config::Using config map setting for container logs route: #{@containerLogsRoute}"
+ else
+ puts "config::Ignoring config map settings and using default value since provided container logs route value is empty"
+ end
end
rescue => errorStr
ConfigParseErrorLogger.logError("Exception while reading config map settings for container logs route - #{errorStr}, using defaults, please check config map for errors")
@@ -256,7 +262,7 @@ def get_command_windows(env_variable_name, env_variable_value)
file.write(commands)
commands = get_command_windows('AZMON_CLUSTER_COLLECT_ALL_KUBE_EVENTS', @collectAllKubeEvents)
file.write(commands)
- commands = get_command_windows('AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE', @containerLogsRoute)
+ commands = get_command_windows('AZMON_CONTAINER_LOGS_ROUTE', @containerLogsRoute)
file.write(commands)
commands = get_command_windows('AZMON_CONTAINER_LOG_SCHEMA_VERSION', @containerLogSchemaVersion)
file.write(commands)
diff --git a/build/linux/installer/conf/container.conf b/build/linux/installer/conf/container.conf
index 958a85eb6..093c9ef12 100644
--- a/build/linux/installer/conf/container.conf
+++ b/build/linux/installer/conf/container.conf
@@ -1,141 +1,179 @@
-# Fluentd config file for OMS Docker - container components (non kubeAPI)
-
-# Forward port 25225 for container logs
-
- type forward
- port 25225
- bind 127.0.0.1
-
-
-# MDM metrics from telegraf
-
- @type tcp
- tag oms.mdm.container.perf.telegraf.*
- bind 0.0.0.0
- port 25228
- format json
-
-
-# Container inventory
-
- type containerinventory
- tag oms.containerinsights.containerinventory
- run_interval 60
- log_level debug
-
-
-#cadvisor perf
-
- type cadvisorperf
- tag oms.api.cadvisorperf
- run_interval 60
- log_level debug
-
-
-
- type filter_cadvisor_health_node
- log_level debug
-
-
-
- type filter_cadvisor_health_container
- log_level debug
-
-
-#custom_metrics_mdm filter plugin
-
- type filter_cadvisor2mdm
- metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes,pvUsedBytes
- log_level info
-
-
-
- type filter_telegraf2mdm
- log_level debug
-
-
-
- type out_oms
- log_level debug
- num_threads 5
- buffer_type file
- buffer_path %STATE_DIR_WS%/out_oms_containerinventory*.buffer
- buffer_queue_full_action drop_oldest_chunk
- buffer_chunk_limit 4m
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
-
-
-
- type out_oms
- log_level debug
- num_threads 5
- buffer_type file
- buffer_path %STATE_DIR_WS%/out_oms_cadvisorperf*.buffer
- buffer_queue_full_action drop_oldest_chunk
- buffer_chunk_limit 4m
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
-
-
-
-
- @type health_forward
- send_timeout 60s
- recover_wait 10s
- hard_timeout 60s
- heartbeat_type tcp
- skip_network_error_at_init true
- expire_dns_cache 600s
- buffer_queue_full_action drop_oldest_chunk
- buffer_type file
- buffer_path %STATE_DIR_WS%/out_health_forward*.buffer
- buffer_chunk_limit 3m
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
-
-
- host "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_HOST']}"
- port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
-
-
-
+ # Fluentd config file for OMS Docker - container components (non kubeAPI)
+
+ # Forward port 25225 for container logs
 + # gangams - not used; get rid of after confirming it is safe to remove
+
+ @type forward
+ port 25225
+ bind 127.0.0.1
+
+
+ # MDM metrics from telegraf
+
+ @type tcp
+ tag oms.mdm.container.perf.telegraf.*
+ bind 0.0.0.0
+ port 25228
+ format json
+
+
+ # Container inventory
+
+ @type containerinventory
+ tag oneagent.containerInsights.CONTAINER_INVENTORY_BLOB
+ run_interval 60
+ @log_level debug
+
+
+ #cadvisor perf
+
+ @type cadvisor_perf
+ tag oneagent.containerInsights.LINUX_PERF_BLOB
+ run_interval 60
+ @log_level debug
+
+
+
+ @type cadvisor_health_node
+ @log_level debug
+
+
+
+ @type cadvisor_health_container
+ @log_level debug
+
+
+ #custom_metrics_mdm filter plugin
+
+ @type cadvisor2mdm
+ metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes,pvUsedBytes
+ @log_level info
+
+
+
+ @type telegraf2mdm
+ @log_level debug
+
+
+ #containerinventory
+
+ @type forward
+ @log_level debug
+ send_timeout 30
+ connect_timeout 30
+ heartbeat_type none
+
+ host 0.0.0.0
+ port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}"
+
+
@type file
- path %STATE_DIR_WS%/fluent_forward_failed.buffer
-
-
-
-
- type out_mdm
- log_level debug
- num_threads 5
- buffer_type file
- buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer
- buffer_queue_full_action drop_oldest_chunk
- buffer_chunk_limit 4m
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
- retry_mdm_post_wait_minutes 30
-
-
-
- type out_oms
- log_level debug
- num_threads 5
- buffer_type file
- buffer_path %STATE_DIR_WS%/out_oms_insightsmetrics*.buffer
- buffer_queue_full_action drop_oldest_chunk
- buffer_chunk_limit 4m
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
-
+ path /var/opt/microsoft/docker-cimprov/state/containerinventory*.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ queue_limit_length 20
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 5
+
+ keepalive true
+
+
+ #cadvisorperf
+
+ @type forward
+ @log_level debug
+ send_timeout 30
+ connect_timeout 30
+ heartbeat_type none
+
+ host 0.0.0.0
+ port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}"
+
+
+ @type file
+ path /var/opt/microsoft/docker-cimprov/state/cadvisorperf*.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ queue_limit_length 20
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 5
+
+ keepalive true
+
+
+
+ @type health_forward
+ send_timeout 60s
+ recover_wait 10s
+ hard_timeout 60s
+ transport tcp
+ ignore_network_errors_at_startup true
+ expire_dns_cache 600s
+
+ @type file
+ overflow_action drop_oldest_chunk
+ path /var/opt/microsoft/docker-cimprov/state/out_health_forward*.buffer
+ chunk_limit_size 3m
+ flush_interval 20s
+ retry_max_times 10
+ retry_max_interval 5m
+ retry_wait 5s
+
+
+ host "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_HOST']}"
+ port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
+
+
+ @type file
+ path /var/opt/microsoft/docker-cimprov/state/fluent_forward_failed.buffer
+
+
+
+
+ @type mdm
+ @log_level debug
+
+ @type file
+ path /var/opt/microsoft/docker-cimprov/state/out_mdm_cdvisorperf*.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 5
+
+ retry_mdm_post_wait_minutes 30
+
+
+ #InsightsMetrics
+
+ @type forward
+ @log_level debug
+ send_timeout 30
+ connect_timeout 30
+ heartbeat_type none
+
+ host 0.0.0.0
+ port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}"
+
+
+ @type file
+ path /var/opt/microsoft/docker-cimprov/state/insightsmetrics*.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ queue_limit_length 20
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 5
+
+ keepalive true
+
diff --git a/build/linux/installer/conf/kube.conf b/build/linux/installer/conf/kube.conf
index fb566c360..a1c8bf928 100644
--- a/build/linux/installer/conf/kube.conf
+++ b/build/linux/installer/conf/kube.conf
@@ -1,7 +1,6 @@
-# Fluentd config file for OMS Docker - cluster components (kubeAPI)
#fluent forward plugin
- type forward
+ @type forward
port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
bind 0.0.0.0
chunk_size_limit 4m
@@ -9,262 +8,378 @@
#Kubernetes pod inventory
- type kubepodinventory
- tag oms.containerinsights.KubePodInventory
+ @type kube_podinventory
+ tag oneagent.containerInsights.KUBE_POD_INVENTORY_BLOB
run_interval 60
- log_level debug
+ @log_level debug
#Kubernetes Persistent Volume inventory
- type kubepvinventory
- tag oms.containerinsights.KubePVInventory
+ @type kube_pvinventory
+ tag oneagent.containerInsights.KUBE_PV_INVENTORY_BLOB
run_interval 60
- log_level debug
+ @log_level debug
#Kubernetes events
- type kubeevents
- tag oms.containerinsights.KubeEvents
+ @type kube_events
+ tag oneagent.containerInsights.KUBE_EVENTS_BLOB
run_interval 60
- log_level debug
-
+ @log_level debug
+
#Kubernetes Nodes
- type kubenodeinventory
- tag oms.containerinsights.KubeNodeInventory
+ @type kube_nodes
+ tag oneagent.containerInsights.KUBE_NODE_INVENTORY_BLOB
run_interval 60
- log_level debug
+ @log_level debug
#Kubernetes health
- type kubehealth
+ @type kube_health
tag kubehealth.ReplicaSet
run_interval 60
- log_level debug
+ @log_level debug
#cadvisor perf- Windows nodes
- type wincadvisorperf
- tag oms.api.wincadvisorperf
+ @type win_cadvisor_perf
+ tag oneagent.containerInsights.LINUX_PERF_BLOB
run_interval 60
- log_level debug
+ @log_level debug
#Kubernetes object state - deployments
-
- type kubestatedeployments
- tag oms.containerinsights.KubeStateDeployments
- run_interval 60
- log_level debug
-
+
+ @type kubestate_deployments
+ tag oneagent.containerInsights.INSIGHTS_METRICS_BLOB
+ run_interval 60
+ @log_level debug
+
- #Kubernetes object state - HPA
-
- type kubestatehpa
- tag oms.containerinsights.KubeStateHpa
- run_interval 60
- log_level debug
-
+ #Kubernetes object state - HPA
+
+ @type kubestate_hpa
+ tag oneagent.containerInsights.INSIGHTS_METRICS_BLOB
+ run_interval 60
+ @log_level debug
+
- type filter_inventory2mdm
- log_level info
+ @type inventory2mdm
+ @log_level info
#custom_metrics_mdm filter plugin for perf data from windows nodes
- type filter_cadvisor2mdm
+ @type cadvisor2mdm
metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,pvUsedBytes
- log_level info
+ @log_level info
#health model aggregation filter
- type filter_health_model_builder
+ @type health_model_builder
-
- type out_oms
- log_level debug
- num_threads 5
- buffer_chunk_limit 4m
- buffer_type file
- buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer
- buffer_queue_limit 20
- buffer_queue_full_action drop_oldest_chunk
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
+ #kubepodinventory
+
+ @type forward
+ @log_level debug
+ send_timeout 30
+ connect_timeout 30
+ heartbeat_type none
+
+ host 0.0.0.0
+ port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}"
+
+
+ @type file
+ path /var/opt/microsoft/docker-cimprov/state/kubepod*.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ queue_limit_length 20
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 5
+
+ keepalive true
-
- type out_oms
- log_level debug
- num_threads 5
- buffer_chunk_limit 4m
- buffer_type file
- buffer_path %STATE_DIR_WS%/state/out_oms_kubepv*.buffer
- buffer_queue_limit 20
- buffer_queue_full_action drop_oldest_chunk
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
-
+ #kubepvinventory
+
+ @type forward
+ @log_level debug
+ send_timeout 30
+ connect_timeout 30
+ heartbeat_type none
+
+ host 0.0.0.0
+ port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}"
+
+
+ @type file
+ path /var/opt/microsoft/docker-cimprov/state/kubepv*.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ queue_limit_length 20
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 5
+
+ keepalive true
+
-
- type out_oms
- log_level debug
- num_threads 5
- buffer_chunk_limit 4m
- buffer_type file
- buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer
- buffer_queue_limit 20
- buffer_queue_full_action drop_oldest_chunk
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
+ #InsightsMetrics
+ #kubestate
+
+ @type forward
+ @log_level debug
+ send_timeout 30
+ connect_timeout 30
+ heartbeat_type none
+
+ host 0.0.0.0
+ port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}"
+
+
+ @type file
+ path /var/opt/microsoft/docker-cimprov/state/insightsmetrics*.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ queue_limit_length 20
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 5
+
+ keepalive true
-
- type out_oms
- log_level debug
- num_threads 2
- buffer_chunk_limit 4m
- buffer_type file
- buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer
- buffer_queue_limit 20
- buffer_queue_full_action drop_oldest_chunk
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
+ #kubeevents
+
+ @type forward
+ @log_level debug
+ send_timeout 30
+ connect_timeout 30
+ heartbeat_type none
+
+ host 0.0.0.0
+ port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}"
+
+
+ @type file
+ path /var/opt/microsoft/docker-cimprov/state/kubeevents*.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ queue_limit_length 20
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 5
+
+ keepalive true
+
+ #kubeservices
+
+ @type forward
+ @log_level debug
+ send_timeout 30
+ connect_timeout 30
+ heartbeat_type none
+
+ host 0.0.0.0
+ port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}"
+
+
+ @type file
+ path /var/opt/microsoft/docker-cimprov/state/kubeservices*.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ queue_limit_length 20
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 2
+
+ keepalive true
+
-
- type out_oms
- log_level debug
- num_threads 5
- buffer_chunk_limit 4m
- buffer_type file
- buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer
- buffer_queue_limit 20
- buffer_queue_full_action drop_oldest_chunk
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
+ #kubenodeinventory
+
+ @type forward
+ @log_level debug
+ send_timeout 30
+ connect_timeout 30
+ heartbeat_type none
+
+ host 0.0.0.0
+ port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}"
+
+
+ @type file
+ path /var/opt/microsoft/docker-cimprov/state/kubenode*.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ queue_limit_length 20
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 5
+
+ keepalive true
-
- type out_oms
- log_level debug
- num_threads 3
- buffer_chunk_limit 4m
- buffer_type file
- buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer
- buffer_queue_limit 20
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
+ #containernodeinventory
+
+ @type forward
+ @log_level debug
+ send_timeout 30
+ connect_timeout 30
+ heartbeat_type none
+
+ host 0.0.0.0
+ port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}"
+
+
+ @type file
+ path /var/opt/microsoft/docker-cimprov/state/containernodeinventory*.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ queue_limit_length 20
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 3
+
+ keepalive true
-
- type out_oms
- log_level debug
- num_threads 5
- buffer_chunk_limit 4m
- buffer_type file
- buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer
- buffer_queue_limit 20
- buffer_queue_full_action drop_oldest_chunk
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
+ #containerinventory for windows containers
+
+ @type forward
+ @log_level debug
+ send_timeout 30
+ connect_timeout 30
+ heartbeat_type none
+
+ host 0.0.0.0
+ port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}"
+
+
+ @type file
+ path /var/opt/microsoft/docker-cimprov/state/containerinventory*.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ queue_limit_length 20
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 5
+
+ keepalive true
+
+
+ #perf
+
+ @type forward
+ @log_level debug
+ send_timeout 30
+ connect_timeout 30
+ heartbeat_type none
+
+ host 0.0.0.0
+ port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}"
+
+
+ @type file
+ path /var/opt/microsoft/docker-cimprov/state/perf*.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ queue_limit_length 20
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 5
+
+ keepalive true
- type out_mdm
- log_level debug
- num_threads 5
- buffer_chunk_limit 4m
- buffer_type file
- buffer_path %STATE_DIR_WS%/out_mdm_*.buffer
- buffer_queue_limit 20
- buffer_queue_full_action drop_oldest_chunk
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
+ @type mdm
+ @log_level debug
+
+ @type file
+ path /var/opt/microsoft/docker-cimprov/state/out_mdm_*.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ queue_limit_length 20
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 5
+
retry_mdm_post_wait_minutes 30
-
- type out_oms
- log_level debug
- num_threads 5
- buffer_chunk_limit 4m
- buffer_type file
- buffer_path %STATE_DIR_WS%/out_oms_api_wincadvisorperf*.buffer
- buffer_queue_limit 20
- buffer_queue_full_action drop_oldest_chunk
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
-
-
- type out_mdm
- log_level debug
- num_threads 5
- buffer_chunk_limit 4m
- buffer_type file
- buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer
- buffer_queue_limit 20
- buffer_queue_full_action drop_oldest_chunk
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
+ @type mdm
+ @log_level debug
+
+ @type file
+ path /var/opt/microsoft/docker-cimprov/state/out_mdm_cdvisorperf*.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ queue_limit_length 20
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 5
+
retry_mdm_post_wait_minutes 30
-
-
- type out_oms
- log_level debug
- num_threads 5
- buffer_chunk_limit 4m
- buffer_type file
- buffer_path %STATE_DIR_WS%/out_oms_kubehealth*.buffer
- buffer_queue_limit 20
- buffer_queue_full_action drop_oldest_chunk
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
+
+ #kubehealth
+
+ @type forward
+ @log_level debug
+ send_timeout 30
+ connect_timeout 30
+ heartbeat_type none
+
+ host 0.0.0.0
+ port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}"
+
+
+ @type file
+ path /var/opt/microsoft/docker-cimprov/state/kubehealth*.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ queue_limit_length 20
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 5
+
+ keepalive true
-
-
- type out_oms
- log_level debug
- num_threads 5
- buffer_chunk_limit 4m
- buffer_type file
- buffer_path %STATE_DIR_WS%/out_oms_insightsmetrics*.buffer
- buffer_queue_limit 20
- buffer_queue_full_action drop_oldest_chunk
- flush_interval 20s
- retry_limit 10
- retry_wait 5s
- max_retry_wait 5m
-
\ No newline at end of file
diff --git a/build/linux/installer/conf/out_oms.conf b/build/linux/installer/conf/out_oms.conf
index 74ba3195e..21dc4c1ed 100644
--- a/build/linux/installer/conf/out_oms.conf
+++ b/build/linux/installer/conf/out_oms.conf
@@ -1,10 +1,9 @@
-omsadmin_conf_path=/etc/opt/microsoft/omsagent/conf/omsadmin.conf
omsproxy_secret_path=/etc/omsagent-secret/PROXY
adx_cluster_uri_path=/etc/config/settings/adx/ADXCLUSTERURI
adx_client_id_path=/etc/config/settings/adx/ADXCLIENTID
adx_tenant_id_path=/etc/config/settings/adx/ADXTENANTID
adx_client_secret_path=/etc/config/settings/adx/ADXCLIENTSECRET
-cert_file_path=/etc/opt/microsoft/omsagent/certs/oms.crt
-key_file_path=/etc/opt/microsoft/omsagent/certs/oms.key
+cert_file_path=/etc/mdsd.d/oms/%s/oms.crt
+key_file_path=/etc/mdsd.d/oms/%s/oms.key
container_host_file_path=/var/opt/microsoft/docker-cimprov/state/containerhostname
container_inventory_refresh_interval=60
diff --git a/build/linux/installer/datafiles/base_container.data b/build/linux/installer/datafiles/base_container.data
index df8fbc3da..b9f889dba 100644
--- a/build/linux/installer/datafiles/base_container.data
+++ b/build/linux/installer/datafiles/base_container.data
@@ -18,89 +18,8 @@ MAINTAINER: 'Microsoft Corporation'
/etc/opt/microsoft/docker-cimprov/conf/installinfo.txt; build/linux/installer/conf/installinfo.txt; 644; root; root; conffile
-/opt/microsoft/omsagent/plugin/filter_docker_log.rb; source/plugins/ruby/filter_docker_log.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/filter_container.rb; source/plugins/ruby/filter_container.rb; 644; root; root
-
-/opt/microsoft/omsagent/plugin/in_kube_podinventory.rb; source/plugins/ruby/in_kube_podinventory.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/in_kube_pvinventory.rb; source/plugins/ruby/in_kube_pvinventory.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/in_kube_events.rb; source/plugins/ruby/in_kube_events.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/KubernetesApiClient.rb; source/plugins/ruby/KubernetesApiClient.rb; 644; root; root
-
/etc/opt/microsoft/docker-cimprov/container.conf; build/linux/installer/conf/container.conf; 644; root; root
-/opt/microsoft/omsagent/plugin/CAdvisorMetricsAPIClient.rb; source/plugins/ruby/CAdvisorMetricsAPIClient.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/in_cadvisor_perf.rb; source/plugins/ruby/in_cadvisor_perf.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/in_win_cadvisor_perf.rb; source/plugins/ruby/in_win_cadvisor_perf.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/in_kube_nodes.rb; source/plugins/ruby/in_kube_nodes.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/in_kubestate_deployments.rb; source/plugins/ruby/in_kubestate_deployments.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/in_kubestate_hpa.rb; source/plugins/ruby/in_kubestate_hpa.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/filter_inventory2mdm.rb; source/plugins/ruby/filter_inventory2mdm.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/podinventory_to_mdm.rb; source/plugins/ruby/podinventory_to_mdm.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/kubelet_utils.rb; source/plugins/ruby/kubelet_utils.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/CustomMetricsUtils.rb; source/plugins/ruby/CustomMetricsUtils.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/constants.rb; source/plugins/ruby/constants.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/MdmAlertTemplates.rb; source/plugins/ruby/MdmAlertTemplates.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/MdmMetricsGenerator.rb; source/plugins/ruby/MdmMetricsGenerator.rb; 644; root; root
-
-
-/opt/microsoft/omsagent/plugin/ApplicationInsightsUtility.rb; source/plugins/ruby/ApplicationInsightsUtility.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/ContainerInventoryState.rb; source/plugins/ruby/ContainerInventoryState.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/DockerApiClient.rb; source/plugins/ruby/DockerApiClient.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/DockerApiRestHelper.rb; source/plugins/ruby/DockerApiRestHelper.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/in_containerinventory.rb; source/plugins/ruby/in_containerinventory.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/kubernetes_container_inventory.rb; source/plugins/ruby/kubernetes_container_inventory.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/proxy_utils.rb; source/plugins/ruby/proxy_utils.rb; 644; root; root
-
-/opt/microsoft/omsagent/plugin/arc_k8s_cluster_identity.rb; source/plugins/ruby/arc_k8s_cluster_identity.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/out_mdm.rb; source/plugins/ruby/out_mdm.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/filter_cadvisor2mdm.rb; source/plugins/ruby/filter_cadvisor2mdm.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/filter_telegraf2mdm.rb; source/plugins/ruby/filter_telegraf2mdm.rb; 644; root; root
-
-/opt/microsoft/omsagent/plugin/lib/application_insights/version.rb; source/plugins/ruby/lib/application_insights/version.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/rack/track_request.rb; source/plugins/ruby/lib/application_insights/rack/track_request.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/unhandled_exception.rb; source/plugins/ruby/lib/application_insights/unhandled_exception.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/telemetry_client.rb; source/plugins/ruby/lib/application_insights/telemetry_client.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/queue_base.rb; source/plugins/ruby/lib/application_insights/channel/queue_base.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/asynchronous_queue.rb; source/plugins/ruby/lib/application_insights/channel/asynchronous_queue.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/synchronous_sender.rb; source/plugins/ruby/lib/application_insights/channel/synchronous_sender.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/data_point_type.rb; source/plugins/ruby/lib/application_insights/channel/contracts/data_point_type.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/data_point.rb; source/plugins/ruby/lib/application_insights/channel/contracts/data_point.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/stack_frame.rb; source/plugins/ruby/lib/application_insights/channel/contracts/stack_frame.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/request_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/request_data.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/session.rb; source/plugins/ruby/lib/application_insights/channel/contracts/session.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/page_view_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/page_view_data.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/remote_dependency_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/remote_dependency_data.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/exception_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/exception_data.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/location.rb; source/plugins/ruby/lib/application_insights/channel/contracts/location.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/operation.rb; source/plugins/ruby/lib/application_insights/channel/contracts/operation.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/data.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/event_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/event_data.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/metric_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/metric_data.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/device.rb; source/plugins/ruby/lib/application_insights/channel/contracts/device.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/message_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/message_data.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/dependency_source_type.rb; source/plugins/ruby/lib/application_insights/channel/contracts/dependency_source_type.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/user.rb; source/plugins/ruby/lib/application_insights/channel/contracts/user.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/severity_level.rb; source/plugins/ruby/lib/application_insights/channel/contracts/severity_level.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/application.rb; source/plugins/ruby/lib/application_insights/channel/contracts/application.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/dependency_kind.rb; source/plugins/ruby/lib/application_insights/channel/contracts/dependency_kind.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/cloud.rb; source/plugins/ruby/lib/application_insights/channel/contracts/cloud.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/envelope.rb; source/plugins/ruby/lib/application_insights/channel/contracts/envelope.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/json_serializable.rb; source/plugins/ruby/lib/application_insights/channel/contracts/json_serializable.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/domain.rb; source/plugins/ruby/lib/application_insights/channel/contracts/domain.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/base.rb; source/plugins/ruby/lib/application_insights/channel/contracts/base.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/reopenings.rb; source/plugins/ruby/lib/application_insights/channel/contracts/reopenings.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/page_view_perf_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/page_view_perf_data.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/internal.rb; source/plugins/ruby/lib/application_insights/channel/contracts/internal.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/availability_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/availability_data.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/exception_details.rb; source/plugins/ruby/lib/application_insights/channel/contracts/exception_details.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/synchronous_queue.rb; source/plugins/ruby/lib/application_insights/channel/synchronous_queue.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/sender_base.rb; source/plugins/ruby/lib/application_insights/channel/sender_base.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/telemetry_context.rb; source/plugins/ruby/lib/application_insights/channel/telemetry_context.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/asynchronous_sender.rb; source/plugins/ruby/lib/application_insights/channel/asynchronous_sender.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/telemetry_channel.rb; source/plugins/ruby/lib/application_insights/channel/telemetry_channel.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/event.rb; source/plugins/ruby/lib/application_insights/channel/event.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/lib/application_insights.rb; source/plugins/ruby/lib/application_insights.rb; 644; root; root
-
/opt/tomlrb.rb; source/toml-parser/tomlrb.rb; 644; root; root
/opt/tomlrb/generated_parser.rb; source/toml-parser/tomlrb/generated_parser.rb; 644; root; root
/opt/tomlrb/handler.rb; source/toml-parser/tomlrb/handler.rb; 644; root; root
@@ -126,6 +45,7 @@ MAINTAINER: 'Microsoft Corporation'
/opt/tomlparser-mdm-metrics-config.rb; build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb; 755; root; root
/opt/tomlparser-metric-collection-config.rb; build/linux/installer/scripts/tomlparser-metric-collection-config.rb; 755; root; root
+
/opt/tomlparser-agent-config.rb; build/linux/installer/scripts/tomlparser-agent-config.rb; 755; root; root
/opt/tomlparser.rb; build/common/installer/scripts/tomlparser.rb; 755; root; root
/opt/td-agent-bit-conf-customizer.rb; build/common/installer/scripts/td-agent-bit-conf-customizer.rb; 755; root; root
@@ -134,43 +54,127 @@ MAINTAINER: 'Microsoft Corporation'
/opt/tomlparser-osm-config.rb; build/linux/installer/scripts/tomlparser-osm-config.rb; 755; root; root
-/opt/microsoft/omsagent/plugin/filter_cadvisor_health_container.rb; source/plugins/ruby/filter_cadvisor_health_container.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/filter_cadvisor_health_node.rb; source/plugins/ruby/filter_cadvisor_health_node.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/filter_health_model_builder.rb; source/plugins/ruby/filter_health_model_builder.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/in_kube_health.rb; source/plugins/ruby/in_kube_health.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/out_health_forward.rb; source/plugins/ruby/out_health_forward.rb; 644; root; root
/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json; build/linux/installer/conf/healthmonitorconfig.json; 644; root; root
/etc/opt/microsoft/docker-cimprov/health/health_model_definition.json; build/linux/installer/conf/health_model_definition.json; 644; root; root
-/opt/microsoft/omsagent/plugin/health/aggregate_monitor.rb; source/plugins/ruby/health/aggregate_monitor.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/agg_monitor_id_labels.rb; source/plugins/ruby/health/agg_monitor_id_labels.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/aggregate_monitor_state_finalizer.rb; source/plugins/ruby/health/aggregate_monitor_state_finalizer.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/cluster_health_state.rb; source/plugins/ruby/health/cluster_health_state.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_container_cpu_memory_aggregator.rb; source/plugins/ruby/health/health_container_cpu_memory_aggregator.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_container_cpu_memory_record_formatter.rb; source/plugins/ruby/health/health_container_cpu_memory_record_formatter.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_hierarchy_builder.rb; source/plugins/ruby/health/health_hierarchy_builder.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_kubernetes_resources.rb; source/plugins/ruby/health/health_kubernetes_resources.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_kube_api_down_handler.rb; source/plugins/ruby/health/health_kube_api_down_handler.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_missing_signal_generator.rb; source/plugins/ruby/health/health_missing_signal_generator.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_model_buffer.rb; source/plugins/ruby/health/health_model_buffer.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_model_builder.rb; source/plugins/ruby/health/health_model_builder.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_model_constants.rb; source/plugins/ruby/health/health_model_constants.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/parent_monitor_provider.rb; source/plugins/ruby/health/parent_monitor_provider.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_model_definition_parser.rb; source/plugins/ruby/health/health_model_definition_parser.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_monitor_helpers.rb; source/plugins/ruby/health/health_monitor_helpers.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_monitor_optimizer.rb; source/plugins/ruby/health/health_monitor_optimizer.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_monitor_provider.rb; source/plugins/ruby/health/health_monitor_provider.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_monitor_record.rb; source/plugins/ruby/health/health_monitor_record.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_monitor_state.rb; source/plugins/ruby/health/health_monitor_state.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_monitor_telemetry.rb; source/plugins/ruby/health/health_monitor_telemetry.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_monitor_utils.rb; source/plugins/ruby/health/health_monitor_utils.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_signal_reducer.rb; source/plugins/ruby/health/health_signal_reducer.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/monitor_factory.rb; source/plugins/ruby/health/monitor_factory.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/monitor_set.rb; source/plugins/ruby/health/monitor_set.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/unit_monitor.rb; source/plugins/ruby/health/unit_monitor.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/version.rb; source/plugins/ruby/lib/application_insights/version.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/rack/track_request.rb; source/plugins/ruby/lib/application_insights/rack/track_request.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/unhandled_exception.rb; source/plugins/ruby/lib/application_insights/unhandled_exception.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/telemetry_client.rb; source/plugins/ruby/lib/application_insights/telemetry_client.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/queue_base.rb; source/plugins/ruby/lib/application_insights/channel/queue_base.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/asynchronous_queue.rb; source/plugins/ruby/lib/application_insights/channel/asynchronous_queue.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/synchronous_sender.rb; source/plugins/ruby/lib/application_insights/channel/synchronous_sender.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/data_point_type.rb; source/plugins/ruby/lib/application_insights/channel/contracts/data_point_type.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/data_point.rb; source/plugins/ruby/lib/application_insights/channel/contracts/data_point.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/stack_frame.rb; source/plugins/ruby/lib/application_insights/channel/contracts/stack_frame.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/request_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/request_data.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/session.rb; source/plugins/ruby/lib/application_insights/channel/contracts/session.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/page_view_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/page_view_data.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/remote_dependency_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/remote_dependency_data.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/exception_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/exception_data.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/location.rb; source/plugins/ruby/lib/application_insights/channel/contracts/location.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/operation.rb; source/plugins/ruby/lib/application_insights/channel/contracts/operation.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/data.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/event_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/event_data.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/metric_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/metric_data.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/device.rb; source/plugins/ruby/lib/application_insights/channel/contracts/device.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/message_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/message_data.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/dependency_source_type.rb; source/plugins/ruby/lib/application_insights/channel/contracts/dependency_source_type.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/user.rb; source/plugins/ruby/lib/application_insights/channel/contracts/user.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/severity_level.rb; source/plugins/ruby/lib/application_insights/channel/contracts/severity_level.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/application.rb; source/plugins/ruby/lib/application_insights/channel/contracts/application.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/dependency_kind.rb; source/plugins/ruby/lib/application_insights/channel/contracts/dependency_kind.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/cloud.rb; source/plugins/ruby/lib/application_insights/channel/contracts/cloud.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/envelope.rb; source/plugins/ruby/lib/application_insights/channel/contracts/envelope.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/json_serializable.rb; source/plugins/ruby/lib/application_insights/channel/contracts/json_serializable.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/domain.rb; source/plugins/ruby/lib/application_insights/channel/contracts/domain.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/base.rb; source/plugins/ruby/lib/application_insights/channel/contracts/base.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/reopenings.rb; source/plugins/ruby/lib/application_insights/channel/contracts/reopenings.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/page_view_perf_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/page_view_perf_data.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/internal.rb; source/plugins/ruby/lib/application_insights/channel/contracts/internal.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/availability_data.rb; source/plugins/ruby/lib/application_insights/channel/contracts/availability_data.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/contracts/exception_details.rb; source/plugins/ruby/lib/application_insights/channel/contracts/exception_details.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/synchronous_queue.rb; source/plugins/ruby/lib/application_insights/channel/synchronous_queue.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/sender_base.rb; source/plugins/ruby/lib/application_insights/channel/sender_base.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/telemetry_context.rb; source/plugins/ruby/lib/application_insights/channel/telemetry_context.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/asynchronous_sender.rb; source/plugins/ruby/lib/application_insights/channel/asynchronous_sender.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/telemetry_channel.rb; source/plugins/ruby/lib/application_insights/channel/telemetry_channel.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights/channel/event.rb; source/plugins/ruby/lib/application_insights/channel/event.rb; 644; root; root
+/etc/fluent/plugin/lib/application_insights.rb; source/plugins/ruby/lib/application_insights.rb; 644; root; root
+
+/etc/fluent/plugin/health/aggregate_monitor.rb; source/plugins/ruby/health/aggregate_monitor.rb; 644; root; root
+/etc/fluent/plugin/health/agg_monitor_id_labels.rb; source/plugins/ruby/health/agg_monitor_id_labels.rb; 644; root; root
+/etc/fluent/plugin/health/aggregate_monitor_state_finalizer.rb; source/plugins/ruby/health/aggregate_monitor_state_finalizer.rb; 644; root; root
+/etc/fluent/plugin/health/cluster_health_state.rb; source/plugins/ruby/health/cluster_health_state.rb; 644; root; root
+/etc/fluent/plugin/health/health_container_cpu_memory_aggregator.rb; source/plugins/ruby/health/health_container_cpu_memory_aggregator.rb; 644; root; root
+/etc/fluent/plugin/health/health_container_cpu_memory_record_formatter.rb; source/plugins/ruby/health/health_container_cpu_memory_record_formatter.rb; 644; root; root
+/etc/fluent/plugin/health/health_hierarchy_builder.rb; source/plugins/ruby/health/health_hierarchy_builder.rb; 644; root; root
+/etc/fluent/plugin/health/health_kubernetes_resources.rb; source/plugins/ruby/health/health_kubernetes_resources.rb; 644; root; root
+/etc/fluent/plugin/health/health_kube_api_down_handler.rb; source/plugins/ruby/health/health_kube_api_down_handler.rb; 644; root; root
+/etc/fluent/plugin/health/health_missing_signal_generator.rb; source/plugins/ruby/health/health_missing_signal_generator.rb; 644; root; root
+/etc/fluent/plugin/health/health_model_buffer.rb; source/plugins/ruby/health/health_model_buffer.rb; 644; root; root
+/etc/fluent/plugin/health/health_model_builder.rb; source/plugins/ruby/health/health_model_builder.rb; 644; root; root
+/etc/fluent/plugin/health/health_model_constants.rb; source/plugins/ruby/health/health_model_constants.rb; 644; root; root
+/etc/fluent/plugin/health/parent_monitor_provider.rb; source/plugins/ruby/health/parent_monitor_provider.rb; 644; root; root
+/etc/fluent/plugin/health/health_model_definition_parser.rb; source/plugins/ruby/health/health_model_definition_parser.rb; 644; root; root
+/etc/fluent/plugin/health/health_monitor_helpers.rb; source/plugins/ruby/health/health_monitor_helpers.rb; 644; root; root
+/etc/fluent/plugin/health/health_monitor_optimizer.rb; source/plugins/ruby/health/health_monitor_optimizer.rb; 644; root; root
+/etc/fluent/plugin/health/health_monitor_provider.rb; source/plugins/ruby/health/health_monitor_provider.rb; 644; root; root
+/etc/fluent/plugin/health/health_monitor_record.rb; source/plugins/ruby/health/health_monitor_record.rb; 644; root; root
+/etc/fluent/plugin/health/health_monitor_state.rb; source/plugins/ruby/health/health_monitor_state.rb; 644; root; root
+/etc/fluent/plugin/health/health_monitor_telemetry.rb; source/plugins/ruby/health/health_monitor_telemetry.rb; 644; root; root
+/etc/fluent/plugin/health/health_monitor_utils.rb; source/plugins/ruby/health/health_monitor_utils.rb; 644; root; root
+/etc/fluent/plugin/health/health_signal_reducer.rb; source/plugins/ruby/health/health_signal_reducer.rb; 644; root; root
+/etc/fluent/plugin/health/monitor_factory.rb; source/plugins/ruby/health/monitor_factory.rb; 644; root; root
+/etc/fluent/plugin/health/monitor_set.rb; source/plugins/ruby/health/monitor_set.rb; 644; root; root
+/etc/fluent/plugin/health/unit_monitor.rb; source/plugins/ruby/health/unit_monitor.rb; 644; root; root
+
+/etc/fluent/plugin/ApplicationInsightsUtility.rb; source/plugins/ruby/ApplicationInsightsUtility.rb; 644; root; root
+/etc/fluent/plugin/arc_k8s_cluster_identity.rb; source/plugins/ruby/arc_k8s_cluster_identity.rb; 644; root; root
+/etc/fluent/plugin/CAdvisorMetricsAPIClient.rb; source/plugins/ruby/CAdvisorMetricsAPIClient.rb; 644; root; root
+/etc/fluent/plugin/constants.rb; source/plugins/ruby/constants.rb; 644; root; root
+/etc/fluent/plugin/ContainerInventoryState.rb; source/plugins/ruby/ContainerInventoryState.rb; 644; root; root
+/etc/fluent/plugin/CustomMetricsUtils.rb; source/plugins/ruby/CustomMetricsUtils.rb; 644; root; root
+/etc/fluent/plugin/DockerApiClient.rb; source/plugins/ruby/DockerApiClient.rb; 644; root; root
+/etc/fluent/plugin/DockerApiRestHelper.rb; source/plugins/ruby/DockerApiRestHelper.rb; 644; root; root
+/etc/fluent/plugin/kubelet_utils.rb; source/plugins/ruby/kubelet_utils.rb; 644; root; root
+/etc/fluent/plugin/proxy_utils.rb; source/plugins/ruby/proxy_utils.rb; 644; root; root
+/etc/fluent/plugin/kubernetes_container_inventory.rb; source/plugins/ruby/kubernetes_container_inventory.rb; 644; root; root
+/etc/fluent/plugin/podinventory_to_mdm.rb; source/plugins/ruby/podinventory_to_mdm.rb; 644; root; root
+/etc/fluent/plugin/MdmMetricsGenerator.rb; source/plugins/ruby/MdmMetricsGenerator.rb; 644; root; root
+/etc/fluent/plugin/MdmAlertTemplates.rb; source/plugins/ruby/MdmAlertTemplates.rb; 644; root; root
+
+/etc/fluent/plugin/omslog.rb; source/plugins/utils/omslog.rb; 644; root; root
+/etc/fluent/plugin/oms_common.rb; source/plugins/utils/oms_common.rb; 644; root; root
+
+/etc/fluent/kube.conf; build/linux/installer/conf/kube.conf; 644; root; root
+/etc/fluent/container.conf; build/linux/installer/conf/container.conf; 644; root; root
+
+/etc/fluent/plugin/in_cadvisor_perf.rb; source/plugins/ruby/in_cadvisor_perf.rb; 644; root; root
+/etc/fluent/plugin/in_win_cadvisor_perf.rb; source/plugins/ruby/in_win_cadvisor_perf.rb; 644; root; root
+/etc/fluent/plugin/in_containerinventory.rb; source/plugins/ruby/in_containerinventory.rb; 644; root; root
+/etc/fluent/plugin/in_kube_nodes.rb; source/plugins/ruby/in_kube_nodes.rb; 644; root; root
+/etc/fluent/plugin/in_kube_podinventory.rb; source/plugins/ruby/in_kube_podinventory.rb; 644; root; root
+/etc/fluent/plugin/KubernetesApiClient.rb; source/plugins/ruby/KubernetesApiClient.rb; 644; root; root
+/etc/fluent/plugin/in_kube_events.rb; source/plugins/ruby/in_kube_events.rb; 644; root; root
+/etc/fluent/plugin/in_kube_health.rb; source/plugins/ruby/in_kube_health.rb; 644; root; root
+/etc/fluent/plugin/in_kube_pvinventory.rb; source/plugins/ruby/in_kube_pvinventory.rb; 644; root; root
+/etc/fluent/plugin/in_kubestate_deployments.rb; source/plugins/ruby/in_kubestate_deployments.rb; 644; root; root
+/etc/fluent/plugin/in_kubestate_hpa.rb; source/plugins/ruby/in_kubestate_hpa.rb; 644; root; root
+
+/etc/fluent/plugin/filter_cadvisor_health_container.rb; source/plugins/ruby/filter_cadvisor_health_container.rb; 644; root; root
+/etc/fluent/plugin/filter_cadvisor_health_node.rb; source/plugins/ruby/filter_cadvisor_health_node.rb; 644; root; root
+/etc/fluent/plugin/filter_cadvisor2mdm.rb; source/plugins/ruby/filter_cadvisor2mdm.rb; 644; root; root
+/etc/fluent/plugin/filter_health_model_builder.rb; source/plugins/ruby/filter_health_model_builder.rb; 644; root; root
+/etc/fluent/plugin/filter_inventory2mdm.rb; source/plugins/ruby/filter_inventory2mdm.rb; 644; root; root
+/etc/fluent/plugin/filter_telegraf2mdm.rb; source/plugins/ruby/filter_telegraf2mdm.rb; 644; root; root
+
+/etc/fluent/plugin/out_health_forward.rb; source/plugins/ruby/out_health_forward.rb; 644; root; root
+/etc/fluent/plugin/out_mdm.rb; source/plugins/ruby/out_mdm.rb; 644; root; root
+
+
%Links
-/opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root
%Directories
/etc; 755; root; root; sysdir
@@ -179,27 +183,18 @@ MAINTAINER: 'Microsoft Corporation'
/var; 755; root; root; sysdir
/var/opt; 755; root; root; sysdir
+/opt/fluent; 755; root; root; sysdir
+
/etc/opt/microsoft; 755; root; root; sysdir
/etc/opt/microsoft/docker-cimprov; 755; root; root
/etc/opt/microsoft/docker-cimprov/conf; 755; root; root
/etc/opt/microsoft/docker-cimprov/health; 755; root; root
-/etc/opt/omi; 755; root; root; sysdir
-/etc/opt/omi/conf; 755; root; root; sysdir
-/etc/opt/omi/conf/omiregister; 755; root; root; sysdir
-/etc/opt/omi/conf/omiregister/root-cimv2; 755; root; root
-
/opt/microsoft; 755; root; root; sysdir
/opt/microsoft/docker-cimprov; 755; root; root
/opt/microsoft/docker-cimprov/bin; 755; root; root
/opt/microsoft/docker-cimprov/lib; 755; root; root
-/opt/microsoft/omsagent; 755; root; root; sysdir
-/opt/microsoft/omsagent/plugin; 755; root; root; sysdir
-/opt/microsoft/omsagent/plugin/health; 755; root; root; sysdir
-
-/opt/omi; 755; root; root; sysdir
-/opt/omi/lib; 755; root; root; sysdir
/var/opt/microsoft; 755; root; root; sysdir
/var/opt/microsoft/docker-cimprov; 755; root; root
@@ -213,11 +208,14 @@ MAINTAINER: 'Microsoft Corporation'
/opt/td-agent-bit/bin; 755; root; root;sysdir
/etc/telegraf; 755; root; root;sysdir
-/opt/microsoft/omsagent/plugin/lib; 755; root; root; sysdir
-/opt/microsoft/omsagent/plugin/lib/application_insights; 755; root; root; sysdir
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel; 755; root; root; sysdir
-/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts; 755; root; root; sysdir
-/opt/microsoft/omsagent/plugin/lib/application_insights/rack; 755; root; root; sysdir
+/etc/fluent; 755; root; root; sysdir
+/etc/fluent/plugin; 755; root; root; sysdir
+/etc/fluent/plugin/health; 755; root; root; sysdir
+/etc/fluent/plugin/lib; 755; root; root; sysdir
+/etc/fluent/plugin/lib/application_insights; 755; root; root; sysdir
+/etc/fluent/plugin/lib/application_insights/channel; 755; root; root; sysdir
+/etc/fluent/plugin/lib/application_insights/channel/contracts; 755; root; root; sysdir
+/etc/fluent/plugin/lib/application_insights/rack; 755; root; root; sysdir
/opt/tomlrb; 755; root; root; sysdir
@@ -230,64 +228,61 @@ WriteInstallInfo() {
}
WriteInstallInfo
-#Make omsagent owner for ContainerInventory directory. This is needed for ruby plugin to have access
-chown omsagent:omsagent /var/opt/microsoft/docker-cimprov/state/ContainerInventory
# Get the state file in place with proper permissions
touch /var/opt/microsoft/docker-cimprov/state/LastEventQueryTime.txt
chmod 644 /var/opt/microsoft/docker-cimprov/state/LastEventQueryTime.txt
-chown omsagent:omsagent /var/opt/microsoft/docker-cimprov/state/LastEventQueryTime.txt
touch /var/opt/microsoft/docker-cimprov/state/KubeEventQueryState.yaml
chmod 644 /var/opt/microsoft/docker-cimprov/state/KubeEventQueryState.yaml
-chown omsagent:omsagent /var/opt/microsoft/docker-cimprov/state/KubeEventQueryState.yaml
touch /var/opt/microsoft/docker-cimprov/state/KubeLogQueryState.yaml
chmod 644 /var/opt/microsoft/docker-cimprov/state/KubeLogQueryState.yaml
-chown omsagent:omsagent /var/opt/microsoft/docker-cimprov/state/KubeLogQueryState.yaml
+
touch /var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt
chmod 666 /var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt
-chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt
+
touch /var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt
chmod 666 /var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt
-chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt
+
touch /var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log
chmod 666 /var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log
-chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log
+
touch /var/opt/microsoft/docker-cimprov/log/filter_telegraf2mdm.log
chmod 666 /var/opt/microsoft/docker-cimprov/log/filter_telegraf2mdm.log
-chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/filter_telegraf2mdm.log
+
touch /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log
chmod 666 /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log
-chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log
+
touch /var/opt/microsoft/docker-cimprov/log/mdm_metrics_generator.log
chmod 666 /var/opt/microsoft/docker-cimprov/log/mdm_metrics_generator.log
-chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/mdm_metrics_generator.log
+
touch /var/opt/microsoft/docker-cimprov/log/health_monitors.log
chmod 666 /var/opt/microsoft/docker-cimprov/log/health_monitors.log
-chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/health_monitors.log
+
touch /var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log
chmod 666 /var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log
-chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log
+
touch /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log
chmod 666 /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log
-chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log
+
touch /var/opt/microsoft/docker-cimprov/log/arc_k8s_cluster_identity.log
chmod 666 /var/opt/microsoft/docker-cimprov/log/arc_k8s_cluster_identity.log
-chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/arc_k8s_cluster_identity.log
-mv /etc/opt/microsoft/docker-cimprov/container.conf /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf
-chown omsagent:omsagent /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf
+
+touch /var/opt/microsoft/docker-cimprov/log/fluentd.log
+chmod 666 /var/opt/microsoft/docker-cimprov/log/fluentd.log
+
%Postuninstall_10
# If we're an upgrade, skip all of this cleanup
@@ -299,7 +294,6 @@ if ${{PERFORMING_UPGRADE_NOT}}; then
rm -f /var/opt/microsoft/docker-cimprov/state/KubeLogQueryState.yaml
rm -f /var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt
rm -f /var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt
- rm -f /etc/opt/microsoft/omsagent/conf/omsagent.d/container.conf
rmdir /var/opt/microsoft/docker-cimprov/log 2> /dev/null
rmdir /var/opt/microsoft/docker-cimprov/state/ContainerInventory 2> /dev/null
rmdir /var/opt/microsoft/docker-cimprov/state/ImageInventory 2> /dev/null
@@ -308,14 +302,7 @@ if ${{PERFORMING_UPGRADE_NOT}}; then
rmdir /etc/opt/microsoft/docker-cimprov/conf 2> /dev/null
rmdir /etc/opt/microsoft/docker-cimprov 2> /dev/null
rmdir /etc/opt/microsoft 2> /dev/null
- rmdir /etc/opt 2> /dev/null
- #Remove sudoers file edit
- if [ -s /etc/sudoers.d/omsagent ]
- then
- chmod +w /etc/sudoers.d/omsagent
- sed -i '/docker\-provider/,+1 d' /etc/sudoers.d/omsagent
- chmod 440 /etc/sudoers.d/omsagent
- fi
+ rmdir /etc/opt 2> /dev/null
fi
%Preinstall_0
diff --git a/build/linux/installer/datafiles/linux.data b/build/linux/installer/datafiles/linux.data
index 604394d80..48af63a73 100644
--- a/build/linux/installer/datafiles/linux.data
+++ b/build/linux/installer/datafiles/linux.data
@@ -1,16 +1,11 @@
%Variables
PF: 'Linux'
-OMI_SERVICE: '/opt/omi/bin/service_control'
-OMS_SERVICE: '/opt/microsoft/omsagent/bin/service_control'
+
%Postinstall_2000
-# Reload the OMI server
-${{OMI_SERVICE}} reload
-${{OMS_SERVICE}} reload
-if ${{PERFORMING_UPGRADE_NOT}}; then
- /opt/omi/bin/omicli ei root/cimv2 Container_HostInventory
-fi
+
+
%Postuninstall_1000
# Calling sequence for RPM pre/post scripts, during upgrade, is as follows:
@@ -35,10 +30,5 @@ if ${{PERFORMING_UPGRADE_NOT}}; then
fi
%Postuninstall_1100
-# If we're called for upgrade, don't do anything
-if ${{PERFORMING_UPGRADE_NOT}}; then
- # Reload the OMI server
- ${{OMI_SERVICE}} reload
- ${{OMS_SERVICE}} reload
-fi
+
diff --git a/build/linux/installer/datafiles/linux_dpkg.data b/build/linux/installer/datafiles/linux_dpkg.data
index a7821642d..bdf9f2354 100644
--- a/build/linux/installer/datafiles/linux_dpkg.data
+++ b/build/linux/installer/datafiles/linux_dpkg.data
@@ -3,5 +3,5 @@ PERFORMING_UPGRADE_NOT: '[ "$1" != "upgrade" ]'
PACKAGE_TYPE: 'DPKG'
%Dependencies
-omi (>= 1.0.8.6)
+
diff --git a/build/linux/installer/datafiles/linux_rpm.data b/build/linux/installer/datafiles/linux_rpm.data
index 1b9ba009b..d537b444d 100644
--- a/build/linux/installer/datafiles/linux_rpm.data
+++ b/build/linux/installer/datafiles/linux_rpm.data
@@ -3,5 +3,5 @@ PERFORMING_UPGRADE_NOT: '[ "$1" -ne 1 ]'
PACKAGE_TYPE: 'RPM'
%Dependencies
-omi >= 1.0.8-6
+
diff --git a/build/linux/installer/scripts/livenessprobe.sh b/build/linux/installer/scripts/livenessprobe.sh
index 5e1261e7e..252f471e9 100644
--- a/build/linux/installer/scripts/livenessprobe.sh
+++ b/build/linux/installer/scripts/livenessprobe.sh
@@ -1,19 +1,21 @@
#!/bin/bash
-#test to exit non zero value if omsagent is not running
-(ps -ef | grep omsagent- | grep -v "grep")
+#test to exit non zero value if mdsd is not running
+(ps -ef | grep "mdsd" | grep -v "grep")
if [ $? -ne 0 ]
then
- echo " omsagent is not running" > /dev/termination-log
- exit 1
+ echo "mdsd is not running" > /dev/termination-log
+ exit 1
fi
-#optionally test to exit non zero value if oneagent is not running
-if [ -e "/opt/AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2" ]; then
- (ps -ef | grep "mdsd" | grep -v "grep")
+
+#optionally test to exit non zero value if fluentd is not running
+#fluentd not used in sidecar container
+if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then
+ (ps -ef | grep "fluentd" | grep -v "grep")
if [ $? -ne 0 ]
then
- echo "oneagent is not running" > /dev/termination-log
+ echo "fluentd is not running" > /dev/termination-log
exit 1
fi
fi
diff --git a/build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb b/build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb
index 5ce5d79d2..dcf179bf2 100644
--- a/build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb
+++ b/build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb
@@ -3,7 +3,7 @@
require_relative "tomlrb"
require_relative "ConfigParseErrorLogger"
-require_relative "microsoft/omsagent/plugin/constants"
+require_relative "/etc/fluent/plugin/constants"
@configMapMountPath = "/etc/config/settings/alertable-metrics-configuration-settings"
@configVersion = ""
diff --git a/build/linux/installer/scripts/tomlparser-metric-collection-config.rb b/build/linux/installer/scripts/tomlparser-metric-collection-config.rb
index 40d87b7f1..cee41312b 100644
--- a/build/linux/installer/scripts/tomlparser-metric-collection-config.rb
+++ b/build/linux/installer/scripts/tomlparser-metric-collection-config.rb
@@ -3,7 +3,7 @@
require_relative "tomlrb"
require_relative "ConfigParseErrorLogger"
-require_relative "microsoft/omsagent/plugin/constants"
+require_relative "/etc/fluent/plugin/constants"
@configMapMountPath = "/etc/config/settings/metric_collection_settings"
@configVersion = ""
diff --git a/kubernetes/linux/envmdsd b/kubernetes/linux/envmdsd
index 3f834bfb8..5a939fc3e 100644
--- a/kubernetes/linux/envmdsd
+++ b/kubernetes/linux/envmdsd
@@ -2,8 +2,6 @@ export MDSD_ROLE_PREFIX="/var/run/mdsd/default"
#export MDSD_OPTIONS="-d -A -r ${MDSD_ROLE_PREFIX}"
export MDSD_LOG="/var/opt/microsoft/linuxmonagent/log"
export MDSD_SPOOL_DIRECTORY="/var/opt/microsoft/linuxmonagent"
-export OMS_CERT_PATH="/etc/opt/microsoft/omsagent/certs/oms.crt"
-export OMS_CERT_KEY_PATH="/etc/opt/microsoft/omsagent/certs/oms.key"
#export CIWORKSPACE_id=""
#export CIWORKSPACE_key=""
export MDSD_OPTIONS="-A -c /etc/mdsd.d/mdsd.xml -r ${MDSD_ROLE_PREFIX} -S ${MDSD_SPOOL_DIRECTORY}/eh -e ${MDSD_LOG}/mdsd.err -w ${MDSD_LOG}/mdsd.warn -o ${MDSD_LOG}/mdsd.info -q ${MDSD_LOG}/mdsd.qos"
diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh
index c7d939034..b21ed6b96 100644
--- a/kubernetes/linux/main.sh
+++ b/kubernetes/linux/main.sh
@@ -38,41 +38,9 @@ waitforlisteneronTCPport() {
fi
}
-if [ -e "/etc/config/kube.conf" ]; then
- cat /etc/config/kube.conf > /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf
-elif [ "${CONTAINER_TYPE}" == "PrometheusSidecar" ]; then
- echo "setting omsagent conf file for prometheus sidecar"
- cat /etc/opt/microsoft/docker-cimprov/prometheus-side-car.conf > /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf
- # omsadmin.sh replaces %MONITOR_AGENT_PORT% and %SYSLOG_PORT% in the monitor.conf and syslog.conf with default ports 25324 and 25224.
- # Since we are running 2 omsagents in the same pod, we need to use a different port for the sidecar,
- # else we will see the Address already in use - bind(2) for 0.0.0.0:253(2)24 error.
- # Look into omsadmin.sh scripts's configure_monitor_agent()/configure_syslog() and find_available_port() methods for more info.
- sed -i -e 's/port %MONITOR_AGENT_PORT%/port 25326/g' /etc/opt/microsoft/omsagent/sysconf/omsagent.d/monitor.conf
- sed -i -e 's/port %SYSLOG_PORT%/port 25226/g' /etc/opt/microsoft/omsagent/sysconf/omsagent.d/syslog.conf
-else
- echo "setting omsagent conf file for daemonset"
- sed -i -e 's/bind 127.0.0.1/bind 0.0.0.0/g' /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf
-fi
-sed -i -e 's/bind 127.0.0.1/bind 0.0.0.0/g' /etc/opt/microsoft/omsagent/sysconf/omsagent.d/syslog.conf
-sed -i -e 's/^exit 101$/exit 0/g' /usr/sbin/policy-rc.d
-
-#Using the get_hostname for hostname instead of the host field in syslog messages
-sed -i.bak "s/record\[\"Host\"\] = hostname/record\[\"Host\"\] = OMS::Common.get_hostname/" /opt/microsoft/omsagent/plugin/filter_syslog.rb
-
#using /var/opt/microsoft/docker-cimprov/state instead of /var/opt/microsoft/omsagent/state since the latter gets deleted during onboarding
mkdir -p /var/opt/microsoft/docker-cimprov/state
-#if [ ! -e "/etc/config/kube.conf" ]; then
- # add permissions for omsagent user to access docker.sock
- #sudo setfacl -m user:omsagent:rw /var/run/host/docker.sock
-#fi
-
-# add permissions for omsagent user to access azure.json.
-sudo setfacl -m user:omsagent:r /etc/kubernetes/host/azure.json
-
-# add permission for omsagent user to log folder. We also need 'x', else log rotation is failing. TODO: Investigate why.
-sudo setfacl -m user:omsagent:rwx /var/opt/microsoft/docker-cimprov/log
-
#Run inotify as a daemon to track changes to the mounted configmap.
inotifywait /etc/config/settings --daemon --recursive --outfile "/opt/inotifyoutput.txt" --event create,delete --format '%e : %T' --timefmt '+%s'
@@ -89,7 +57,7 @@ else
export customResourceId=$AKS_RESOURCE_ID
echo "export customResourceId=$AKS_RESOURCE_ID" >> ~/.bashrc
source ~/.bashrc
- echo "customResourceId:$customResourceId"
+ echo "customResourceId:$customResourceId"
fi
#set agent config schema version
@@ -141,7 +109,6 @@ if [[ ( ( ! -e "/etc/config/kube.conf" ) && ( "${CONTAINER_TYPE}" == "Prometheus
fi
export PROXY_ENDPOINT=""
-
# Check for internet connectivity or workspace deletion
if [ -e "/etc/omsagent-secret/WSID" ]; then
workspaceId=$(cat /etc/omsagent-secret/WSID)
@@ -222,6 +189,7 @@ else
echo "LA Onboarding:Workspace Id not mounted, skipping the telemetry check"
fi
+
# Set environment variable for if public cloud by checking the workspace domain.
if [ -z $domain ]; then
ClOUD_ENVIRONMENT="unknown"
@@ -233,6 +201,12 @@ fi
export CLOUD_ENVIRONMENT=$CLOUD_ENVIRONMENT
echo "export CLOUD_ENVIRONMENT=$CLOUD_ENVIRONMENT" >> ~/.bashrc
+#consistent naming conventions with windows
+export DOMAIN=$domain
+echo "export DOMAIN=$DOMAIN" >> ~/.bashrc
+export WSID=$workspaceId
+echo "export WSID=$WSID" >> ~/.bashrc
+
# Check if the instrumentation key needs to be fetched from a storage account (as in airgapped clouds)
if [ ${#APPLICATIONINSIGHTS_AUTH_URL} -ge 1 ]; then # (check if APPLICATIONINSIGHTS_AUTH_URL has length >=1)
for BACKOFF in {1..4}; do
@@ -267,7 +241,7 @@ source ~/.bashrc
if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then
#Parse the configmap to set the right environment variables.
- /opt/microsoft/omsagent/ruby/bin/ruby tomlparser.rb
+ /usr/bin/ruby2.6 tomlparser.rb
cat config_env_var | while read line; do
echo $line >> ~/.bashrc
@@ -278,7 +252,7 @@ fi
#Parse the configmap to set the right environment variables for agent config.
#Note > tomlparser-agent-config.rb has to be parsed first before td-agent-bit-conf-customizer.rb for fbit agent settings
if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then
- /opt/microsoft/omsagent/ruby/bin/ruby tomlparser-agent-config.rb
+ /usr/bin/ruby2.6 tomlparser-agent-config.rb
cat agent_config_env_var | while read line; do
#echo $line
@@ -287,7 +261,7 @@ if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then
source agent_config_env_var
#Parse the configmap to set the right environment variables for network policy manager (npm) integration.
- /opt/microsoft/omsagent/ruby/bin/ruby tomlparser-npm-config.rb
+ /usr/bin/ruby2.6 tomlparser-npm-config.rb
cat integration_npm_config_env_var | while read line; do
#echo $line
@@ -298,11 +272,11 @@ fi
#Replace the placeholders in td-agent-bit.conf file for fluentbit with custom/default values in daemonset
if [ ! -e "/etc/config/kube.conf" ] && [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then
- /opt/microsoft/omsagent/ruby/bin/ruby td-agent-bit-conf-customizer.rb
+ /usr/bin/ruby2.6 td-agent-bit-conf-customizer.rb
fi
#Parse the prometheus configmap to create a file with new custom settings.
-/opt/microsoft/omsagent/ruby/bin/ruby tomlparser-prom-customconfig.rb
+/usr/bin/ruby2.6 tomlparser-prom-customconfig.rb
#Setting default environment variables to be used in any case of failure in the above steps
if [ ! -e "/etc/config/kube.conf" ]; then
@@ -335,7 +309,7 @@ fi
#Parse the configmap to set the right environment variables for MDM metrics configuration for Alerting.
if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then
- /opt/microsoft/omsagent/ruby/bin/ruby tomlparser-mdm-metrics-config.rb
+ /usr/bin/ruby2.6 tomlparser-mdm-metrics-config.rb
cat config_mdm_metrics_env_var | while read line; do
echo $line >> ~/.bashrc
@@ -343,7 +317,7 @@ if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then
source config_mdm_metrics_env_var
#Parse the configmap to set the right environment variables for metric collection settings
- /opt/microsoft/omsagent/ruby/bin/ruby tomlparser-metric-collection-config.rb
+ /usr/bin/ruby2.6 tomlparser-metric-collection-config.rb
cat config_metric_collection_env_var | while read line; do
echo $line >> ~/.bashrc
@@ -354,7 +328,7 @@ fi
# OSM scraping to be done in replicaset if sidecar car scraping is disabled and always do the scraping from the sidecar (It will always be either one of the two)
if [[ ( ( ! -e "/etc/config/kube.conf" ) && ( "${CONTAINER_TYPE}" == "PrometheusSidecar" ) ) ||
( ( -e "/etc/config/kube.conf" ) && ( "${SIDECAR_SCRAPING_ENABLED}" == "false" ) ) ]]; then
- /opt/microsoft/omsagent/ruby/bin/ruby tomlparser-osm-config.rb
+ /usr/bin/ruby2.6 tomlparser-osm-config.rb
if [ -e "integration_osm_config_env_var" ]; then
cat integration_osm_config_env_var | while read line; do
@@ -432,26 +406,11 @@ export KUBELET_RUNTIME_OPERATIONS_ERRORS_METRIC="kubelet_docker_operations_error
if [ "$CONTAINER_RUNTIME" != "docker" ]; then
# these metrics are avialble only on k8s versions <1.18 and will get deprecated from 1.18
export KUBELET_RUNTIME_OPERATIONS_METRIC="kubelet_runtime_operations"
- export KUBELET_RUNTIME_OPERATIONS_ERRORS_METRIC="kubelet_runtime_operations_errors"
-else
- #if container run time is docker then add omsagent user to local docker group to get access to docker.sock
- # docker.sock only use for the telemetry to get the docker version
- DOCKER_SOCKET=/var/run/host/docker.sock
- DOCKER_GROUP=docker
- REGULAR_USER=omsagent
- if [ -S ${DOCKER_SOCKET} ]; then
- echo "getting gid for docker.sock"
- DOCKER_GID=$(stat -c '%g' ${DOCKER_SOCKET})
- echo "creating a local docker group"
- groupadd -for -g ${DOCKER_GID} ${DOCKER_GROUP}
- echo "adding omsagent user to local docker group"
- usermod -aG ${DOCKER_GROUP} ${REGULAR_USER}
- fi
+ export KUBELET_RUNTIME_OPERATIONS_ERRORS_METRIC="kubelet_runtime_operations_errors"
fi
echo "set caps for ruby process to read container env from proc"
-sudo setcap cap_sys_ptrace,cap_dac_read_search+ep /opt/microsoft/omsagent/ruby/bin/ruby
-
+sudo setcap cap_sys_ptrace,cap_dac_read_search+ep /usr/bin/ruby2.6
echo "export KUBELET_RUNTIME_OPERATIONS_METRIC="$KUBELET_RUNTIME_OPERATIONS_METRIC >> ~/.bashrc
echo "export KUBELET_RUNTIME_OPERATIONS_ERRORS_METRIC="$KUBELET_RUNTIME_OPERATIONS_ERRORS_METRIC >> ~/.bashrc
@@ -461,171 +420,70 @@ echo $NODE_NAME > /var/opt/microsoft/docker-cimprov/state/containerhostname
#check if file was written successfully.
cat /var/opt/microsoft/docker-cimprov/state/containerhostname
-
-#Commenting it for test. We do this in the installer now
-#Setup sudo permission for containerlogtailfilereader
-#chmod +w /etc/sudoers.d/omsagent
-#echo "#run containerlogtailfilereader.rb for docker-provider" >> /etc/sudoers.d/omsagent
-#echo "omsagent ALL=(ALL) NOPASSWD: /opt/microsoft/omsagent/ruby/bin/ruby /opt/microsoft/omsagent/plugin/containerlogtailfilereader.rb *" >> /etc/sudoers.d/omsagent
-#chmod 440 /etc/sudoers.d/omsagent
-
-#Disable dsc
-#/opt/microsoft/omsconfig/Scripts/OMS_MetaConfigHelper.py --disable
-rm -f /etc/opt/microsoft/omsagent/conf/omsagent.d/omsconfig.consistencyinvoker.conf
-
-CIWORKSPACE_id=""
-CIWORKSPACE_key=""
-
-if [ -z $INT ]; then
- if [ -a /etc/omsagent-secret/PROXY ]; then
- if [ -a /etc/omsagent-secret/DOMAIN ]; then
- /opt/microsoft/omsagent/bin/omsadmin.sh -w `cat /etc/omsagent-secret/WSID` -s `cat /etc/omsagent-secret/KEY` -d `cat /etc/omsagent-secret/DOMAIN` -p `cat /etc/omsagent-secret/PROXY`
- else
- /opt/microsoft/omsagent/bin/omsadmin.sh -w `cat /etc/omsagent-secret/WSID` -s `cat /etc/omsagent-secret/KEY` -p `cat /etc/omsagent-secret/PROXY`
- fi
- CIWORKSPACE_id="$(cat /etc/omsagent-secret/WSID)"
- CIWORKSPACE_key="$(cat /etc/omsagent-secret/KEY)"
- elif [ -a /etc/omsagent-secret/DOMAIN ]; then
- /opt/microsoft/omsagent/bin/omsadmin.sh -w `cat /etc/omsagent-secret/WSID` -s `cat /etc/omsagent-secret/KEY` -d `cat /etc/omsagent-secret/DOMAIN`
- CIWORKSPACE_id="$(cat /etc/omsagent-secret/WSID)"
- CIWORKSPACE_key="$(cat /etc/omsagent-secret/KEY)"
- elif [ -a /etc/omsagent-secret/WSID ]; then
- /opt/microsoft/omsagent/bin/omsadmin.sh -w `cat /etc/omsagent-secret/WSID` -s `cat /etc/omsagent-secret/KEY`
- CIWORKSPACE_id="$(cat /etc/omsagent-secret/WSID)"
- CIWORKSPACE_key="$(cat /etc/omsagent-secret/KEY)"
- elif [ -a /run/secrets/DOMAIN ]; then
- /opt/microsoft/omsagent/bin/omsadmin.sh -w `cat /run/secrets/WSID` -s `cat /run/secrets/KEY` -d `cat /run/secrets/DOMAIN`
- CIWORKSPACE_id="$(cat /run/secrets/WSID)"
- CIWORKSPACE_key="$(cat /run/secrets/KEY)"
- elif [ -a /run/secrets/WSID ]; then
- /opt/microsoft/omsagent/bin/omsadmin.sh -w `cat /run/secrets/WSID` -s `cat /run/secrets/KEY`
- CIWORKSPACE_id="$(cat /run/secrets/WSID)"
- CIWORKSPACE_key="$(cat /run/secrets/KEY)"
- elif [ -z $DOMAIN ]; then
- /opt/microsoft/omsagent/bin/omsadmin.sh -w $WSID -s $KEY
- CIWORKSPACE_id="$(cat /etc/omsagent-secret/WSID)"
- CIWORKSPACE_key="$(cat /etc/omsagent-secret/KEY)"
- else
- /opt/microsoft/omsagent/bin/omsadmin.sh -w $WSID -s $KEY -d $DOMAIN
- CIWORKSPACE_id="$WSID"
- CIWORKSPACE_key="$KEY"
- fi
-else
-#To onboard to INT workspace - workspace-id (WSID-not base64 encoded), workspace-key (KEY-not base64 encoded), Domain(DOMAIN-int2.microsoftatlanta-int.com)
-#need to be added to omsagent.yaml.
- echo WORKSPACE_ID=$WSID > /etc/omsagent-onboard.conf
- echo SHARED_KEY=$KEY >> /etc/omsagent-onboard.conf
- echo URL_TLD=$DOMAIN >> /etc/omsagent-onboard.conf
- /opt/microsoft/omsagent/bin/omsadmin.sh
- CIWORKSPACE_id="$WSID"
- CIWORKSPACE_key="$KEY"
-fi
-
#start cron daemon for logrotate
service cron start
+#get docker-provider versions
-#check if agent onboarded successfully
-/opt/microsoft/omsagent/bin/omsadmin.sh -l
-
-#get omsagent and docker-provider versions
-dpkg -l | grep omsagent | awk '{print $2 " " $3}'
dpkg -l | grep docker-cimprov | awk '{print $2 " " $3}'
DOCKER_CIMPROV_VERSION=$(dpkg -l | grep docker-cimprov | awk '{print $3}')
echo "DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION"
export DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION
echo "export DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION" >> ~/.bashrc
+echo "*** activating oneagent in legacy auth mode ***"
+CIWORKSPACE_id="$(cat /etc/omsagent-secret/WSID)"
+#use the file path since it is more secure than an env variable
+CIWORKSPACE_keyFile="/etc/omsagent-secret/KEY"
+cat /etc/mdsd.d/envmdsd | while read line; do
+ echo $line >> ~/.bashrc
+done
+source /etc/mdsd.d/envmdsd
+echo "setting mdsd workspaceid & key for workspace:$CIWORKSPACE_id"
+export CIWORKSPACE_id=$CIWORKSPACE_id
+echo "export CIWORKSPACE_id=$CIWORKSPACE_id" >> ~/.bashrc
+export CIWORKSPACE_keyFile=$CIWORKSPACE_keyFile
+echo "export CIWORKSPACE_keyFile=$CIWORKSPACE_keyFile" >> ~/.bashrc
+export OMS_TLD=$domain
+echo "export OMS_TLD=$OMS_TLD" >> ~/.bashrc
+export MDSD_FLUENT_SOCKET_PORT="29230"
+echo "export MDSD_FLUENT_SOCKET_PORT=$MDSD_FLUENT_SOCKET_PORT" >> ~/.bashrc
+
+#skip imds lookup since not used in legacy auth path
+export SKIP_IMDS_LOOKUP_FOR_LEGACY_AUTH="true"
+echo "export SKIP_IMDS_LOOKUP_FOR_LEGACY_AUTH=$SKIP_IMDS_LOOKUP_FOR_LEGACY_AUTH" >> ~/.bashrc
-#region check to auto-activate oneagent, to route container logs,
-#Intent is to activate one agent routing for all managed clusters with region in the regionllist, unless overridden by configmap
-# AZMON_CONTAINER_LOGS_ROUTE will have route (if any) specified in the config map
-# AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE will have the final route that we compute & set, based on our region list logic
-echo "************start oneagent log routing checks************"
-# by default, use configmap route for safer side
-AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$AZMON_CONTAINER_LOGS_ROUTE
-
-#trim region list
-oneagentregions="$(echo $AZMON_CONTAINERLOGS_ONEAGENT_REGIONS | xargs)"
-#lowercase region list
-typeset -l oneagentregions=$oneagentregions
-echo "oneagent regions: $oneagentregions"
-#trim current region
-currentregion="$(echo $AKS_REGION | xargs)"
-#lowercase current region
-typeset -l currentregion=$currentregion
-echo "current region: $currentregion"
-
-#initilze isoneagentregion as false
-isoneagentregion=false
-
-#set isoneagentregion as true if matching region is found
-if [ ! -z $oneagentregions ] && [ ! -z $currentregion ]; then
- for rgn in $(echo $oneagentregions | sed "s/,/ /g"); do
- if [ "$rgn" == "$currentregion" ]; then
- isoneagentregion=true
- echo "current region is in oneagent regions..."
- break
- fi
- done
-else
- echo "current region is not in oneagent regions..."
-fi
+source ~/.bashrc
-if [ "$isoneagentregion" = true ]; then
- #if configmap has a routing for logs, but current region is in the oneagent region list, take the configmap route
- if [ ! -z $AZMON_CONTAINER_LOGS_ROUTE ]; then
- AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$AZMON_CONTAINER_LOGS_ROUTE
- echo "oneagent region is true for current region:$currentregion and config map logs route is not empty. so using config map logs route as effective route:$AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE"
- else #there is no configmap route, so route thru oneagent
- AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE="v2"
- echo "oneagent region is true for current region:$currentregion and config map logs route is empty. so using oneagent as effective route:$AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE"
- fi
-else
- echo "oneagent region is false for current region:$currentregion"
+dpkg -l | grep mdsd | awk '{print $2 " " $3}'
+
+if [ "${CONTAINER_TYPE}" == "PrometheusSidecar" ]; then
+ echo "starting mdsd with mdsd-port=26130, fluentport=26230 and influxport=26330 in legacy auth mode in sidecar container..."
+ #use tenant name to avoid unix socket conflicts, and different ports to avoid port conflicts
+ #roleprefix to use container specific mdsd socket
+ export TENANT_NAME="${CONTAINER_TYPE}"
+ echo "export TENANT_NAME=$TENANT_NAME" >> ~/.bashrc
+ export MDSD_ROLE_PREFIX=/var/run/mdsd-${CONTAINER_TYPE}/default
+ echo "export MDSD_ROLE_PREFIX=$MDSD_ROLE_PREFIX" >> ~/.bashrc
+ source ~/.bashrc
+ mkdir /var/run/mdsd-${CONTAINER_TYPE}
+ # add -T 0xFFFF for full traces
+ mdsd -r ${MDSD_ROLE_PREFIX} -p 26130 -f 26230 -i 26330 -e ${MDSD_LOG}/mdsd.err -w ${MDSD_LOG}/mdsd.warn -o ${MDSD_LOG}/mdsd.info -q ${MDSD_LOG}/mdsd.qos &
+else
+ echo "starting mdsd in legacy auth mode in main container..."
+ # add -T 0xFFFF for full traces
+ mdsd -e ${MDSD_LOG}/mdsd.err -w ${MDSD_LOG}/mdsd.warn -o ${MDSD_LOG}/mdsd.info -q ${MDSD_LOG}/mdsd.qos &
fi
-
-#start oneagent
-if [ ! -e "/etc/config/kube.conf" ] && [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then
- if [ ! -z $AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE ]; then
- echo "container logs configmap route is $AZMON_CONTAINER_LOGS_ROUTE"
- echo "container logs effective route is $AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE"
- #trim
- containerlogsroute="$(echo $AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE | xargs)"
- # convert to lowercase
- typeset -l containerlogsroute=$containerlogsroute
-
- echo "setting AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE as :$containerlogsroute"
- export AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$containerlogsroute
- echo "export AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$containerlogsroute" >> ~/.bashrc
- source ~/.bashrc
-
- if [ "$containerlogsroute" == "v2" ]; then
- echo "activating oneagent..."
- echo "configuring mdsd..."
- cat /etc/mdsd.d/envmdsd | while read line; do
- echo $line >> ~/.bashrc
- done
- source /etc/mdsd.d/envmdsd
-
- echo "setting mdsd workspaceid & key for workspace:$CIWORKSPACE_id"
- export CIWORKSPACE_id=$CIWORKSPACE_id
- echo "export CIWORKSPACE_id=$CIWORKSPACE_id" >> ~/.bashrc
- export CIWORKSPACE_key=$CIWORKSPACE_key
- echo "export CIWORKSPACE_key=$CIWORKSPACE_key" >> ~/.bashrc
-
- source ~/.bashrc
-
- dpkg -l | grep mdsd | awk '{print $2 " " $3}'
-
- echo "starting mdsd ..."
- mdsd -e ${MDSD_LOG}/mdsd.err -w ${MDSD_LOG}/mdsd.warn -o ${MDSD_LOG}/mdsd.info -q ${MDSD_LOG}/mdsd.qos &
-
- touch /opt/AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2
- fi
- fi
-fi
-echo "************end oneagent log routing checks************"
+# no dependency on fluentd for prometheus side car container
+if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ]; then
+ if [ ! -e "/etc/config/kube.conf" ]; then
+ echo "*** starting fluentd v1 in daemonset"
+ fluentd -c /etc/fluent/container.conf -o /var/opt/microsoft/docker-cimprov/log/fluentd.log &
+ else
+ echo "*** starting fluentd v1 in replicaset"
+ fluentd -c /etc/fluent/kube.conf -o /var/opt/microsoft/docker-cimprov/log/fluentd.log &
+ fi
+fi
#If config parsing was successful, a copy of the conf file with replaced custom settings file is created
if [ ! -e "/etc/config/kube.conf" ]; then
@@ -749,12 +607,9 @@ dpkg -l | grep td-agent-bit | awk '{print $2 " " $3}'
#dpkg -l | grep telegraf | awk '{print $2 " " $3}'
-
-
# Write messages from the liveness probe to stdout (so telemetry picks it up)
touch /dev/write-to-traces
-
echo "stopping rsyslog..."
service rsyslog stop
@@ -762,7 +617,7 @@ echo "getting rsyslog status..."
service rsyslog status
shutdown() {
- /opt/microsoft/omsagent/bin/service_control stop
+ pkill -f mdsd
}
trap "shutdown" SIGTERM
diff --git a/kubernetes/linux/mdsd.xml b/kubernetes/linux/mdsd.xml
index 49d329791..de14240aa 100644
--- a/kubernetes/linux/mdsd.xml
+++ b/kubernetes/linux/mdsd.xml
@@ -47,6 +47,149 @@
Each column has a name, an augmented JSON source type, and a target MDS type.
-->
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -68,14 +211,33 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
]]>
@@ -143,11 +360,95 @@
-
-
]]>
+
+
+
+
+ ]]>
+
+
+
+
+
+
+
+ ]]>
+
+
+
+
+
+
+
+ ]]>
+
+
+
+
+
+
+
+ ]]>
+
+
+
+
+
+
+
+ ]]>
+
+
+
+
+
+
+
+ ]]>
+
+
+
+
+
+
+
+ ]]>
+
+
+
+
+
+
+
+ ]]>
+
+
+
+
+
+
+
+ ]]>
+
+
+
+
+
+
+ ]]>
+
+
+
+
+
+
+ ]]>
+
+
+
diff --git a/kubernetes/linux/setup.sh b/kubernetes/linux/setup.sh
index f065cc165..3d00e4c57 100644
--- a/kubernetes/linux/setup.sh
+++ b/kubernetes/linux/setup.sh
@@ -9,37 +9,13 @@ sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \
dpkg-reconfigure --frontend=noninteractive locales && \
update-locale LANG=en_US.UTF-8
-wget https://github.com/Microsoft/OMS-Agent-for-Linux/releases/download/OMSAgent_v1.10.0-1/omsagent-1.10.0-1.universal.x64.sh
+#install oneagent - Official bits (05/17/2021)
+wget https://github.com/microsoft/Docker-Provider/releases/download/05172021-oneagent/azure-mdsd_1.10.1-build.master.213_x86_64.deb
-#create file to disable omi service startup script
-touch /etc/.omi_disable_service_control
-
-chmod 775 $TMPDIR/*.sh
-
-#Extract omsbundle
-$TMPDIR/omsagent-*.universal.x64.sh --extract
-mv $TMPDIR/omsbundle* $TMPDIR/omsbundle
-#Install omi
-/usr/bin/dpkg -i $TMPDIR/omsbundle/110/omi*.deb
-
-#Install scx
-/usr/bin/dpkg -i $TMPDIR/omsbundle/110/scx*.deb
-#$TMPDIR/omsbundle/bundles/scx-1.6.*-*.universal.x64.sh --install
-
-#Install omsagent
-
-/usr/bin/dpkg -i $TMPDIR/omsbundle/110/omsagent*.deb
-#/usr/bin/dpkg -i $TMPDIR/omsbundle/100/omsconfig*.deb
-
-#install oneagent - Official bits (05/2021)
-wget https://github.com/microsoft/Docker-Provider/releases/download/05112021-oneagent/azure-mdsd_1.8.0-build.master.189_x86_64.deb
/usr/bin/dpkg -i $TMPDIR/azure-mdsd*.deb
cp -f $TMPDIR/mdsd.xml /etc/mdsd.d
cp -f $TMPDIR/envmdsd /etc/mdsd.d
-#Assign permissions to omsagent user to access docker.sock
-sudo apt-get install acl
-
#download inotify tools for watching configmap changes
sudo apt-get update
sudo apt-get install inotify-tools -y
@@ -49,18 +25,7 @@ sudo apt-get install inotify-tools -y
sudo apt-get install jq=1.5+dfsg-2 -y
#used to setcaps for ruby process to read /proc/env
-echo "installing libcap2-bin"
sudo apt-get install libcap2-bin -y
-#/$TMPDIR/omsbundle/oss-kits/docker-cimprov-1.0.0-*.x86_64.sh --install
-#Use downloaded docker-provider instead of the bundled one
-
-#download and install telegraf
-#wget https://dl.influxdata.com/telegraf/releases/telegraf_1.10.1-1_amd64.deb
-#sudo dpkg -i telegraf_1.10.1-1_amd64.deb
-
-#service telegraf stop
-
-#wget https://github.com/microsoft/Docker-Provider/releases/download/5.0.0.0/telegraf
#1.18 pre-release
wget https://dl.influxdata.com/telegraf/releases/telegraf-1.18.0_linux_amd64.tar.gz
@@ -79,8 +44,17 @@ sudo echo "deb https://packages.fluentbit.io/ubuntu/xenial xenial main" >> /etc/
sudo apt-get update
sudo apt-get install td-agent-bit=1.6.8 -y
-rm -rf $TMPDIR/omsbundle
-rm -f $TMPDIR/omsagent*.sh
+# install ruby2.6
+sudo apt-get install software-properties-common -y
+sudo apt-add-repository ppa:brightbox/ruby-ng -y
+sudo apt-get update
+sudo apt-get install ruby2.6 ruby2.6-dev gcc make -y
+# fluentd v1 gem
+gem install fluentd -v "1.12.2" --no-document
+fluentd --setup ./fluent
+gem install gyoku iso8601 --no-doc
+
+
rm -f $TMPDIR/docker-cimprov*.sh
rm -f $TMPDIR/azure-mdsd*.deb
rm -f $TMPDIR/mdsd.xml
diff --git a/source/plugins/go/src/oms.go b/source/plugins/go/src/oms.go
index d35acad3d..25f364c55 100644
--- a/source/plugins/go/src/oms.go
+++ b/source/plugins/go/src/oms.go
@@ -92,15 +92,24 @@ const kubeMonAgentConfigEventFlushInterval = 60
//Eventsource name in mdsd
const MdsdContainerLogSourceName = "ContainerLogSource"
const MdsdContainerLogV2SourceName = "ContainerLogV2Source"
+const MdsdKubeMonAgentEventsSourceName = "KubeMonAgentEventsSource"
+const MdsdInsightsMetricsSourceName = "InsightsMetricsSource"
-//container logs route (v2=flush to oneagent, adx= flush to adx ingestion, anything else flush to ODS[default])
+//container logs route (v2=flush to oneagent, adx= flush to adx ingestion, v1 for ODS Direct)
const ContainerLogsV2Route = "v2"
const ContainerLogsADXRoute = "adx"
+//fallback option v1 route i.e. ODS direct if required in any case
+const ContainerLogsV1Route = "v1"
+
//container logs schema (v2=ContainerLogsV2 table in LA, anything else ContainerLogs table in LA. This is applicable only if Container logs route is NOT ADX)
const ContainerLogV2SchemaVersion = "v2"
+
+//env variable to container type
+const ContainerTypeEnv = "CONTAINER_TYPE"
+
var (
// PluginConfiguration the plugins configuration
PluginConfiguration map[string]string
@@ -108,6 +117,10 @@ var (
HTTPClient http.Client
// Client for MDSD msgp Unix socket
MdsdMsgpUnixSocketClient net.Conn
+ // Client for MDSD msgp Unix socket for KubeMon Agent events
+ MdsdKubeMonMsgpUnixSocketClient net.Conn
+ // Client for MDSD msgp Unix socket for Insights Metrics
+ MdsdInsightsMetricsMsgpUnixSocketClient net.Conn
// Ingestor for ADX
ADXIngestor *ingest.Ingestion
// OMSEndpoint ingestion endpoint
@@ -116,6 +129,8 @@ var (
Computer string
// WorkspaceID log analytics workspace id
WorkspaceID string
+ // LogAnalyticsWorkspaceDomain log analytics workspace domain
+ LogAnalyticsWorkspaceDomain string
// ResourceID for resource-centric log analytics data
ResourceID string
// Resource-centric flag (will be true if we determine if above RseourceID is non-empty - default is false)
@@ -143,7 +158,17 @@ var (
// ADX tenantID
AdxTenantID string
//ADX client secret
- AdxClientSecret string
+ AdxClientSecret string
+ // container log or container log v2 tag name for oneagent route
+ MdsdContainerLogTagName string
+ // kubemonagent events tag name for oneagent route
+ MdsdKubeMonAgentEventsTagName string
+ // InsightsMetrics tag name for oneagent route
+ MdsdInsightsMetricsTagName string
+ // flag to check if its Windows OS
+ IsWindows bool
+ // container type
+ ContainerType string
)
var (
@@ -314,6 +339,15 @@ const (
PromScrapingError
)
+// DataType to be used as enum per data type socket client creation
+type DataType int
+const (
+ // DataType to be used as enum per data type socket client creation
+ ContainerLogV2 DataType = iota
+ KubeMonAgentEvents
+ InsightsMetrics
+)
+
func createLogger() *log.Logger {
var logfile *os.File
@@ -532,6 +566,7 @@ func flushKubeMonAgentEventRecords() {
start := time.Now()
var elapsed time.Duration
var laKubeMonAgentEventsRecords []laKubeMonAgentEvents
+ var msgPackEntries []MsgPackEntry
telemetryDimensions := make(map[string]string)
telemetryDimensions["ConfigErrorEventCount"] = strconv.Itoa(len(ConfigErrorEvent))
@@ -558,7 +593,25 @@ func flushKubeMonAgentEventRecords() {
Message: k,
Tags: fmt.Sprintf("%s", tagJson),
}
- laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord)
+ laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord)
+ var stringMap map[string]string
+ jsonBytes, err := json.Marshal(&laKubeMonAgentEventsRecord)
+ if err != nil {
+ message := fmt.Sprintf("Error while Marshalling laKubeMonAgentEventsRecord to json bytes: %s", err.Error())
+ Log(message)
+ SendException(message)
+ } else {
+ if err := json.Unmarshal(jsonBytes, &stringMap); err != nil {
+ message := fmt.Sprintf("Error while UnMarshalling json bytes to stringmap: %s", err.Error())
+ Log(message)
+ SendException(message)
+ } else {
+ msgPackEntry := MsgPackEntry{
+ Record: stringMap,
+ }
+ msgPackEntries = append(msgPackEntries, msgPackEntry)
+ }
+ }
}
}
@@ -579,7 +632,25 @@ func flushKubeMonAgentEventRecords() {
Message: k,
Tags: fmt.Sprintf("%s", tagJson),
}
- laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord)
+ laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord)
+ var stringMap map[string]string
+ jsonBytes, err := json.Marshal(&laKubeMonAgentEventsRecord)
+ if err != nil {
+ message := fmt.Sprintf("Error while Marshalling laKubeMonAgentEventsRecord to json bytes: %s", err.Error())
+ Log(message)
+ SendException(message)
+ } else {
+ if err := json.Unmarshal(jsonBytes, &stringMap); err != nil {
+ message := fmt.Sprintf("Error while UnMarshalling json bytes to stringmap: %s", err.Error())
+ Log(message)
+ SendException(message)
+ } else {
+ msgPackEntry := MsgPackEntry{
+ Record: stringMap,
+ }
+ msgPackEntries = append(msgPackEntries, msgPackEntry)
+ }
+ }
}
}
@@ -610,11 +681,63 @@ func flushKubeMonAgentEventRecords() {
Message: "No errors",
Tags: fmt.Sprintf("%s", tagJson),
}
- laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord)
+ laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord)
+ var stringMap map[string]string
+ jsonBytes, err := json.Marshal(&laKubeMonAgentEventsRecord)
+ if err != nil {
+ message := fmt.Sprintf("Error while Marshalling laKubeMonAgentEventsRecord to json bytes: %s", err.Error())
+ Log(message)
+ SendException(message)
+ } else {
+ if err := json.Unmarshal(jsonBytes, &stringMap); err != nil {
+ message := fmt.Sprintf("Error while UnMarshalling json bytes to stringmap: %s", err.Error())
+ Log(message)
+ SendException(message)
+ } else {
+ msgPackEntry := MsgPackEntry{
+ Record: stringMap,
+ }
+ msgPackEntries = append(msgPackEntries, msgPackEntry)
+ }
+ }
}
}
-
- if len(laKubeMonAgentEventsRecords) > 0 {
+ if (IsWindows == false && len(msgPackEntries) > 0) { //for linux, mdsd route
+ Log("Info::mdsd:: using mdsdsource name for KubeMonAgentEvents: %s", MdsdKubeMonAgentEventsTagName)
+ msgpBytes := convertMsgPackEntriesToMsgpBytes(MdsdKubeMonAgentEventsTagName, msgPackEntries)
+ if MdsdKubeMonMsgpUnixSocketClient == nil {
+ Log("Error::mdsd::mdsd connection for KubeMonAgentEvents does not exist. re-connecting ...")
+ CreateMDSDClient(KubeMonAgentEvents, ContainerType)
+ if MdsdKubeMonMsgpUnixSocketClient == nil {
+ Log("Error::mdsd::Unable to create mdsd client for KubeMonAgentEvents. Please check error log.")
+ ContainerLogTelemetryMutex.Lock()
+ defer ContainerLogTelemetryMutex.Unlock()
+ KubeMonEventsMDSDClientCreateErrors += 1
+ }
+ }
+ if MdsdKubeMonMsgpUnixSocketClient != nil {
+ deadline := 10 * time.Second
+ MdsdKubeMonMsgpUnixSocketClient.SetWriteDeadline(time.Now().Add(deadline)) //this is based on clock time, so cannot reuse
+ bts, er := MdsdKubeMonMsgpUnixSocketClient.Write(msgpBytes)
+ elapsed = time.Since(start)
+ if er != nil {
+ message := fmt.Sprintf("Error::mdsd::Failed to write to kubemonagent mdsd %d records after %s. Will retry ... error : %s", len(msgPackEntries), elapsed, er.Error())
+ Log(message)
+ if MdsdKubeMonMsgpUnixSocketClient != nil {
+ MdsdKubeMonMsgpUnixSocketClient.Close()
+ MdsdKubeMonMsgpUnixSocketClient = nil
+ }
+ SendException(message)
+ } else {
+ numRecords := len(msgPackEntries)
+ Log("FlushKubeMonAgentEventRecords::Info::Successfully flushed %d records that was %d bytes in %s", numRecords, bts, elapsed)
+ // Send telemetry to AppInsights resource
+ SendEvent(KubeMonAgentEventsFlushedEvent, telemetryDimensions)
+ }
+ } else {
+ Log("Error::mdsd::Unable to create mdsd client for KubeMonAgentEvents. Please check error log.")
+ }
+ } else if len(laKubeMonAgentEventsRecords) > 0 { //for windows, ODS direct
kubeMonAgentEventEntry := KubeMonAgentEventBlob{
DataType: KubeMonAgentEventDataType,
IPName: IPName,
@@ -746,70 +869,144 @@ func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int
message := fmt.Sprintf("PostTelegrafMetricsToLA::Info:derived %v metrics from %v timeseries", len(laMetrics), len(telegrafRecords))
Log(message)
}
+
+ if IsWindows == false { //for linux, mdsd route
+ var msgPackEntries []MsgPackEntry
+ var i int
+ start := time.Now()
+ var elapsed time.Duration
+
+ for i = 0; i < len(laMetrics); i++ {
+ var interfaceMap map[string]interface{}
+ stringMap := make(map[string]string)
+ jsonBytes, err := json.Marshal(*laMetrics[i])
+ if err != nil {
+ message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:when marshalling json %q", err)
+ Log(message)
+ SendException(message)
+ return output.FLB_OK
+ } else {
+ if err := json.Unmarshal(jsonBytes, &interfaceMap); err != nil {
+ message := fmt.Sprintf("Error while UnMarshalling json bytes to interfaceMap: %s", err.Error())
+ Log(message)
+ SendException(message)
+ return output.FLB_OK
+ } else {
+ for key, value := range interfaceMap {
+ strKey := fmt.Sprintf("%v", key)
+ strValue := fmt.Sprintf("%v", value)
+ stringMap[strKey] = strValue
+ }
+ msgPackEntry := MsgPackEntry{
+ Record: stringMap,
+ }
+ msgPackEntries = append(msgPackEntries, msgPackEntry)
+ }
+ }
+ }
+ if (len(msgPackEntries) > 0) {
+ msgpBytes := convertMsgPackEntriesToMsgpBytes(MdsdInsightsMetricsTagName, msgPackEntries)
+ if MdsdInsightsMetricsMsgpUnixSocketClient == nil {
+ Log("Error::mdsd::mdsd connection does not exist. re-connecting ...")
+ CreateMDSDClient(InsightsMetrics, ContainerType)
+ if MdsdInsightsMetricsMsgpUnixSocketClient == nil {
+ Log("Error::mdsd::Unable to create mdsd client for insights metrics. Please check error log.")
+ ContainerLogTelemetryMutex.Lock()
+ defer ContainerLogTelemetryMutex.Unlock()
+ InsightsMetricsMDSDClientCreateErrors += 1
+ return output.FLB_RETRY
+ }
+ }
- var metrics []laTelegrafMetric
- var i int
+ deadline := 10 * time.Second
+ MdsdInsightsMetricsMsgpUnixSocketClient.SetWriteDeadline(time.Now().Add(deadline)) //this is based off wall-clock time, so cannot reuse
+ bts, er := MdsdInsightsMetricsMsgpUnixSocketClient.Write(msgpBytes)
- for i = 0; i < len(laMetrics); i++ {
- metrics = append(metrics, *laMetrics[i])
- }
+ elapsed = time.Since(start)
- laTelegrafMetrics := InsightsMetricsBlob{
- DataType: InsightsMetricsDataType,
- IPName: IPName,
- DataItems: metrics}
+ if er != nil {
+ Log("Error::mdsd::Failed to write to mdsd %d records after %s. Will retry ... error : %s", len(msgPackEntries), elapsed, er.Error())
+ if MdsdInsightsMetricsMsgpUnixSocketClient != nil {
+ MdsdInsightsMetricsMsgpUnixSocketClient.Close()
+ MdsdInsightsMetricsMsgpUnixSocketClient = nil
+ }
- jsonBytes, err := json.Marshal(laTelegrafMetrics)
+ ContainerLogTelemetryMutex.Lock()
+ defer ContainerLogTelemetryMutex.Unlock()
+ InsightsMetricsMDSDClientCreateErrors += 1
+ return output.FLB_RETRY
+ } else {
+ numTelegrafMetricsRecords := len(msgPackEntries)
+ Log("Success::mdsd::Successfully flushed %d telegraf metrics records that was %d bytes to mdsd in %s ", numTelegrafMetricsRecords, bts, elapsed)
+ }
+ }
+
+ } else { // for windows, ODS direct
- if err != nil {
- message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:when marshalling json %q", err)
- Log(message)
- SendException(message)
- return output.FLB_OK
- }
+ var metrics []laTelegrafMetric
+ var i int
- //Post metrics data to LA
- req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(jsonBytes))
+ for i = 0; i < len(laMetrics); i++ {
+ metrics = append(metrics, *laMetrics[i])
+ }
- //req.URL.Query().Add("api-version","2016-04-01")
+ laTelegrafMetrics := InsightsMetricsBlob{
+ DataType: InsightsMetricsDataType,
+ IPName: IPName,
+ DataItems: metrics}
- //set headers
- req.Header.Set("x-ms-date", time.Now().Format(time.RFC3339))
- req.Header.Set("User-Agent", userAgent)
- reqID := uuid.New().String()
- req.Header.Set("X-Request-ID", reqID)
+ jsonBytes, err := json.Marshal(laTelegrafMetrics)
- //expensive to do string len for every request, so use a flag
- if ResourceCentric == true {
- req.Header.Set("x-ms-AzureResourceId", ResourceID)
- }
+ if err != nil {
+ message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:when marshalling json %q", err)
+ Log(message)
+ SendException(message)
+ return output.FLB_OK
+ }
- start := time.Now()
- resp, err := HTTPClient.Do(req)
- elapsed := time.Since(start)
+ //Post metrics data to LA
+ req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(jsonBytes))
- if err != nil {
- message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:(retriable) when sending %v metrics. duration:%v err:%q \n", len(laMetrics), elapsed, err.Error())
- Log(message)
- UpdateNumTelegrafMetricsSentTelemetry(0, 1, 0)
- return output.FLB_RETRY
- }
+ //req.URL.Query().Add("api-version","2016-04-01")
- if resp == nil || resp.StatusCode != 200 {
- if resp != nil {
- Log("PostTelegrafMetricsToLA::Error:(retriable) RequestID %s Response Status %v Status Code %v", reqID, resp.Status, resp.StatusCode)
+ //set headers
+ req.Header.Set("x-ms-date", time.Now().Format(time.RFC3339))
+ req.Header.Set("User-Agent", userAgent)
+ reqID := uuid.New().String()
+ req.Header.Set("X-Request-ID", reqID)
+
+ //expensive to do string len for every request, so use a flag
+ if ResourceCentric == true {
+ req.Header.Set("x-ms-AzureResourceId", ResourceID)
}
- if resp != nil && resp.StatusCode == 429 {
- UpdateNumTelegrafMetricsSentTelemetry(0, 1, 1)
+
+ start := time.Now()
+ resp, err := HTTPClient.Do(req)
+ elapsed := time.Since(start)
+
+ if err != nil {
+ message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:(retriable) when sending %v metrics. duration:%v err:%q \n", len(laMetrics), elapsed, err.Error())
+ Log(message)
+ UpdateNumTelegrafMetricsSentTelemetry(0, 1, 0)
+ return output.FLB_RETRY
+ }
+
+ if resp == nil || resp.StatusCode != 200 {
+ if resp != nil {
+ Log("PostTelegrafMetricsToLA::Error:(retriable) RequestID %s Response Status %v Status Code %v", reqID, resp.Status, resp.StatusCode)
+ }
+ if resp != nil && resp.StatusCode == 429 {
+ UpdateNumTelegrafMetricsSentTelemetry(0, 1, 1)
+ }
+ return output.FLB_RETRY
}
- return output.FLB_RETRY
- }
- defer resp.Body.Close()
+ defer resp.Body.Close()
- numMetrics := len(laMetrics)
- UpdateNumTelegrafMetricsSentTelemetry(numMetrics, 0, 0)
- Log("PostTelegrafMetricsToLA::Info:Successfully flushed %v records in %v", numMetrics, elapsed)
+ numMetrics := len(laMetrics)
+ UpdateNumTelegrafMetricsSentTelemetry(numMetrics, 0, 0)
+ Log("PostTelegrafMetricsToLA::Info:Successfully flushed %v records in %v", numMetrics, elapsed)
+ }
return output.FLB_OK
}
@@ -986,13 +1183,9 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int {
numContainerLogRecords := 0
if len(msgPackEntries) > 0 && ContainerLogsRouteV2 == true {
- //flush to mdsd
- mdsdSourceName := MdsdContainerLogSourceName
- if (ContainerLogSchemaV2 == true) {
- mdsdSourceName = MdsdContainerLogV2SourceName
- }
+ //flush to mdsd
fluentForward := MsgPackForward{
- Tag: mdsdSourceName,
+ Tag: MdsdContainerLogTagName,
Entries: msgPackEntries,
}
@@ -1019,7 +1212,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int {
if MdsdMsgpUnixSocketClient == nil {
Log("Error::mdsd::mdsd connection does not exist. re-connecting ...")
- CreateMDSDClient()
+ CreateMDSDClient(ContainerLogV2, ContainerType)
if MdsdMsgpUnixSocketClient == nil {
Log("Error::mdsd::Unable to create mdsd client. Please check error log.")
@@ -1286,21 +1479,31 @@ func InitializePlugin(pluginConfPath string, agentVersion string) {
log.Fatalln(message)
}
- osType := os.Getenv("OS_TYPE")
+ ContainerType = os.Getenv(ContainerTypeEnv)
+ Log("Container Type %s", ContainerType)
+ osType := os.Getenv("OS_TYPE")
+ IsWindows = false
// Linux
if strings.Compare(strings.ToLower(osType), "windows") != 0 {
Log("Reading configuration for Linux from %s", pluginConfPath)
- omsadminConf, err := ReadConfiguration(pluginConfig["omsadmin_conf_path"])
- if err != nil {
- message := fmt.Sprintf("Error Reading omsadmin configuration %s\n", err.Error())
+ WorkspaceID = os.Getenv("WSID")
+ if WorkspaceID == "" {
+ message := fmt.Sprintf("WorkspaceID shouldnt be empty")
Log(message)
SendException(message)
time.Sleep(30 * time.Second)
log.Fatalln(message)
}
- OMSEndpoint = omsadminConf["OMS_ENDPOINT"]
- WorkspaceID = omsadminConf["WORKSPACE_ID"]
+ LogAnalyticsWorkspaceDomain = os.Getenv("DOMAIN")
+ if LogAnalyticsWorkspaceDomain == "" {
+ message := fmt.Sprintf("Workspace DOMAIN shouldnt be empty")
+ Log(message)
+ SendException(message)
+ time.Sleep(30 * time.Second)
+ log.Fatalln(message)
+ }
+ OMSEndpoint = "https://" + WorkspaceID + ".ods." + LogAnalyticsWorkspaceDomain + "/OperationalData.svc/PostJsonDataItems"
// Populate Computer field
containerHostName, err1 := ioutil.ReadFile(pluginConfig["container_host_file_path"])
if err1 != nil {
@@ -1329,6 +1532,7 @@ func InitializePlugin(pluginConfPath string, agentVersion string) {
}
} else {
// windows
+ IsWindows = true
Computer = os.Getenv("HOSTNAME")
WorkspaceID = os.Getenv("WSID")
logAnalyticsDomain := os.Getenv("DOMAIN")
@@ -1410,21 +1614,15 @@ func InitializePlugin(pluginConfPath string, agentVersion string) {
Log(message)
}
- PluginConfiguration = pluginConfig
-
- CreateHTTPClient()
+ PluginConfiguration = pluginConfig
- ContainerLogsRoute := strings.TrimSpace(strings.ToLower(os.Getenv("AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE")))
- Log("AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE:%s", ContainerLogsRoute)
+ ContainerLogsRoute := strings.TrimSpace(strings.ToLower(os.Getenv("AZMON_CONTAINER_LOGS_ROUTE")))
+ Log("AZMON_CONTAINER_LOGS_ROUTE:%s", ContainerLogsRoute)
- ContainerLogsRouteV2 = false //default is ODS
- ContainerLogsRouteADX = false //default is LA
+ ContainerLogsRouteV2 = false
+ ContainerLogsRouteADX = false
- if strings.Compare(ContainerLogsRoute, ContainerLogsV2Route) == 0 && strings.Compare(strings.ToLower(osType), "windows") != 0 {
- ContainerLogsRouteV2 = true
- Log("Routing container logs thru %s route...", ContainerLogsV2Route)
- fmt.Fprintf(os.Stdout, "Routing container logs thru %s route... \n", ContainerLogsV2Route)
- } else if strings.Compare(ContainerLogsRoute, ContainerLogsADXRoute) == 0 {
+ if strings.Compare(ContainerLogsRoute, ContainerLogsADXRoute) == 0 {
//check if adx clusteruri, clientid & secret are set
var err error
AdxClusterUri, err = ReadFileContents(PluginConfiguration["adx_cluster_uri_path"])
@@ -1455,14 +1653,30 @@ func InitializePlugin(pluginConfPath string, agentVersion string) {
Log("Routing container logs thru %s route...", ContainerLogsADXRoute)
fmt.Fprintf(os.Stdout, "Routing container logs thru %s route...\n", ContainerLogsADXRoute)
}
- }
+ } else if strings.Compare(strings.ToLower(osType), "windows") != 0 { //for linux, oneagent will be default route
+ ContainerLogsRouteV2 = true //default is mdsd route
+ if strings.Compare(ContainerLogsRoute, ContainerLogsV1Route) == 0 {
+ ContainerLogsRouteV2 = false //fallback option when hiddensetting set
+ }
+ Log("Routing container logs thru %s route...", ContainerLogsRoute)
+ fmt.Fprintf(os.Stdout, "Routing container logs thru %s route... \n", ContainerLogsRoute)
+ }
if ContainerLogsRouteV2 == true {
- CreateMDSDClient()
+ CreateMDSDClient(ContainerLogV2, ContainerType)
} else if ContainerLogsRouteADX == true {
CreateADXClient()
+ } else { // v1 or windows
+ Log("Creating HTTP Client since either OS Platform is Windows or configmap configured with fallback option for ODS direct")
+ CreateHTTPClient()
}
+ if IsWindows == false { // mdsd linux specific
+ Log("Creating MDSD clients for KubeMonAgentEvents & InsightsMetrics")
+ CreateMDSDClient(KubeMonAgentEvents, ContainerType)
+ CreateMDSDClient(InsightsMetrics, ContainerType)
+ }
+
ContainerLogSchemaVersion := strings.TrimSpace(strings.ToLower(os.Getenv("AZMON_CONTAINER_LOG_SCHEMA_VERSION")))
Log("AZMON_CONTAINER_LOG_SCHEMA_VERSION:%s", ContainerLogSchemaVersion)
@@ -1491,4 +1705,12 @@ func InitializePlugin(pluginConfPath string, agentVersion string) {
Log("Running in replicaset. Disabling container enrichment caching & updates \n")
}
+ if ContainerLogSchemaV2 == true {
+ MdsdContainerLogTagName = MdsdContainerLogV2SourceName
+ } else {
+ MdsdContainerLogTagName = MdsdContainerLogSourceName
+ }
+
+ MdsdInsightsMetricsTagName = MdsdInsightsMetricsSourceName
+ MdsdKubeMonAgentEventsTagName = MdsdKubeMonAgentEventsSourceName
}
\ No newline at end of file
diff --git a/source/plugins/go/src/telemetry.go b/source/plugins/go/src/telemetry.go
index 461fdea96..4750b4624 100644
--- a/source/plugins/go/src/telemetry.go
+++ b/source/plugins/go/src/telemetry.go
@@ -42,6 +42,10 @@ var (
ContainerLogsSendErrorsToMDSDFromFluent float64
//Tracks the number of mdsd client create errors for containerlogs (uses ContainerLogTelemetryTicker)
ContainerLogsMDSDClientCreateErrors float64
+ //Tracks the number of mdsd client create errors for insightsmetrics (uses ContainerLogTelemetryTicker)
+ InsightsMetricsMDSDClientCreateErrors float64
+ //Tracks the number of mdsd client create errors for kubemonevents (uses ContainerLogTelemetryTicker)
+ KubeMonEventsMDSDClientCreateErrors float64
//Tracks the number of write/send errors to ADX for containerlogs (uses ContainerLogTelemetryTicker)
ContainerLogsSendErrorsToADXFromFluent float64
//Tracks the number of ADX client create errors for containerlogs (uses ContainerLogTelemetryTicker)
@@ -74,6 +78,8 @@ const (
metricNameNumberofSend429ErrorsTelegrafMetrics = "TelegrafMetricsSend429ErrorCount"
metricNameErrorCountContainerLogsSendErrorsToMDSDFromFluent = "ContainerLogs2MdsdSendErrorCount"
metricNameErrorCountContainerLogsMDSDClientCreateError = "ContainerLogsMdsdClientCreateErrorCount"
+ metricNameErrorCountInsightsMetricsMDSDClientCreateError = "InsightsMetricsMDSDClientCreateErrorsCount"
+ metricNameErrorCountKubeMonEventsMDSDClientCreateError = "KubeMonEventsMDSDClientCreateErrorsCount"
metricNameErrorCountContainerLogsSendErrorsToADXFromFluent = "ContainerLogs2ADXSendErrorCount"
metricNameErrorCountContainerLogsADXClientCreateError = "ContainerLogsADXClientCreateErrorCount"
@@ -112,6 +118,8 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) {
containerLogsMDSDClientCreateErrors := ContainerLogsMDSDClientCreateErrors
containerLogsSendErrorsToADXFromFluent := ContainerLogsSendErrorsToADXFromFluent
containerLogsADXClientCreateErrors := ContainerLogsADXClientCreateErrors
+ insightsMetricsMDSDClientCreateErrors := InsightsMetricsMDSDClientCreateErrors
+ kubeMonEventsMDSDClientCreateErrors := KubeMonEventsMDSDClientCreateErrors
osmNamespaceCount := OSMNamespaceCount
promMonitorPods := PromMonitorPods
promMonitorPodsNamespaceLength := PromMonitorPodsNamespaceLength
@@ -132,6 +140,8 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) {
ContainerLogsMDSDClientCreateErrors = 0.0
ContainerLogsSendErrorsToADXFromFluent = 0.0
ContainerLogsADXClientCreateErrors = 0.0
+ InsightsMetricsMDSDClientCreateErrors = 0.0
+ KubeMonEventsMDSDClientCreateErrors = 0.0
ContainerLogTelemetryMutex.Unlock()
if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "daemonset") == 0 {
@@ -186,6 +196,13 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) {
if containerLogsADXClientCreateErrors > 0.0 {
TelemetryClient.Track(appinsights.NewMetricTelemetry(metricNameErrorCountContainerLogsADXClientCreateError, containerLogsADXClientCreateErrors))
}
+ if insightsMetricsMDSDClientCreateErrors > 0.0 {
+ TelemetryClient.Track(appinsights.NewMetricTelemetry(metricNameErrorCountInsightsMetricsMDSDClientCreateError, insightsMetricsMDSDClientCreateErrors))
+ }
+ if kubeMonEventsMDSDClientCreateErrors > 0.0 {
+ TelemetryClient.Track(appinsights.NewMetricTelemetry(metricNameErrorCountKubeMonEventsMDSDClientCreateError, kubeMonEventsMDSDClientCreateErrors))
+ }
+
start = time.Now()
}
}
diff --git a/source/plugins/go/src/utils.go b/source/plugins/go/src/utils.go
index 61d047e52..3fe5c6d0e 100644
--- a/source/plugins/go/src/utils.go
+++ b/source/plugins/go/src/utils.go
@@ -12,11 +12,12 @@ import (
"net/url"
"os"
"strings"
- "time"
-
+ "time"
+
"github.com/Azure/azure-kusto-go/kusto"
"github.com/Azure/azure-kusto-go/kusto/ingest"
"github.com/Azure/go-autorest/autorest/azure/auth"
+ "github.com/tinylib/msgp/msgp"
)
// ReadConfiguration reads a property file
@@ -62,7 +63,13 @@ func ReadConfiguration(filename string) (map[string]string, error) {
// CreateHTTPClient used to create the client for sending post requests to OMSEndpoint
func CreateHTTPClient() {
- cert, err := tls.LoadX509KeyPair(PluginConfiguration["cert_file_path"], PluginConfiguration["key_file_path"])
+ certFilePath := PluginConfiguration["cert_file_path"]
+ keyFilePath := PluginConfiguration["key_file_path"]
+ if IsWindows == false {
+ certFilePath = fmt.Sprintf(certFilePath, WorkspaceID)
+ keyFilePath = fmt.Sprintf(keyFilePath, WorkspaceID)
+ }
+ cert, err := tls.LoadX509KeyPair(certFilePath, keyFilePath)
if err != nil {
message := fmt.Sprintf("Error when loading cert %s", err.Error())
SendException(message)
@@ -93,7 +100,7 @@ func CreateHTTPClient() {
HTTPClient = http.Client{
Transport: transport,
Timeout: 30 * time.Second,
- }
+ }
Log("Successfully created HTTP Client")
}
@@ -110,23 +117,58 @@ func ToString(s interface{}) string {
}
//mdsdSocketClient to write msgp messages
-func CreateMDSDClient() {
- if MdsdMsgpUnixSocketClient != nil {
- MdsdMsgpUnixSocketClient.Close()
- MdsdMsgpUnixSocketClient = nil
- }
- /*conn, err := fluent.New(fluent.Config{FluentNetwork:"unix",
- FluentSocketPath:"/var/run/mdsd/default_fluent.socket",
- WriteTimeout: 5 * time.Second,
- RequestAck: true}) */
- conn, err := net.DialTimeout("unix",
- "/var/run/mdsd/default_fluent.socket", 10*time.Second)
- if err != nil {
- Log("Error::mdsd::Unable to open MDSD msgp socket connection %s", err.Error())
- //log.Fatalf("Unable to open MDSD msgp socket connection %s", err.Error())
- } else {
- Log("Successfully created MDSD msgp socket connection")
- MdsdMsgpUnixSocketClient = conn
+func CreateMDSDClient(dataType DataType, containerType string) {
+ mdsdfluentSocket := "/var/run/mdsd/default_fluent.socket"
+ if containerType != "" && strings.Compare(strings.ToLower(containerType), "prometheussidecar") == 0 {
+ mdsdfluentSocket = fmt.Sprintf("/var/run/mdsd-%s/default_fluent.socket", containerType)
+ }
+ switch dataType {
+ case ContainerLogV2:
+ if MdsdMsgpUnixSocketClient != nil {
+ MdsdMsgpUnixSocketClient.Close()
+ MdsdMsgpUnixSocketClient = nil
+ }
+ /*conn, err := fluent.New(fluent.Config{FluentNetwork:"unix",
+ FluentSocketPath:"/var/run/mdsd/default_fluent.socket",
+ WriteTimeout: 5 * time.Second,
+ RequestAck: true}) */
+ conn, err := net.DialTimeout("unix",
+ mdsdfluentSocket, 10*time.Second)
+ if err != nil {
+ Log("Error::mdsd::Unable to open MDSD msgp socket connection for ContainerLogV2 %s", err.Error())
+ //log.Fatalf("Unable to open MDSD msgp socket connection %s", err.Error())
+ } else {
+ Log("Successfully created MDSD msgp socket connection for ContainerLogV2: %s", mdsdfluentSocket)
+ MdsdMsgpUnixSocketClient = conn
+ }
+ case KubeMonAgentEvents:
+ if MdsdKubeMonMsgpUnixSocketClient != nil {
+ MdsdKubeMonMsgpUnixSocketClient.Close()
+ MdsdKubeMonMsgpUnixSocketClient = nil
+ }
+ conn, err := net.DialTimeout("unix",
+ mdsdfluentSocket, 10*time.Second)
+ if err != nil {
+ Log("Error::mdsd::Unable to open MDSD msgp socket connection for KubeMon events %s", err.Error())
+ //log.Fatalf("Unable to open MDSD msgp socket connection %s", err.Error())
+ } else {
+ Log("Successfully created MDSD msgp socket connection for KubeMon events:%s", mdsdfluentSocket)
+ MdsdKubeMonMsgpUnixSocketClient = conn
+ }
+ case InsightsMetrics:
+ if MdsdInsightsMetricsMsgpUnixSocketClient != nil {
+ MdsdInsightsMetricsMsgpUnixSocketClient.Close()
+ MdsdInsightsMetricsMsgpUnixSocketClient = nil
+ }
+ conn, err := net.DialTimeout("unix",
+ mdsdfluentSocket, 10*time.Second)
+ if err != nil {
+ Log("Error::mdsd::Unable to open MDSD msgp socket connection for insights metrics %s", err.Error())
+ //log.Fatalf("Unable to open MDSD msgp socket connection %s", err.Error())
+ } else {
+ Log("Successfully created MDSD msgp socket connection for Insights metrics %s", mdsdfluentSocket)
+ MdsdInsightsMetricsMsgpUnixSocketClient = conn
+ }
}
}
@@ -178,3 +220,33 @@ func isValidUrl(uri string) bool {
}
return true
}
+
+func convertMsgPackEntriesToMsgpBytes(fluentForwardTag string, msgPackEntries []MsgPackEntry) []byte {
+ var msgpBytes []byte
+
+ fluentForward := MsgPackForward{
+ Tag: fluentForwardTag,
+ Entries: msgPackEntries,
+ }
+ //determine the size of msgp message
+ msgpSize := 1 + msgp.StringPrefixSize + len(fluentForward.Tag) + msgp.ArrayHeaderSize
+ for i := range fluentForward.Entries {
+ msgpSize += 1 + msgp.Int64Size + msgp.GuessSize(fluentForward.Entries[i].Record)
+ }
+
+ //allocate buffer for msgp message
+ msgpBytes = msgp.Require(nil, msgpSize)
+
+ //construct the stream
+ msgpBytes = append(msgpBytes, 0x92)
+ msgpBytes = msgp.AppendString(msgpBytes, fluentForward.Tag)
+ msgpBytes = msgp.AppendArrayHeader(msgpBytes, uint32(len(fluentForward.Entries)))
+ batchTime := time.Now().Unix()
+ for entry := range fluentForward.Entries {
+ msgpBytes = append(msgpBytes, 0x92)
+ msgpBytes = msgp.AppendInt64(msgpBytes, batchTime)
+ msgpBytes = msgp.AppendMapStrStr(msgpBytes, fluentForward.Entries[entry].Record)
+ }
+
+ return msgpBytes
+}
diff --git a/source/plugins/ruby/ApplicationInsightsUtility.rb b/source/plugins/ruby/ApplicationInsightsUtility.rb
index b118cc646..6ae567337 100644
--- a/source/plugins/ruby/ApplicationInsightsUtility.rb
+++ b/source/plugins/ruby/ApplicationInsightsUtility.rb
@@ -14,7 +14,6 @@ class ApplicationInsightsUtility
@@Exception = "ExceptionEvent"
@@AcsClusterType = "ACS"
@@AksClusterType = "AKS"
- @OmsAdminFilePath = "/etc/opt/microsoft/omsagent/conf/omsadmin.conf"
@@EnvAcsResourceName = "ACS_RESOURCE_NAME"
@@EnvAksRegion = "AKS_REGION"
@@EnvAgentVersion = "AGENT_VERSION"
@@ -263,14 +262,11 @@ def sendMetricTelemetry(metricName, metricValue, properties)
end
def getWorkspaceId()
- begin
- adminConf = {}
- confFile = File.open(@OmsAdminFilePath, "r")
- confFile.each_line do |line|
- splitStrings = line.split("=")
- adminConf[splitStrings[0]] = splitStrings[1]
+ begin
+ workspaceId = ENV["WSID"]
+ if workspaceId.nil? || workspaceId.empty?
+ $log.warn("Exception in AppInsightsUtility: getWorkspaceId - WorkspaceID either nil or empty")
end
- workspaceId = adminConf["WORKSPACE_ID"]
return workspaceId
rescue => errorStr
$log.warn("Exception in AppInsightsUtility: getWorkspaceId - error: #{errorStr}")
@@ -278,14 +274,8 @@ def getWorkspaceId()
end
def getWorkspaceCloud()
- begin
- adminConf = {}
- confFile = File.open(@OmsAdminFilePath, "r")
- confFile.each_line do |line|
- splitStrings = line.split("=")
- adminConf[splitStrings[0]] = splitStrings[1]
- end
- workspaceDomain = adminConf["URL_TLD"].strip
+ begin
+ workspaceDomain = ENV["DOMAIN"]
workspaceCloud = "AzureCloud"
if workspaceDomain.casecmp("opinsights.azure.com") == 0
workspaceCloud = "AzureCloud"
diff --git a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
index 8cb6f603e..f02459aef 100644
--- a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
+++ b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
@@ -203,23 +203,25 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met
containerName = container["name"]
metricValue = container["cpu"][cpuMetricNameToCollect]
metricTime = metricPollTime #container["cpu"]["time"]
- metricItem = {}
- metricItem["DataItems"] = []
-
- metricProps = {}
- metricProps["Timestamp"] = metricTime
- metricProps["Host"] = hostName
- metricProps["ObjectName"] = Constants::OBJECT_NAME_K8S_CONTAINER
- metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName
-
- metricProps["Collections"] = []
- metricCollections = {}
- metricCollections["CounterName"] = metricNametoReturn
- metricCollections["Value"] = metricValue
+
- metricProps["Collections"].push(metricCollections)
- metricItem["DataItems"].push(metricProps)
- metricItems.push(metricItem)
+ metricItem = {}
+ metricItem["Timestamp"] = metricTime
+ metricItem["Host"] = hostName
+ metricItem["ObjectName"] = Constants::OBJECT_NAME_K8S_CONTAINER
+ metricItem["InstanceName"] = clusterId + "/" + podUid + "/" + containerName
+
+
+ metricCollection = {}
+ metricCollection["CounterName"] = metricNametoReturn
+ metricCollection["Value"] = metricValue
+
+ metricItem["json_Collections"] = []
+ metricCollections = []
+ metricCollections.push(metricCollection)
+ metricItem["json_Collections"] = metricCollections.to_json
+ metricItems.push(metricItem)
+
#Telemetry about agent performance
begin
# we can only do this much now. Ideally would like to use the docker image repository to find our pods/containers
@@ -250,11 +252,8 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met
telemetryProps["dsPromUrl"] = @dsPromUrlCount
end
#telemetry about containerlog Routing for daemonset
- if File.exist?(Constants::AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2_FILENAME)
- telemetryProps["containerLogsRoute"] = "v2"
- elsif (!@containerLogsRoute.nil? && !@containerLogsRoute.empty?)
- telemetryProps["containerLogsRoute"] = @containerLogsRoute
- end
+ telemetryProps["containerLogsRoute"] = @containerLogsRoute
+
#telemetry about health model
if (!@hmEnabled.nil? && !@hmEnabled.empty?)
telemetryProps["hmEnabled"] = @hmEnabled
@@ -503,18 +502,16 @@ def getContainerCpuMetricItemRate(metricJSON, hostName, cpuMetricNameToCollect,
containerName = container["name"]
metricValue = container["cpu"][cpuMetricNameToCollect]
metricTime = metricPollTime #container["cpu"]["time"]
+
metricItem = {}
- metricItem["DataItems"] = []
-
- metricProps = {}
- metricProps["Timestamp"] = metricTime
- metricProps["Host"] = hostName
- metricProps["ObjectName"] = Constants::OBJECT_NAME_K8S_CONTAINER
- metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName
-
- metricProps["Collections"] = []
- metricCollections = {}
- metricCollections["CounterName"] = metricNametoReturn
+ metricItem["Timestamp"] = metricTime
+ metricItem["Host"] = hostName
+ metricItem["ObjectName"] = Constants::OBJECT_NAME_K8S_CONTAINER
+ metricItem["InstanceName"] = clusterId + "/" + podUid + "/" + containerName
+
+ metricItem["json_Collections"] = []
+ metricCollection = {}
+ metricCollection["CounterName"] = metricNametoReturn
containerId = podUid + "/" + containerName
# Adding the containers to the winContainerIdCache so that it can be used by the cleanup routine
@@ -545,9 +542,11 @@ def getContainerCpuMetricItemRate(metricJSON, hostName, cpuMetricNameToCollect,
@@winContainerPrevMetricRate[containerId] = metricRateValue
end
- metricCollections["Value"] = metricValue
- metricProps["Collections"].push(metricCollections)
- metricItem["DataItems"].push(metricProps)
+ metricCollection["Value"] = metricValue
+
+ metricCollections = []
+ metricCollections.push(metricCollection)
+ metricItem["json_Collections"] = metricCollections.to_json
metricItems.push(metricItem)
#Telemetry about agent performance
begin
@@ -629,22 +628,21 @@ def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollec
metricTime = metricPollTime #container["memory"]["time"]
metricItem = {}
- metricItem["DataItems"] = []
-
- metricProps = {}
- metricProps["Timestamp"] = metricTime
- metricProps["Host"] = hostName
- metricProps["ObjectName"] = Constants::OBJECT_NAME_K8S_CONTAINER
- metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName
+ metricItem["Timestamp"] = metricTime
+ metricItem["Host"] = hostName
+ metricItem["ObjectName"] = Constants::OBJECT_NAME_K8S_CONTAINER
+ metricItem["InstanceName"] = clusterId + "/" + podUid + "/" + containerName
+
+ metricCollection = {}
+ metricCollection["CounterName"] = metricNametoReturn
+ metricCollection["Value"] = metricValue
+
+ metricItem["json_Collections"] = []
+ metricCollections = []
+ metricCollections.push(metricCollection)
+ metricItem["json_Collections"] = metricCollections.to_json
+ metricItems.push(metricItem)
- metricProps["Collections"] = []
- metricCollections = {}
- metricCollections["CounterName"] = metricNametoReturn
- metricCollections["Value"] = metricValue
-
- metricProps["Collections"].push(metricCollections)
- metricItem["DataItems"].push(metricProps)
- metricItems.push(metricItem)
#Telemetry about agent performance
begin
# we can only do this much now. Ideally would like to use the docker image repository to find our pods/containers
@@ -687,22 +685,21 @@ def getNodeMetricItem(metricJSON, hostName, metricCategory, metricNameToCollect,
if !node[metricCategory].nil?
metricValue = node[metricCategory][metricNameToCollect]
metricTime = metricPollTime #node[metricCategory]["time"]
-
- metricItem["DataItems"] = []
-
- metricProps = {}
- metricProps["Timestamp"] = metricTime
- metricProps["Host"] = hostName
- metricProps["ObjectName"] = Constants::OBJECT_NAME_K8S_NODE
- metricProps["InstanceName"] = clusterId + "/" + nodeName
-
- metricProps["Collections"] = []
- metricCollections = {}
- metricCollections["CounterName"] = metricNametoReturn
- metricCollections["Value"] = metricValue
-
- metricProps["Collections"].push(metricCollections)
- metricItem["DataItems"].push(metricProps)
+
+ metricItem["Timestamp"] = metricTime
+ metricItem["Host"] = hostName
+ metricItem["ObjectName"] = Constants::OBJECT_NAME_K8S_NODE
+ metricItem["InstanceName"] = clusterId + "/" + nodeName
+
+
+ metricCollection = {}
+ metricCollection["CounterName"] = metricNametoReturn
+ metricCollection["Value"] = metricValue
+
+ metricItem["json_Collections"] = []
+ metricCollections = []
+ metricCollections.push(metricCollection)
+ metricItem["json_Collections"] = metricCollections.to_json
end
rescue => error
@Log.warn("getNodeMetricItem failed: #{error} for metric #{metricNameToCollect}")
@@ -805,21 +802,20 @@ def getNodeMetricItemRate(metricJSON, hostName, metricCategory, metricNameToColl
end
end
end
- metricItem["DataItems"] = []
-
- metricProps = {}
- metricProps["Timestamp"] = metricTime
- metricProps["Host"] = hostName
- metricProps["ObjectName"] = Constants::OBJECT_NAME_K8S_NODE
- metricProps["InstanceName"] = clusterId + "/" + nodeName
-
- metricProps["Collections"] = []
- metricCollections = {}
- metricCollections["CounterName"] = metricNametoReturn
- metricCollections["Value"] = metricValue
-
- metricProps["Collections"].push(metricCollections)
- metricItem["DataItems"].push(metricProps)
+
+ metricItem["Timestamp"] = metricTime
+ metricItem["Host"] = hostName
+ metricItem["ObjectName"] = Constants::OBJECT_NAME_K8S_NODE
+ metricItem["InstanceName"] = clusterId + "/" + nodeName
+
+ metricCollection = {}
+ metricCollection["CounterName"] = metricNametoReturn
+ metricCollection["Value"] = metricValue
+
+ metricItem["json_Collections"] = []
+ metricCollections = []
+ metricCollections.push(metricCollection)
+ metricItem["json_Collections"] = metricCollections.to_json
end
rescue => error
@Log.warn("getNodeMetricItemRate failed: #{error} for metric #{metricNameToCollect}")
@@ -841,22 +837,22 @@ def getNodeLastRebootTimeMetric(metricJSON, hostName, metricNametoReturn, metric
metricValue = node["startTime"]
metricTime = metricPollTime #Time.now.utc.iso8601 #2018-01-30T19:36:14Z
- metricItem["DataItems"] = []
-
- metricProps = {}
- metricProps["Timestamp"] = metricTime
- metricProps["Host"] = hostName
- metricProps["ObjectName"] = Constants::OBJECT_NAME_K8S_NODE
- metricProps["InstanceName"] = clusterId + "/" + nodeName
+
+ metricItem["Timestamp"] = metricTime
+ metricItem["Host"] = hostName
+ metricItem["ObjectName"] = Constants::OBJECT_NAME_K8S_NODE
+ metricItem["InstanceName"] = clusterId + "/" + nodeName
- metricProps["Collections"] = []
- metricCollections = {}
- metricCollections["CounterName"] = metricNametoReturn
+
+ metricCollection = {}
+ metricCollection["CounterName"] = metricNametoReturn
#Read it from /proc/uptime
- metricCollections["Value"] = DateTime.parse(metricTime).to_time.to_i - IO.read("/proc/uptime").split[0].to_f
+ metricCollection["Value"] = DateTime.parse(metricTime).to_time.to_i - IO.read("/proc/uptime").split[0].to_f
- metricProps["Collections"].push(metricCollections)
- metricItem["DataItems"].push(metricProps)
+ metricItem["json_Collections"] = []
+ metricCollections = []
+ metricCollections.push(metricCollection)
+ metricItem["json_Collections"] = metricCollections.to_json
rescue => error
@Log.warn("getNodeLastRebootTimeMetric failed: #{error} ")
@Log.warn metricJSON
@@ -880,21 +876,19 @@ def getContainerStartTimeMetricItems(metricJSON, hostName, metricNametoReturn, m
metricTime = metricPollTime #currentTime
metricItem = {}
- metricItem["DataItems"] = []
-
- metricProps = {}
- metricProps["Timestamp"] = metricTime
- metricProps["Host"] = hostName
- metricProps["ObjectName"] = Constants::OBJECT_NAME_K8S_CONTAINER
- metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName
-
- metricProps["Collections"] = []
- metricCollections = {}
- metricCollections["CounterName"] = metricNametoReturn
- metricCollections["Value"] = DateTime.parse(metricValue).to_time.to_i
-
- metricProps["Collections"].push(metricCollections)
- metricItem["DataItems"].push(metricProps)
+ metricItem["Timestamp"] = metricTime
+ metricItem["Host"] = hostName
+ metricItem["ObjectName"] = Constants::OBJECT_NAME_K8S_CONTAINER
+ metricItem["InstanceName"] = clusterId + "/" + podUid + "/" + containerName
+
+ metricCollection = {}
+ metricCollection["CounterName"] = metricNametoReturn
+ metricCollection["Value"] = DateTime.parse(metricValue).to_time.to_i
+
+ metricItem["json_Collections"] = []
+ metricCollections = []
+ metricCollections.push(metricCollection)
+ metricItem["json_Collections"] = metricCollections.to_json
metricItems.push(metricItem)
end
end
diff --git a/source/plugins/ruby/DockerApiClient.rb b/source/plugins/ruby/DockerApiClient.rb
index f2828b357..53dd1f39f 100644
--- a/source/plugins/ruby/DockerApiClient.rb
+++ b/source/plugins/ruby/DockerApiClient.rb
@@ -29,7 +29,7 @@ def getResponse(request, isMultiJson, isVersion)
loop do
begin
responseChunk = ""
- timeout(@@TimeoutInSeconds) do
+ Timeout.timeout(@@TimeoutInSeconds) do
responseChunk = socket.recv(@@ChunkSize)
end
dockerResponse += responseChunk
diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb
index 98347d272..3720bf6dc 100644
--- a/source/plugins/ruby/KubernetesApiClient.rb
+++ b/source/plugins/ruby/KubernetesApiClient.rb
@@ -405,12 +405,9 @@ def getPodUid(podNameSpace, podMetadata)
def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601)
metricItems = []
- timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs
- timeDifferenceInMinutes = timeDifference / 60
begin
clusterId = getClusterId
podNameSpace = pod["metadata"]["namespace"]
- podName = pod["metadata"]["name"]
podUid = getPodUid(podNameSpace, pod["metadata"])
if podUid.nil?
return metricItems
@@ -442,9 +439,6 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle
if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?)
metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect])
- metricItem = {}
- metricItem["DataItems"] = []
-
metricProps = {}
metricProps["Timestamp"] = metricTime
metricProps["Host"] = nodeName
@@ -453,50 +447,22 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle
metricProps["ObjectName"] = "K8SContainer"
metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName
- metricProps["Collections"] = []
- metricCollections = {}
- metricCollections["CounterName"] = metricNametoReturn
- metricCollections["Value"] = metricValue
-
- metricProps["Collections"].push(metricCollections)
- metricItem["DataItems"].push(metricProps)
- metricItems.push(metricItem)
- #Telemetry about omsagent requests and limits
- begin
- if (podName.downcase.start_with?("omsagent-") && podNameSpace.eql?("kube-system") && containerName.downcase.start_with?("omsagent"))
- nodePodContainerKey = [nodeName, podName, containerName, metricNametoReturn].join("~~")
- @@resourceLimitsTelemetryHash[nodePodContainerKey] = metricValue
- end
- if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES)
- @@resourceLimitsTelemetryHash.each { |key, value|
- keyElements = key.split("~~")
- if keyElements.length != 4
- next
- end
-
- # get dimension values by key
- telemetryProps = {}
- telemetryProps["Computer"] = keyElements[0]
- telemetryProps["PodName"] = keyElements[1]
- telemetryProps["ContainerName"] = keyElements[2]
- metricNameFromKey = keyElements[3]
- ApplicationInsightsUtility.sendMetricTelemetry(metricNameFromKey, value, telemetryProps)
- }
- @@telemetryTimeTracker = DateTime.now.to_time.to_i
- @@resourceLimitsTelemetryHash = {}
- end
- rescue => errorStr
- $log.warn("Exception while generating Telemetry from getContainerResourceRequestsAndLimits failed: #{errorStr} for metric #{metricNameToCollect}")
- end
+ metricCollection = {}
+ metricCollection["CounterName"] = metricNametoReturn
+ metricCollection["Value"] = metricValue
+
+ metricProps["json_Collections"] = []
+ metricCollections = []
+ metricCollections.push(metricCollection)
+ metricProps["json_Collections"] = metricCollections.to_json
+ metricItems.push(metricProps)
#No container level limit for the given metric, so default to node level limit
else
nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect
if (metricCategory == "limits" && @@NodeMetrics.has_key?(nodeMetricsHashKey))
metricValue = @@NodeMetrics[nodeMetricsHashKey]
#@Log.info("Limits not set for container #{clusterId + "/" + podUid + "/" + containerName} using node level limits: #{nodeMetricsHashKey}=#{metricValue} ")
- metricItem = {}
- metricItem["DataItems"] = []
-
+
metricProps = {}
metricProps["Timestamp"] = metricTime
metricProps["Host"] = nodeName
@@ -505,14 +471,14 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle
metricProps["ObjectName"] = "K8SContainer"
metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName
- metricProps["Collections"] = []
- metricCollections = {}
- metricCollections["CounterName"] = metricNametoReturn
- metricCollections["Value"] = metricValue
-
- metricProps["Collections"].push(metricCollections)
- metricItem["DataItems"].push(metricProps)
- metricItems.push(metricItem)
+ metricCollection = {}
+ metricCollection["CounterName"] = metricNametoReturn
+ metricCollection["Value"] = metricValue
+ metricProps["json_Collections"] = []
+ metricCollections = []
+ metricCollections.push(metricCollection)
+ metricProps["json_Collections"] = metricCollections.to_json
+ metricItems.push(metricProps)
end
end
end
@@ -632,22 +598,22 @@ def parseNodeLimitsFromNodeItem(node, metricCategory, metricNameToCollect, metri
# metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory"
metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect])
- metricItem["DataItems"] = []
- metricProps = {}
- metricProps["Timestamp"] = metricTime
- metricProps["Host"] = node["metadata"]["name"]
+ metricItem["Timestamp"] = metricTime
+ metricItem["Host"] = node["metadata"]["name"]
# Adding this so that it is not set by base omsagent since it was not set earlier and being set by base omsagent
- metricProps["Computer"] = node["metadata"]["name"]
- metricProps["ObjectName"] = "K8SNode"
- metricProps["InstanceName"] = clusterId + "/" + node["metadata"]["name"]
- metricProps["Collections"] = []
- metricCollections = {}
- metricCollections["CounterName"] = metricNametoReturn
- metricCollections["Value"] = metricValue
-
- metricProps["Collections"].push(metricCollections)
- metricItem["DataItems"].push(metricProps)
-
+ metricItem["Computer"] = node["metadata"]["name"]
+ metricItem["ObjectName"] = "K8SNode"
+ metricItem["InstanceName"] = clusterId + "/" + node["metadata"]["name"]
+
+ metricCollection = {}
+ metricCollection["CounterName"] = metricNametoReturn
+ metricCollection["Value"] = metricValue
+ metricCollections = []
+ metricCollections.push(metricCollection)
+
+ metricItem["json_Collections"] = []
+ metricItem["json_Collections"] = metricCollections.to_json
+
#push node level metrics to a inmem hash so that we can use it looking up at container level.
#Currently if container level cpu & memory limits are not defined we default to node level limits
@@NodeMetrics[clusterId + "/" + node["metadata"]["name"] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue
diff --git a/source/plugins/ruby/MdmMetricsGenerator.rb b/source/plugins/ruby/MdmMetricsGenerator.rb
index 6641456af..a809087dc 100644
--- a/source/plugins/ruby/MdmMetricsGenerator.rb
+++ b/source/plugins/ruby/MdmMetricsGenerator.rb
@@ -525,11 +525,11 @@ def getNodeResourceMetricRecords(record, metric_name, metric_value, percentage_m
records = []
begin
custommetricrecord = MdmAlertTemplates::Node_resource_metrics_template % {
- timestamp: record["DataItems"][0]["Timestamp"],
+ timestamp: record["Timestamp"],
metricName: metric_name,
- hostvalue: record["DataItems"][0]["Host"],
- objectnamevalue: record["DataItems"][0]["ObjectName"],
- instancenamevalue: record["DataItems"][0]["InstanceName"],
+ hostvalue: record["Host"],
+ objectnamevalue: record["ObjectName"],
+ instancenamevalue: record["InstanceName"],
metricminvalue: metric_value,
metricmaxvalue: metric_value,
metricsumvalue: metric_value,
@@ -538,11 +538,11 @@ def getNodeResourceMetricRecords(record, metric_name, metric_value, percentage_m
if !percentage_metric_value.nil?
additional_record = MdmAlertTemplates::Node_resource_metrics_template % {
- timestamp: record["DataItems"][0]["Timestamp"],
+ timestamp: record["Timestamp"],
metricName: @@node_metric_name_metric_percentage_name_hash[metric_name],
- hostvalue: record["DataItems"][0]["Host"],
- objectnamevalue: record["DataItems"][0]["ObjectName"],
- instancenamevalue: record["DataItems"][0]["InstanceName"],
+ hostvalue: record["Host"],
+ objectnamevalue: record["ObjectName"],
+ instancenamevalue: record["InstanceName"],
metricminvalue: percentage_metric_value,
metricmaxvalue: percentage_metric_value,
metricsumvalue: percentage_metric_value,
diff --git a/source/plugins/ruby/constants.rb b/source/plugins/ruby/constants.rb
index 906019b95..c037c99f6 100644
--- a/source/plugins/ruby/constants.rb
+++ b/source/plugins/ruby/constants.rb
@@ -103,5 +103,5 @@ class Constants
#Pod Statuses
POD_STATUS_TERMINATING = "Terminating"
- AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2_FILENAME = "/opt/AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2"
+
end
diff --git a/source/plugins/ruby/filter_cadvisor2mdm.rb b/source/plugins/ruby/filter_cadvisor2mdm.rb
index 659e3000c..62dcf31dc 100644
--- a/source/plugins/ruby/filter_cadvisor2mdm.rb
+++ b/source/plugins/ruby/filter_cadvisor2mdm.rb
@@ -2,7 +2,9 @@
# frozen_string_literal: true
-module Fluent
+require 'fluent/plugin/filter'
+
+module Fluent::Plugin
require "logger"
require "yajl/json_gem"
require_relative "oms_common"
@@ -12,7 +14,7 @@ module Fluent
require_relative "in_kube_nodes"
class CAdvisor2MdmFilter < Filter
- Fluent::Plugin.register_filter("filter_cadvisor2mdm", self)
+ Fluent::Plugin.register_filter("cadvisor2mdm", self)
config_param :enable_log, :integer, :default => 0
config_param :log_path, :string, :default => "/var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log"
@@ -65,7 +67,7 @@ def start
@containerResourceDimensionHash = {}
@pvUsageHash = {}
@@metric_threshold_hash = MdmMetricsGenerator.getContainerResourceUtilizationThresholds
- @NodeCache = Fluent::NodeStatsCache.new()
+ @NodeCache = Fluent::Plugin::NodeStatsCache.new()
end
rescue => e
@log.info "Error initializing plugin #{e}"
@@ -148,16 +150,16 @@ def filter(tag, time, record)
begin
if @process_incoming_stream
- # Check if insights metrics for PV metrics
- data_type = record["DataType"]
- if data_type == "INSIGHTS_METRICS_BLOB"
+ # Check if insights metrics for PV metrics
+ if record["Name"] == Constants::PV_USED_BYTES
return filterPVInsightsMetrics(record)
end
- object_name = record["DataItems"][0]["ObjectName"]
- counter_name = record["DataItems"][0]["Collections"][0]["CounterName"]
+ object_name = record["ObjectName"]
+ counter_name = JSON.parse(record["json_Collections"])[0]["CounterName"]
+
percentage_metric_value = 0.0
- metric_value = record["DataItems"][0]["Collections"][0]["Value"]
+ metric_value = JSON.parse(record["json_Collections"])[0]["Value"]
if object_name == Constants::OBJECT_NAME_K8S_NODE && @metrics_to_collect_hash.key?(counter_name.downcase)
# Compute and send % CPU and Memory
@@ -165,7 +167,7 @@ def filter(tag, time, record)
metric_name = Constants::CPU_USAGE_MILLI_CORES
metric_value /= 1000000 #cadvisor record is in nanocores. Convert to mc
if @@controller_type.downcase == "replicaset"
- target_node_cpu_capacity_mc = @NodeCache.cpu.get_capacity(record["DataItems"][0]["Host"]) / 1000000
+ target_node_cpu_capacity_mc = @NodeCache.cpu.get_capacity(record["Host"]) / 1000000
else
target_node_cpu_capacity_mc = @cpu_capacity
end
@@ -178,7 +180,7 @@ def filter(tag, time, record)
if counter_name.start_with?("memory")
metric_name = counter_name
if @@controller_type.downcase == "replicaset"
- target_node_mem_capacity = @NodeCache.mem.get_capacity(record["DataItems"][0]["Host"])
+ target_node_mem_capacity = @NodeCache.mem.get_capacity(record["Host"])
else
target_node_mem_capacity = @memory_capacity
end
@@ -187,12 +189,12 @@ def filter(tag, time, record)
percentage_metric_value = metric_value * 100 / target_node_mem_capacity
end
end
- @log.info "percentage_metric_value for metric: #{metric_name} for instance: #{record["DataItems"][0]["Host"]} percentage: #{percentage_metric_value}"
+ @log.info "percentage_metric_value for metric: #{metric_name} for instance: #{record["Host"]} percentage: #{percentage_metric_value}"
# do some sanity checking. Do we want this?
if percentage_metric_value > 100.0 or percentage_metric_value < 0.0
telemetryProperties = {}
- telemetryProperties["Computer"] = record["DataItems"][0]["Host"]
+ telemetryProperties["Computer"] = record["Host"]
telemetryProperties["MetricName"] = metric_name
telemetryProperties["MetricPercentageValue"] = percentage_metric_value
ApplicationInsightsUtility.sendCustomEvent("ErrorPercentageOutOfBounds", telemetryProperties)
@@ -200,7 +202,7 @@ def filter(tag, time, record)
return MdmMetricsGenerator.getNodeResourceMetricRecords(record, metric_name, metric_value, percentage_metric_value)
elsif object_name == Constants::OBJECT_NAME_K8S_CONTAINER && @metrics_to_collect_hash.key?(counter_name.downcase)
- instanceName = record["DataItems"][0]["InstanceName"]
+ instanceName = record["InstanceName"]
metricName = counter_name
# Using node cpu capacity in the absence of container cpu capacity since the container will end up using the
# node's capacity in this case. Converting this to nanocores for computation purposes, since this is in millicores
@@ -235,7 +237,7 @@ def filter(tag, time, record)
flushMetricTelemetry
if percentage_metric_value >= thresholdPercentage
setThresholdExceededTelemetry(metricName)
- return MdmMetricsGenerator.getContainerResourceUtilMetricRecords(record["DataItems"][0]["Timestamp"],
+ return MdmMetricsGenerator.getContainerResourceUtilMetricRecords(record["Timestamp"],
metricName,
percentage_metric_value,
@containerResourceDimensionHash[instanceName],
@@ -256,39 +258,36 @@ def filter(tag, time, record)
end
end
- def filterPVInsightsMetrics(record)
+ def filterPVInsightsMetrics(record)
begin
mdmMetrics = []
- record["DataItems"].each do |dataItem|
-
- if dataItem["Name"] == Constants::PV_USED_BYTES && @metrics_to_collect_hash.key?(dataItem["Name"].downcase)
- metricName = dataItem["Name"]
- usage = dataItem["Value"]
- capacity = dataItem["Tags"][Constants::INSIGHTSMETRICS_TAGS_PV_CAPACITY_BYTES]
- if capacity != 0
- percentage_metric_value = (usage * 100.0) / capacity
- end
- @log.info "percentage_metric_value for metric: #{metricName} percentage: #{percentage_metric_value}"
- @log.info "@@metric_threshold_hash for #{metricName}: #{@@metric_threshold_hash[metricName]}"
-
- computer = dataItem["Computer"]
- resourceDimensions = dataItem["Tags"]
- thresholdPercentage = @@metric_threshold_hash[metricName]
-
- flushMetricTelemetry
- if percentage_metric_value >= thresholdPercentage
- setThresholdExceededTelemetry(metricName)
- return MdmMetricsGenerator.getPVResourceUtilMetricRecords(dataItem["CollectionTime"],
- metricName,
- computer,
- percentage_metric_value,
- resourceDimensions,
- thresholdPercentage)
- else
- return []
- end # end if block for percentage metric > configured threshold % check
- end # end if block for dataItem name check
- end # end for block of looping through data items
+ if record["Name"] == Constants::PV_USED_BYTES && @metrics_to_collect_hash.key?(record["Name"].downcase)
+ metricName = record["Name"]
+ usage = record["Value"]
+ capacity = record["Tags"][Constants::INSIGHTSMETRICS_TAGS_PV_CAPACITY_BYTES]
+ if capacity != 0
+ percentage_metric_value = (usage * 100.0) / capacity
+ end
+ @log.info "percentage_metric_value for metric: #{metricName} percentage: #{percentage_metric_value}"
+ @log.info "@@metric_threshold_hash for #{metricName}: #{@@metric_threshold_hash[metricName]}"
+
+ computer = record["Computer"]
+ resourceDimensions = record["Tags"]
+ thresholdPercentage = @@metric_threshold_hash[metricName]
+
+ flushMetricTelemetry
+ if percentage_metric_value >= thresholdPercentage
+ setThresholdExceededTelemetry(metricName)
+ return MdmMetricsGenerator.getPVResourceUtilMetricRecords(record["CollectionTime"],
+ metricName,
+ computer,
+ percentage_metric_value,
+ resourceDimensions,
+ thresholdPercentage)
+ else
+ return []
+ end # end if block for percentage metric > configured threshold % check
+ end # end if block for dataItem name check
return []
rescue Exception => e
@log.info "Error processing cadvisor insights metrics record Exception: #{e.class} Message: #{e.message}"
@@ -316,16 +315,22 @@ def ensure_cpu_memory_capacity_set
end
if !nodeInventory.nil?
cpu_capacity_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores")
- if !cpu_capacity_json.nil? && !cpu_capacity_json[0]["DataItems"][0]["Collections"][0]["Value"].to_s.nil?
- @cpu_capacity = cpu_capacity_json[0]["DataItems"][0]["Collections"][0]["Value"]
- @log.info "CPU Limit #{@cpu_capacity}"
+ if !cpu_capacity_json.nil?
+ metricVal = JSON.parse(cpu_capacity_json[0]["json_Collections"])[0]["Value"]
+ if !metricVal.to_s.nil?
+ @cpu_capacity = metricVal
+ @log.info "CPU Limit #{@cpu_capacity}"
+ end
else
@log.info "Error getting cpu_capacity"
end
memory_capacity_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes")
- if !memory_capacity_json.nil? && !memory_capacity_json[0]["DataItems"][0]["Collections"][0]["Value"].to_s.nil?
- @memory_capacity = memory_capacity_json[0]["DataItems"][0]["Collections"][0]["Value"]
- @log.info "Memory Limit #{@memory_capacity}"
+ if !memory_capacity_json.nil?
+ metricVal = JSON.parse(cpu_capacity_json[0]["json_Collections"])[0]["Value"]
+ if !metricVal.to_s.nil?
+ @memory_capacity = metricVal
+ @log.info "Memory Limit #{@memory_capacity}"
+ end
else
@log.info "Error getting memory_capacity"
end
@@ -346,7 +351,7 @@ def ensure_cpu_memory_capacity_set
end
def filter_stream(tag, es)
- new_es = MultiEventStream.new
+ new_es = Fluent::MultiEventStream.new
begin
ensure_cpu_memory_capacity_set
# Getting container limits hash
diff --git a/source/plugins/ruby/filter_cadvisor_health_container.rb b/source/plugins/ruby/filter_cadvisor_health_container.rb
index 870fcd6d6..ab64b6e61 100644
--- a/source/plugins/ruby/filter_cadvisor_health_container.rb
+++ b/source/plugins/ruby/filter_cadvisor_health_container.rb
@@ -1,7 +1,9 @@
#!/usr/local/bin/ruby
# frozen_string_literal: true
-module Fluent
+require 'fluent/plugin/filter'
+
+module Fluent::Plugin
require 'logger'
require 'yajl/json_gem'
require_relative 'oms_common'
@@ -11,7 +13,7 @@ module Fluent
class CAdvisor2ContainerHealthFilter < Filter
include HealthModel
- Fluent::Plugin.register_filter('filter_cadvisor_health_container', self)
+ Fluent::Plugin.register_filter('cadvisor_health_container', self)
config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/health_monitors.log'
config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryRssBytes'
@@ -58,9 +60,9 @@ def start
def filter_stream(tag, es)
if !@@cluster_health_model_enabled
@log.info "Cluster Health Model disabled in filter_cadvisor_health_container"
- return MultiEventStream.new
+ return Fluent::MultiEventStream.new
end
- new_es = MultiEventStream.new
+ new_es = Fluent::MultiEventStream.new
records_count = 0
es.each { |time, record|
begin
@@ -83,8 +85,9 @@ def filter(tag, time, record)
if record.key?("MonitorLabels")
return record
end
- object_name = record['DataItems'][0]['ObjectName']
- counter_name = record['DataItems'][0]['Collections'][0]['CounterName'].downcase
+
+ object_name = record['ObjectName']
+ counter_name = JSON.parse(record['json_Collections'])[0]['CounterName'].downcase
if @metrics_to_collect_hash.key?(counter_name)
if object_name == @@object_name_k8s_container
return @formatter.get_record_from_cadvisor_record(record)
diff --git a/source/plugins/ruby/filter_cadvisor_health_node.rb b/source/plugins/ruby/filter_cadvisor_health_node.rb
index 27e5bc255..ddbb871e8 100644
--- a/source/plugins/ruby/filter_cadvisor_health_node.rb
+++ b/source/plugins/ruby/filter_cadvisor_health_node.rb
@@ -1,7 +1,9 @@
#!/usr/local/bin/ruby
# frozen_string_literal: true
-module Fluent
+require 'fluent/plugin/filter'
+
+module Fluent::Plugin
require 'logger'
require 'yajl/json_gem'
require_relative 'oms_common'
@@ -11,8 +13,8 @@ module Fluent
class CAdvisor2NodeHealthFilter < Filter
include HealthModel
- Fluent::Plugin.register_filter('filter_cadvisor_health_node', self)
-
+ Fluent::Plugin.register_filter('cadvisor_health_node', self)
+
attr_accessor :provider, :resources
config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryRssBytes'
@@ -75,13 +77,13 @@ def start
def filter_stream(tag, es)
if !@@cluster_health_model_enabled
@log.info "Cluster Health Model disabled in filter_cadvisor_health_node"
- return MultiEventStream.new
+ return Fluent::MultiEventStream.new
end
begin
node_capacity = HealthMonitorUtils.ensure_cpu_memory_capacity_set(@@hm_log, @cpu_capacity, @memory_capacity, @@hostName)
@cpu_capacity = node_capacity[0]
@memory_capacity = node_capacity[1]
- new_es = MultiEventStream.new
+ new_es = Fluent::MultiEventStream.new
records_count = 0
es.each { |time, record|
filtered_record = filter(tag, time, record)
@@ -95,7 +97,7 @@ def filter_stream(tag, es)
rescue => e
@log.info "Error in filter_cadvisor_health_node filter_stream #{e.backtrace}"
ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"})
- return MultiEventStream.new
+ return Fluent::MultiEventStream.new
end
end
@@ -105,10 +107,10 @@ def filter(tag, time, record)
return record
end
- object_name = record['DataItems'][0]['ObjectName']
- counter_name = record['DataItems'][0]['Collections'][0]['CounterName'].downcase
+ object_name = record['ObjectName']
+ counter_name = JSON.parse(record['json_Collections'])[0]['CounterName'].downcase
if @metrics_to_collect_hash.key?(counter_name.downcase)
- metric_value = record['DataItems'][0]['Collections'][0]['Value']
+ metric_value = JSON.parse(record['json_Collections'])[0]['Value']
case object_name
when @@object_name_k8s_node
case counter_name.downcase
@@ -134,14 +136,14 @@ def process_node_cpu_record(record, metric_value)
if record.nil?
return nil
else
- instance_name = record['DataItems'][0]['InstanceName']
+ instance_name = record['InstanceName']
#@log.info "CPU capacity #{@cpu_capacity}"
metric_value /= 1000000
percent = (metric_value.to_f/@cpu_capacity*100).round(2)
#@log.debug "Percentage of CPU limit: #{percent}"
state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(MonitorId::NODE_CPU_MONITOR_ID))
#@log.debug "Computed State : #{state}"
- timestamp = record['DataItems'][0]['Timestamp']
+ timestamp = record['Timestamp']
health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value, "cpuUtilizationPercentage" => percent}}
monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, @@hostName])
@@ -166,14 +168,14 @@ def process_node_memory_record(record, metric_value)
if record.nil?
return nil
else
- instance_name = record['DataItems'][0]['InstanceName']
+ instance_name = record['InstanceName']
#@log.info "Memory capacity #{@memory_capacity}"
percent = (metric_value.to_f/@memory_capacity*100).round(2)
#@log.debug "Percentage of Memory limit: #{percent}"
state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(MonitorId::NODE_MEMORY_MONITOR_ID))
#@log.debug "Computed State : #{state}"
- timestamp = record['DataItems'][0]['Timestamp']
+ timestamp = record['Timestamp']
health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}}
#@log.info health_monitor_record
diff --git a/source/plugins/ruby/filter_container.rb b/source/plugins/ruby/filter_container.rb
deleted file mode 100644
index b72e82dbc..000000000
--- a/source/plugins/ruby/filter_container.rb
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (c) Microsoft Corporation. All rights reserved.
-
-# frozen_string_literal: true
-
-module Fluent
- require 'logger'
-
- class ContainerFilter < Filter
- Fluent::Plugin.register_filter('filter_container', self)
-
- config_param :enable_log, :integer, :default => 0
- config_param :log_path, :string, :default => '/var/opt/microsoft/omsagent/log/filter_container.log'
-
- def initialize
- super
- end
-
- def configure(conf)
- super
- @log = nil
-
- if @enable_log
- @log = Logger.new(@log_path, 'weekly')
- @log.debug {'Starting filter_container plugin'}
- end
- end
-
- def start
- super
- end
-
- def shutdown
- super
- end
-
- def filter(tag, time, record)
- dataType = nil
-
- record.each do |r|
- if dataType == nil
- dataType = case r["ClassName"]
- when "Container_ImageInventory" then "CONTAINER_IMAGE_INVENTORY_BLOB"
- when "Container_ContainerInventory" then "CONTAINER_INVENTORY_BLOB"
- when "Container_DaemonEvent" then "CONTAINER_SERVICE_LOG_BLOB"
- when "Container_ContainerLog" then "CONTAINER_LOG_BLOB"
- end
- end
- end
-
- wrapper = {
- "DataType"=>dataType,
- "IPName"=>"Containers",
- "DataItems"=>record
- }
-
- wrapper
- end
- end
-end
diff --git a/source/plugins/ruby/filter_docker_log.rb b/source/plugins/ruby/filter_docker_log.rb
deleted file mode 100644
index b80f4c204..000000000
--- a/source/plugins/ruby/filter_docker_log.rb
+++ /dev/null
@@ -1,103 +0,0 @@
-# Copyright (c) Microsoft Corporation. All rights reserved.
-
-# frozen_string_literal: true
-
-module Fluent
- require 'logger'
- require 'socket'
- require 'yajl/json_gem'
-
- class DockerLogFilter < Filter
- Plugin.register_filter('filter_docker_log', self)
-
- # Set to 1 in config file to enable logging
- config_param :enable_log, :integer, :default => 0
- config_param :log_path, :string, :default => '/var/opt/microsoft/omsagent/log/filter_docker_log.txt'
-
- # This method is called before starting.
- def configure(conf)
- super
- @hostname = Socket.gethostname
- # in case get full name, extract up to '.'
- dotpos = @hostname.index('.')
- if dotpos != nil
- @hostname = @hostname[0..dotpos-1]
- end
-
- # Cache the image name and ID of each container so we don't have to inspect each time
- @containerCache = Hash.new
-
- @log = nil
-
- if @enable_log
- @log = Logger.new(@log_path, 'weekly')
- @log.debug {'Starting filter_docker_log plugin on ' + @hostname}
- end
- end
-
- def filter(tag, time, record)
- if @log != nil
- @log.debug {'Accepted a log from container ' + record['container_id']}
- end
-
- wrapper = Hash.new
-
- if record['log'].empty?
- if @log != nil
- @log.debug {'Log from container ' + record['container_id'] + ' had length 0 and will be discarded'}
- end
- else
- # Need to query image information from ID
- containerId = record['container_id']
-
- unless @containerCache.has_key?(containerId)
- if @log != nil
- @log.debug {'Container ' + containerId + ' information is not in the cache, inspecting'}
- end
-
- # Value not in cache, use inspect
- @containerCache[containerId] = Hash.new
- details = ''
-
- begin
- details = JSON.parse(`sudo docker inspect #{containerId}`)
- rescue => e
- if @log != nil
- @log.error {'sudo docker inspect ' + containerId + ' failed'}
- end
- end
-
- if details.empty?
- # This should not occur
- @containerCache[containerId]['Image'] = 'Unknown'
- @containerCache[containerId]['ImageName'] = 'Unknown'
-
- if @log != nil
- @log.warn {'The image ID of container ' + containerId + ' could not be determined'}
- end
- else
- @containerCache[containerId]['Image'] = details[0]['Config']['Image']
- @containerCache[containerId]['ImageName'] = details[0]['Config']['Image']
- end
- end
-
- newRecord = @containerCache[containerId]
-
- # No query is required
- newRecord['Id'] = containerId
- newRecord['Name'] = record['container_name'][0] == "/" ? record['container_name'][1..-1] : record['container_name']
- newRecord['LogEntrySource'] = record['source']
- newRecord['LogEntry'] = record['log']
- newRecord['Computer'] = @hostname
-
- wrapper = {
- "DataType"=>"CONTAINER_LOG_BLOB",
- "IPName"=>"Containers",
- "DataItems"=>[newRecord]
- }
- end
-
- wrapper
- end
- end
-end
diff --git a/source/plugins/ruby/filter_health_model_builder.rb b/source/plugins/ruby/filter_health_model_builder.rb
index 36e4801d7..d491f17c2 100644
--- a/source/plugins/ruby/filter_health_model_builder.rb
+++ b/source/plugins/ruby/filter_health_model_builder.rb
@@ -2,15 +2,17 @@
# frozen_string_literal: true
-module Fluent
+require 'fluent/plugin/filter'
+
+module Fluent::Plugin
require 'logger'
require 'yajl/json_gem'
Dir[File.join(__dir__, './health', '*.rb')].each { |file| require file }
-
+
class FilterHealthModelBuilder < Filter
include HealthModel
- Fluent::Plugin.register_filter('filter_health_model_builder', self)
+ Fluent::Plugin.register_filter('health_model_builder', self)
config_param :enable_log, :integer, :default => 0
config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log'
@@ -20,7 +22,7 @@ class FilterHealthModelBuilder < Filter
attr_reader :buffer, :model_builder, :health_model_definition, :monitor_factory, :state_finalizers, :monitor_set, :model_builder, :hierarchy_builder, :resources, :kube_api_down_handler, :provider, :reducer, :state, :generator, :telemetry
- @@rewrite_tag = 'kubehealth.Signals'
+
@@cluster_id = KubernetesApiClient.getClusterId
@@token_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/token"
@@cert_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
@@ -29,6 +31,7 @@ class FilterHealthModelBuilder < Filter
def initialize
begin
super
+ @rewrite_tag = 'oneagent.containerInsights.KUBE_HEALTH_BLOB'
@buffer = HealthModel::HealthModelBuffer.new
@cluster_health_state = ClusterHealthState.new(@@token_file_path, @@cert_file_path)
@health_model_definition = HealthModel::ParentMonitorProvider.new(HealthModel::HealthModelDefinitionParser.new(@model_definition_path).parse_file)
@@ -53,6 +56,7 @@ def initialize
deserialized_state_info = @cluster_health_state.get_state
@state.initialize_state(deserialized_state_info)
end
+
rescue => e
ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"})
end
@@ -82,11 +86,11 @@ def shutdown
def filter_stream(tag, es)
if !@@cluster_health_model_enabled
@log.info "Cluster Health Model disabled in filter_health_model_builder"
- return MultiEventStream.new
+ return Fluent::MultiEventStream.new
end
begin
- new_es = MultiEventStream.new
- time = Time.now
+ new_es = Fluent::MultiEventStream.new
+ time = Time.now
if tag.start_with?("kubehealth.DaemonSet.Node")
node_records = []
@@ -96,7 +100,7 @@ def filter_stream(tag, es)
}
@buffer.add_to_buffer(node_records)
end
- return MultiEventStream.new
+ return Fluent::MultiEventStream.new
elsif tag.start_with?("kubehealth.DaemonSet.Container")
container_records = []
if !es.nil?
@@ -110,7 +114,7 @@ def filter_stream(tag, es)
@container_cpu_memory_records = [] #in some clusters, this is null, so initialize it again.
end
@container_cpu_memory_records.push(*container_records) # push the records for aggregation later
- return MultiEventStream.new
+ return Fluent::MultiEventStream.new
elsif tag.start_with?("kubehealth.ReplicaSet")
records = []
es.each{|time, record|
@@ -218,11 +222,11 @@ def filter_stream(tag, es)
@log.info "after optimizing health signals all_monitors.size #{all_monitors.size}"
- current_time = Time.now
- emit_time = current_time.to_f
+
# for each key in monitor.keys,
# get the state from health_monitor_state
# generate the record to send
+ emit_time = Fluent::Engine.now
all_monitors.keys.each{|key|
record = @provider.get_record(all_monitors[key], state)
if record[HealthMonitorRecordFields::MONITOR_ID] == MonitorId::CLUSTER
@@ -241,17 +245,12 @@ def filter_stream(tag, es)
@cluster_new_state = new_state
end
end
- end
- record_wrapper = {
- "DataType" => "KUBE_HEALTH_BLOB",
- "IPName" => "ContainerInsights",
- "DataItems" => [record.each { |k, v| record[k] = v }],
- }
- new_es.add(emit_time, record_wrapper)
+ end
+ new_es.add(emit_time, record)
}
#emit the stream
- router.emit_stream(@@rewrite_tag, new_es)
+ router.emit_stream(@rewrite_tag, new_es)
#initialize monitor_set and model_builder
@monitor_set = HealthModel::MonitorSet.new
@@ -261,8 +260,8 @@ def filter_stream(tag, es)
@cluster_health_state.update_state(@state.to_h)
@telemetry.send
# return an empty event stream, else the match will throw a NoMethodError
- return MultiEventStream.new
- elsif tag.start_with?("kubehealth.Signals")
+ return Fluent::MultiEventStream.new
+ elsif tag.start_with?(@rewrite_tag)
# this filter also acts as a pass through as we are rewriting the tag and emitting to the fluent stream
es
else
@@ -274,6 +273,6 @@ def filter_stream(tag, es)
@log.warn "Message: #{e.message} Backtrace: #{e.backtrace}"
return nil
end
- end
+ end
end
end
diff --git a/source/plugins/ruby/filter_inventory2mdm.rb b/source/plugins/ruby/filter_inventory2mdm.rb
index 38ccab885..509ac608e 100644
--- a/source/plugins/ruby/filter_inventory2mdm.rb
+++ b/source/plugins/ruby/filter_inventory2mdm.rb
@@ -2,14 +2,16 @@
# frozen_string_literal: true
-module Fluent
+require 'fluent/plugin/filter'
+
+module Fluent::Plugin
require 'logger'
require 'yajl/json_gem'
require_relative 'oms_common'
require_relative 'CustomMetricsUtils'
class Inventory2MdmFilter < Filter
- Fluent::Plugin.register_filter('filter_inventory2mdm', self)
+ Fluent::Plugin.register_filter('inventory2mdm', self)
config_param :enable_log, :integer, :default => 0
config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log'
@@ -115,8 +117,8 @@ def process_node_inventory_records(es)
es.each{|time,record|
begin
- timestamp = record['DataItems'][0]['CollectionTime']
- node_status = record['DataItems'][0]['Status']
+ timestamp = record['CollectionTime']
+ node_status = record['Status']
if node_status.downcase.split(",").include? @@node_status_ready.downcase
node_ready_count = node_ready_count+1
else
@@ -161,8 +163,8 @@ def process_pod_inventory_records(es)
records = []
es.each{|time,record|
record_count += 1
- timestamp = record['DataItems'][0]['CollectionTime']
- podUid = record['DataItems'][0]['PodUid']
+ timestamp = record['CollectionTime']
+ podUid = record['PodUid']
if podUids.key?(podUid)
#@log.info "pod with #{podUid} already counted"
@@ -170,10 +172,10 @@ def process_pod_inventory_records(es)
end
podUids[podUid] = true
- podPhaseDimValue = record['DataItems'][0]['PodStatus']
- podNamespaceDimValue = record['DataItems'][0]['Namespace']
- podControllerNameDimValue = record['DataItems'][0]['ControllerName']
- podNodeDimValue = record['DataItems'][0]['Computer']
+ podPhaseDimValue = record['PodStatus']
+ podNamespaceDimValue = record['Namespace']
+ podControllerNameDimValue = record['ControllerName']
+ podNodeDimValue = record['Computer']
if podControllerNameDimValue.nil? || podControllerNameDimValue.empty?
podControllerNameDimValue = 'No Controller'
@@ -263,7 +265,7 @@ def process_pod_inventory_records(es)
end
def filter_stream(tag, es)
- new_es = MultiEventStream.new
+ new_es = Fluent::MultiEventStream.new
filtered_records = []
time = DateTime.now
begin
diff --git a/source/plugins/ruby/filter_telegraf2mdm.rb b/source/plugins/ruby/filter_telegraf2mdm.rb
index 88ae428d1..fd71f1682 100644
--- a/source/plugins/ruby/filter_telegraf2mdm.rb
+++ b/source/plugins/ruby/filter_telegraf2mdm.rb
@@ -2,7 +2,9 @@
# frozen_string_literal: true
-module Fluent
+require 'fluent/plugin/filter'
+
+module Fluent::Plugin
require "logger"
require "yajl/json_gem"
require_relative "oms_common"
@@ -11,7 +13,7 @@ module Fluent
require_relative "constants"
class Telegraf2MdmFilter < Filter
- Fluent::Plugin.register_filter("filter_telegraf2mdm", self)
+ Fluent::Plugin.register_filter("telegraf2mdm", self)
config_param :enable_log, :integer, :default => 0
config_param :log_path, :string, :default => "/var/opt/microsoft/docker-cimprov/log/filter_telegraf2mdm.log"
@@ -64,7 +66,7 @@ def filter(tag, time, record)
end
def filter_stream(tag, es)
- new_es = MultiEventStream.new
+ new_es = Fluent::MultiEventStream.new
begin
es.each { |time, record|
filtered_records = filter(tag, time, record)
diff --git a/source/plugins/ruby/health/health_container_cpu_memory_record_formatter.rb b/source/plugins/ruby/health/health_container_cpu_memory_record_formatter.rb
index 12c72a120..ebf3abd7e 100644
--- a/source/plugins/ruby/health/health_container_cpu_memory_record_formatter.rb
+++ b/source/plugins/ruby/health/health_container_cpu_memory_record_formatter.rb
@@ -17,10 +17,10 @@ def initialize
def get_record_from_cadvisor_record(cadvisor_record)
begin
- instance_name = cadvisor_record['DataItems'][0]['InstanceName']
- counter_name = cadvisor_record['DataItems'][0]['Collections'][0]['CounterName']
- metric_value = cadvisor_record['DataItems'][0]['Collections'][0]['Value']
- timestamp = cadvisor_record['DataItems'][0]['Timestamp']
+ instance_name = cadvisor_record['InstanceName']
+ counter_name = JSON.parse(cadvisor_record['json_Collections'])[0]['CounterName']
+ metric_value = JSON.parse(cadvisor_record['json_Collections'])[0]['Value']
+ timestamp = cadvisor_record['Timestamp']
health_container_cpu_memory_record = @@health_container_cpu_memory_record_template % {
instance_name: instance_name,
diff --git a/source/plugins/ruby/health/health_monitor_utils.rb b/source/plugins/ruby/health/health_monitor_utils.rb
index c23d8824a..58f2ecc36 100644
--- a/source/plugins/ruby/health/health_monitor_utils.rb
+++ b/source/plugins/ruby/health/health_monitor_utils.rb
@@ -171,8 +171,9 @@ def get_cluster_cpu_memory_capacity(log, node_inventory: nil)
cpu_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "cpu", "cpuCapacityNanoCores")
if !cpu_capacity_json.nil?
cpu_capacity_json.each do |cpu_capacity_node|
- if !cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'].to_s.nil?
- cluster_cpu_capacity += cpu_capacity_node['DataItems'][0]['Collections'][0]['Value']
+ metricVal = JSON.parse(cpu_capacity_node['json_Collections'])[0]['Value']
+ if !metricVal.to_s.nil?
+ cluster_cpu_capacity += metricVal
end
end
else
@@ -181,8 +182,9 @@ def get_cluster_cpu_memory_capacity(log, node_inventory: nil)
memory_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "memory", "memoryCapacityBytes")
if !memory_capacity_json.nil?
memory_capacity_json.each do |memory_capacity_node|
- if !memory_capacity_node['DataItems'][0]['Collections'][0]['Value'].to_s.nil?
- cluster_memory_capacity += memory_capacity_node['DataItems'][0]['Collections'][0]['Value']
+ metricVal = JSON.parse(memory_capacity_node['json_Collections'])[0]['Value']
+ if !metricVal.to_s.nil?
+ cluster_memory_capacity += metricVal
end
end
else
@@ -284,7 +286,7 @@ def build_metrics_hash(metrics_to_collect)
def get_health_monitor_config
health_monitor_config = {}
begin
- file = File.open('/opt/microsoft/omsagent/plugin/healthmonitorconfig.json', "r")
+ file = File.open('/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json', "r")
if !file.nil?
fileContents = file.read
health_monitor_config = JSON.parse(fileContents)
diff --git a/source/plugins/ruby/in_cadvisor_perf.rb b/source/plugins/ruby/in_cadvisor_perf.rb
index b706ff00a..781042cea 100644
--- a/source/plugins/ruby/in_cadvisor_perf.rb
+++ b/source/plugins/ruby/in_cadvisor_perf.rb
@@ -1,10 +1,11 @@
#!/usr/local/bin/ruby
# frozen_string_literal: true
+require 'fluent/plugin/input'
-module Fluent
+module Fluent::Plugin
class CAdvisor_Perf_Input < Input
- Plugin.register_input("cadvisorperf", self)
+ Fluent::Plugin.register_input("cadvisor_perf", self)
def initialize
super
@@ -15,14 +16,15 @@ def initialize
require_relative "CAdvisorMetricsAPIClient"
require_relative "oms_common"
require_relative "omslog"
- require_relative "constants"
+ require_relative "constants"
end
config_param :run_interval, :time, :default => 60
- config_param :tag, :string, :default => "oms.api.cadvisorperf"
+ config_param :tag, :string, :default => "oneagent.containerInsights.LINUX_PERF_BLOB"
config_param :mdmtag, :string, :default => "mdm.cadvisorperf"
config_param :nodehealthtag, :string, :default => "kubehealth.DaemonSet.Node"
config_param :containerhealthtag, :string, :default => "kubehealth.DaemonSet.Container"
+ config_param :insightsmetricstag, :string, :default => "oneagent.containerInsights.INSIGHTS_METRICS_BLOB"
def configure(conf)
super
@@ -30,6 +32,7 @@ def configure(conf)
def start
if @run_interval
+ super
@finished = false
@condition = ConditionVariable.new
@mutex = Mutex.new
@@ -44,24 +47,23 @@ def shutdown
@condition.signal
}
@thread.join
+ super # This super must be at the end of shutdown method
end
end
def enumerate()
currentTime = Time.now
- time = currentTime.to_f
+ time = Fluent::Engine.now
batchTime = currentTime.utc.iso8601
@@istestvar = ENV["ISTEST"]
begin
- eventStream = MultiEventStream.new
- insightsMetricsEventStream = MultiEventStream.new
+ eventStream = Fluent::MultiEventStream.new
+ insightsMetricsEventStream = Fluent::MultiEventStream.new
metricData = CAdvisorMetricsAPIClient.getMetrics(winNode: nil, metricTime: batchTime )
- metricData.each do |record|
- record["DataType"] = "LINUX_PERF_BLOB"
- record["IPName"] = "LogManagement"
- eventStream.add(time, record) if record
- end
-
+ metricData.each do |record|
+ eventStream.add(time, record) if record
+ end
+
router.emit_stream(@tag, eventStream) if eventStream
router.emit_stream(@mdmtag, eventStream) if eventStream
router.emit_stream(@containerhealthtag, eventStream) if eventStream
@@ -75,19 +77,13 @@ def enumerate()
#start GPU InsightsMetrics items
begin
containerGPUusageInsightsMetricsDataItems = []
- containerGPUusageInsightsMetricsDataItems.concat(CAdvisorMetricsAPIClient.getInsightsMetrics(winNode: nil, metricTime: batchTime))
-
+ containerGPUusageInsightsMetricsDataItems.concat(CAdvisorMetricsAPIClient.getInsightsMetrics(winNode: nil, metricTime: batchTime))
containerGPUusageInsightsMetricsDataItems.each do |insightsMetricsRecord|
- wrapper = {
- "DataType" => "INSIGHTS_METRICS_BLOB",
- "IPName" => "ContainerInsights",
- "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }],
- }
- insightsMetricsEventStream.add(time, wrapper) if wrapper
+ insightsMetricsEventStream.add(time, insightsMetricsRecord) if insightsMetricsRecord
end
- router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
+ router.emit_stream(@insightsmetricstag, insightsMetricsEventStream) if insightsMetricsEventStream
router.emit_stream(@mdmtag, insightsMetricsEventStream) if insightsMetricsEventStream
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0)
@@ -135,6 +131,6 @@ def run_periodic
@mutex.lock
end
@mutex.unlock
- end
+ end
end # CAdvisor_Perf_Input
end # module
diff --git a/source/plugins/ruby/in_containerinventory.rb b/source/plugins/ruby/in_containerinventory.rb
index c1126aa4e..eebf422d6 100644
--- a/source/plugins/ruby/in_containerinventory.rb
+++ b/source/plugins/ruby/in_containerinventory.rb
@@ -1,9 +1,11 @@
#!/usr/local/bin/ruby
# frozen_string_literal: true
-module Fluent
+require 'fluent/plugin/input'
+
+module Fluent::Plugin
class Container_Inventory_Input < Input
- Plugin.register_input("containerinventory", self)
+ Fluent::Plugin.register_input("containerinventory", self)
@@PluginName = "ContainerInventory"
@@ -19,7 +21,7 @@ def initialize
end
config_param :run_interval, :time, :default => 60
- config_param :tag, :string, :default => "oms.containerinsights.containerinventory"
+ config_param :tag, :string, :default => "oneagent.containerInsights.CONTAINER_INVENTORY_BLOB"
def configure(conf)
super
@@ -27,6 +29,7 @@ def configure(conf)
def start
if @run_interval
+ super
@finished = false
@condition = ConditionVariable.new
@mutex = Mutex.new
@@ -42,17 +45,18 @@ def shutdown
@condition.signal
}
@thread.join
+ super # This super must be at the end of shutdown method
end
end
def enumerate
- currentTime = Time.now
- emitTime = currentTime.to_f
+ currentTime = Time.now
batchTime = currentTime.utc.iso8601
+ emitTime = Fluent::Engine.now
containerInventory = Array.new
- eventStream = MultiEventStream.new
+ eventStream = Fluent::MultiEventStream.new
hostName = ""
- $log.info("in_container_inventory::enumerate : Begin processing @ #{Time.now.utc.iso8601}")
+ $log.info("in_container_inventory::enumerate : Begin processing @ #{Time.now.utc.iso8601}")
begin
containerRuntimeEnv = ENV["CONTAINER_RUNTIME"]
$log.info("in_container_inventory::enumerate : container runtime : #{containerRuntimeEnv}")
@@ -89,13 +93,8 @@ def enumerate
end
end
end
- containerInventory.each do |record|
- wrapper = {
- "DataType" => "CONTAINER_INVENTORY_BLOB",
- "IPName" => "ContainerInsights",
- "DataItems" => [record.each { |k, v| record[k] = v }],
- }
- eventStream.add(emitTime, wrapper) if wrapper
+ containerInventory.each do |record|
+ eventStream.add(emitTime, record) if record
end
router.emit_stream(@tag, eventStream) if eventStream
@@istestvar = ENV["ISTEST"]
@@ -149,6 +148,6 @@ def run_periodic
@mutex.lock
end
@mutex.unlock
- end
+ end
end # Container_Inventory_Input
end # module
diff --git a/source/plugins/ruby/in_kube_events.rb b/source/plugins/ruby/in_kube_events.rb
index f50019a01..6f65dab92 100644
--- a/source/plugins/ruby/in_kube_events.rb
+++ b/source/plugins/ruby/in_kube_events.rb
@@ -1,9 +1,11 @@
#!/usr/local/bin/ruby
# frozen_string_literal: true
-module Fluent
+require 'fluent/plugin/input'
+
+module Fluent::Plugin
class Kube_Event_Input < Input
- Plugin.register_input("kubeevents", self)
+ Fluent::Plugin.register_input("kube_events", self)
@@KubeEventsStateFile = "/var/opt/microsoft/docker-cimprov/state/KubeEventQueryState.yaml"
def initialize
@@ -29,14 +31,15 @@ def initialize
end
config_param :run_interval, :time, :default => 60
- config_param :tag, :string, :default => "oms.containerinsights.KubeEvents"
+ config_param :tag, :string, :default => "oneagent.containerInsights.KUBE_EVENTS_BLOB"
def configure(conf)
super
end
- def start
+ def start
if @run_interval
+ super
if !ENV["EVENTS_CHUNK_SIZE"].nil? && !ENV["EVENTS_CHUNK_SIZE"].empty? && ENV["EVENTS_CHUNK_SIZE"].to_i > 0
@EVENTS_CHUNK_SIZE = ENV["EVENTS_CHUNK_SIZE"].to_i
else
@@ -70,6 +73,7 @@ def shutdown
@condition.signal
}
@thread.join
+ super
end
end
@@ -80,8 +84,8 @@ def enumerate
batchTime = currentTime.utc.iso8601
eventQueryState = getEventQueryState
newEventQueryState = []
- @eventsCount = 0
-
+ @eventsCount = 0
+
# Initializing continuation token to nil
continuationToken = nil
$log.info("in_kube_events::enumerate : Getting events from Kube API @ #{Time.now.utc.iso8601}")
@@ -127,11 +131,11 @@ def enumerate
end # end enumerate
def parse_and_emit_records(events, eventQueryState, newEventQueryState, batchTime = Time.utc.iso8601)
- currentTime = Time.now
- emitTime = currentTime.to_f
+ currentTime = Time.now
+ emitTime = Fluent::Engine.now
@@istestvar = ENV["ISTEST"]
begin
- eventStream = MultiEventStream.new
+ eventStream = Fluent::MultiEventStream.new
events["items"].each do |items|
record = {}
# - Not sure if ingestion has the below mapping for this custom type. Fix it as part of fixed type conversion
@@ -162,13 +166,8 @@ def parse_and_emit_records(events, eventQueryState, newEventQueryState, batchTim
record["Count"] = items["count"]
record["Computer"] = nodeName
record["ClusterName"] = KubernetesApiClient.getClusterName
- record["ClusterId"] = KubernetesApiClient.getClusterId
- wrapper = {
- "DataType" => "KUBE_EVENTS_BLOB",
- "IPName" => "ContainerInsights",
- "DataItems" => [record.each { |k, v| record[k] = v }],
- }
- eventStream.add(emitTime, wrapper) if wrapper
+ record["ClusterId"] = KubernetesApiClient.getClusterId
+ eventStream.add(emitTime, record) if record
@eventsCount += 1
end
router.emit_stream(@tag, eventStream) if eventStream
diff --git a/source/plugins/ruby/in_kube_health.rb b/source/plugins/ruby/in_kube_health.rb
index 874be26f6..db981c53e 100644
--- a/source/plugins/ruby/in_kube_health.rb
+++ b/source/plugins/ruby/in_kube_health.rb
@@ -1,17 +1,19 @@
#!/usr/local/bin/ruby
# frozen_string_literal: true
+require 'fluent/plugin/input'
+
require_relative "KubernetesApiClient"
require_relative "oms_common"
require_relative "omslog"
require_relative "ApplicationInsightsUtility"
-module Fluent
+module Fluent::Plugin
Dir[File.join(__dir__, "./health", "*.rb")].each { |file| require file }
class KubeHealthInput < Input
include HealthModel
- Plugin.register_input("kubehealth", self)
+ Fluent::Plugin.register_input("kube_health", self)
config_param :health_monitor_config_path, :default => "/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json"
@@ -46,6 +48,7 @@ def configure(conf)
def start
begin
+ super
if @run_interval
@finished = false
@condition = ConditionVariable.new
@@ -76,20 +79,21 @@ def shutdown
@condition.signal
}
@thread.join
+ super # This super must be at the end of shutdown method
end
end
def enumerate
if !@@cluster_health_model_enabled
@@hmlog.info "Cluster Health Model disabled in in_kube_health"
- return MultiEventStream.new
+ return Fluent::MultiEventStream.new
end
begin
- currentTime = Time.now
- emitTime = currentTime.to_f
+ currentTime = Time.now
+ emitTime = Fluent::Engine.now
batchTime = currentTime.utc.iso8601
health_monitor_records = []
- eventStream = MultiEventStream.new
+ eventStream = Fluent::MultiEventStream.new
#HealthMonitorUtils.refresh_kubernetes_api_data(@@hmlog, nil)
# we do this so that if the call fails, we get a response code/header etc.
diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb
index 99e804302..ffc11de55 100644
--- a/source/plugins/ruby/in_kube_nodes.rb
+++ b/source/plugins/ruby/in_kube_nodes.rb
@@ -1,17 +1,17 @@
#!/usr/local/bin/ruby
# frozen_string_literal: true
-module Fluent
- class Kube_nodeInventory_Input < Input
- Plugin.register_input("kubenodeinventory", self)
+require 'fluent/plugin/input'
- @@ContainerNodeInventoryTag = "oms.containerinsights.ContainerNodeInventory"
- @@MDMKubeNodeInventoryTag = "mdm.kubenodeinventory"
+module Fluent::Plugin
+ class Kube_nodeInventory_Input < Input
+ Fluent::Plugin.register_input("kube_nodes", self)
+
@@configMapMountPath = "/etc/config/settings/log-data-collection-settings"
@@promConfigMountPath = "/etc/config/settings/prometheus-data-collection-settings"
@@osmConfigMountPath = "/etc/config/osm-settings/osm-metric-collection-configuration"
@@AzStackCloudFileName = "/etc/kubernetes/host/azurestackcloud.json"
- @@kubeperfTag = "oms.api.KubePerf"
+
@@rsPromInterval = ENV["TELEMETRY_RS_PROM_INTERVAL"]
@@rsPromFieldPassCount = ENV["TELEMETRY_RS_PROM_FIELDPASS_LENGTH"]
@@ -35,7 +35,13 @@ def initialize
require_relative "KubernetesApiClient"
require_relative "ApplicationInsightsUtility"
require_relative "oms_common"
- require_relative "omslog"
+ require_relative "omslog"
+
+ @ContainerNodeInventoryTag = "oneagent.containerInsights.CONTAINER_NODE_INVENTORY_BLOB"
+ @insightsMetricsTag = "oneagent.containerInsights.INSIGHTS_METRICS_BLOB"
+ @MDMKubeNodeInventoryTag = "mdm.kubenodeinventory"
+ @kubeperfTag = "oneagent.containerInsights.LINUX_PERF_BLOB"
+
# refer tomlparser-agent-config for the defaults
@NODES_CHUNK_SIZE = 0
@NODES_EMIT_STREAM_BATCH_SIZE = 0
@@ -48,14 +54,15 @@ def initialize
end
config_param :run_interval, :time, :default => 60
- config_param :tag, :string, :default => "oms.containerinsights.KubeNodeInventory"
+ config_param :tag, :string, :default => "oneagent.containerInsights.KUBE_NODE_INVENTORY_BLOB"
def configure(conf)
super
end
- def start
+ def start
if @run_interval
+ super
if !ENV["NODES_CHUNK_SIZE"].nil? && !ENV["NODES_CHUNK_SIZE"].empty? && ENV["NODES_CHUNK_SIZE"].to_i > 0
@NODES_CHUNK_SIZE = ENV["NODES_CHUNK_SIZE"].to_i
else
@@ -90,6 +97,7 @@ def shutdown
@condition.signal
}
@thread.join
+ super # This super must be at the end of shutdown method
end
end
@@ -101,8 +109,10 @@ def enumerate
@nodesAPIE2ELatencyMs = 0
@nodeInventoryE2EProcessingLatencyMs = 0
- nodeInventoryStartTime = (Time.now.to_f * 1000).to_i
+ nodeInventoryStartTime = (Time.now.to_f * 1000).to_i
+
nodesAPIChunkStartTime = (Time.now.to_f * 1000).to_i
+
# Initializing continuation token to nil
continuationToken = nil
$log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}")
@@ -151,49 +161,38 @@ def enumerate
def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601)
begin
- currentTime = Time.now
- emitTime = currentTime.to_f
+ currentTime = Time.now
+ emitTime = Fluent::Engine.now
telemetrySent = false
- eventStream = MultiEventStream.new
- containerNodeInventoryEventStream = MultiEventStream.new
- insightsMetricsEventStream = MultiEventStream.new
- kubePerfEventStream = MultiEventStream.new
+ eventStream = Fluent::MultiEventStream.new
+ containerNodeInventoryEventStream = Fluent::MultiEventStream.new
+ insightsMetricsEventStream = Fluent::MultiEventStream.new
+ kubePerfEventStream = Fluent::MultiEventStream.new
@@istestvar = ENV["ISTEST"]
#get node inventory
nodeInventory["items"].each do |item|
# node inventory
nodeInventoryRecord = getNodeInventoryRecord(item, batchTime)
- wrapper = {
- "DataType" => "KUBE_NODE_INVENTORY_BLOB",
- "IPName" => "ContainerInsights",
- "DataItems" => [nodeInventoryRecord.each { |k, v| nodeInventoryRecord[k] = v }],
- }
- eventStream.add(emitTime, wrapper) if wrapper
+ eventStream.add(emitTime, nodeInventoryRecord) if nodeInventoryRecord
if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && eventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE
$log.info("in_kube_node::parse_and_emit_records: number of node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}")
router.emit_stream(@tag, eventStream) if eventStream
$log.info("in_kube_node::parse_and_emit_records: number of mdm node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}")
- router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream
-
+ router.emit_stream(@MDMKubeNodeInventoryTag, eventStream) if eventStream
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
$log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
end
- eventStream = MultiEventStream.new
+ eventStream = Fluent::MultiEventStream.new
end
# container node inventory
- containerNodeInventoryRecord = getContainerNodeInventoryRecord(item, batchTime)
- containerNodeInventoryWrapper = {
- "DataType" => "CONTAINER_NODE_INVENTORY_BLOB",
- "IPName" => "ContainerInsights",
- "DataItems" => [containerNodeInventoryRecord.each { |k, v| containerNodeInventoryRecord[k] = v }],
- }
- containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper
+ containerNodeInventoryRecord = getContainerNodeInventoryRecord(item, batchTime)
+ containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryRecord) if containerNodeInventoryRecord
if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && containerNodeInventoryEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE
$log.info("in_kube_node::parse_and_emit_records: number of container node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}")
- router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream
- containerNodeInventoryEventStream = MultiEventStream.new
+ router.emit_stream(@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream
+ containerNodeInventoryEventStream = Fluent::MultiEventStream.new
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
$log.info("containerNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
end
@@ -223,7 +222,8 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601)
nodeMetricRecords.push(nodeMetricRecord)
# add data to the cache so filter_cadvisor2mdm.rb can use it
if is_windows_node
- @NodeCache.cpu.set_capacity(nodeMetricRecord["DataItems"][0]["Host"], nodeMetricRecord["DataItems"][0]["Collections"][0]["Value"])
+ metricVal = JSON.parse(nodeMetricRecord["json_Collections"])[0]["Value"]
+ @NodeCache.cpu.set_capacity(nodeMetricRecord["Host"], metricVal)
end
end
nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "capacity", "memory", "memoryCapacityBytes", batchTime)
@@ -231,18 +231,17 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601)
nodeMetricRecords.push(nodeMetricRecord)
# add data to the cache so filter_cadvisor2mdm.rb can use it
if is_windows_node
- @NodeCache.mem.set_capacity(nodeMetricRecord["DataItems"][0]["Host"], nodeMetricRecord["DataItems"][0]["Collections"][0]["Value"])
+ metricVal = JSON.parse(nodeMetricRecord["json_Collections"])[0]["Value"]
+ @NodeCache.mem.set_capacity(nodeMetricRecord["Host"], metricVal)
end
end
- nodeMetricRecords.each do |metricRecord|
- metricRecord["DataType"] = "LINUX_PERF_BLOB"
- metricRecord["IPName"] = "LogManagement"
+ nodeMetricRecords.each do |metricRecord|
kubePerfEventStream.add(emitTime, metricRecord) if metricRecord
end
if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && kubePerfEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE
$log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}")
- router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
- kubePerfEventStream = MultiEventStream.new
+ router.emit_stream(@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
+ kubePerfEventStream = Fluent::MultiEventStream.new
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
$log.info("kubeNodePerfEmitStreamSuccess @ #{Time.now.utc.iso8601}")
end
@@ -266,18 +265,13 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601)
if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty?
nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord)
end
- nodeGPUInsightsMetricsRecords.each do |insightsMetricsRecord|
- wrapper = {
- "DataType" => "INSIGHTS_METRICS_BLOB",
- "IPName" => "ContainerInsights",
- "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }],
- }
- insightsMetricsEventStream.add(emitTime, wrapper) if wrapper
+ nodeGPUInsightsMetricsRecords.each do |insightsMetricsRecord|
+ insightsMetricsEventStream.add(emitTime, insightsMetricsRecord) if insightsMetricsRecord
end
if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && insightsMetricsEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE
$log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}")
- router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
- insightsMetricsEventStream = MultiEventStream.new
+ router.emit_stream(@insightsMetricsTag, insightsMetricsEventStream) if insightsMetricsEventStream
+ insightsMetricsEventStream = Fluent::MultiEventStream.new
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
$log.info("kubeNodeInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
end
@@ -337,15 +331,15 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601)
$log.info("in_kube_node::parse_and_emit_records: number of node inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}")
router.emit_stream(@tag, eventStream) if eventStream
$log.info("in_kube_node::parse_and_emit_records: number of mdm node inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}")
- router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream
+ router.emit_stream(@MDMKubeNodeInventoryTag, eventStream) if eventStream
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
$log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
end
- eventStream = nil
+ eventStream = nil
end
if containerNodeInventoryEventStream.count > 0
$log.info("in_kube_node::parse_and_emit_records: number of container node inventory records emitted #{containerNodeInventoryEventStream.count} @ #{Time.now.utc.iso8601}")
- router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream
+ router.emit_stream(@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream
containerNodeInventoryEventStream = nil
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
$log.info("containerNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
@@ -354,7 +348,7 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601)
if kubePerfEventStream.count > 0
$log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{kubePerfEventStream.count} @ #{Time.now.utc.iso8601}")
- router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
+ router.emit_stream(@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
kubePerfEventStream = nil
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
$log.info("kubeNodePerfInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
@@ -362,7 +356,7 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601)
end
if insightsMetricsEventStream.count > 0
$log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted #{insightsMetricsEventStream.count} @ #{Time.now.utc.iso8601}")
- router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
+ router.emit_stream(@insightsMetricsTag, insightsMetricsEventStream) if insightsMetricsEventStream
insightsMetricsEventStream = nil
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
$log.info("kubeNodeInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
@@ -513,10 +507,8 @@ def getNodeTelemetryProps(item)
$log.warn "in_kube_nodes::getContainerNodeIngetNodeTelemetryPropsventoryRecord:Failed: #{errorStr}"
end
return properties
- end
+ end
end # Kube_Node_Input
-
-
class NodeStatsCache
# inner class for caching implementation (CPU and memory caching is handled the exact same way, so logic to do so is moved to a private inner class)
# (to reduce code duplication)
@@ -586,6 +578,5 @@ def cpu()
def mem()
return @@memCache
end
- end
-
+ end
end # module
diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb
index 5256eb159..5598602cd 100644
--- a/source/plugins/ruby/in_kube_podinventory.rb
+++ b/source/plugins/ruby/in_kube_podinventory.rb
@@ -1,16 +1,17 @@
#!/usr/local/bin/ruby
# frozen_string_literal: true
-module Fluent
+require 'fluent/plugin/input'
+
+module Fluent::Plugin
require_relative "podinventory_to_mdm"
class Kube_PodInventory_Input < Input
- Plugin.register_input("kubepodinventory", self)
+ Fluent::Plugin.register_input("kube_podinventory", self)
@@MDMKubePodInventoryTag = "mdm.kubepodinventory"
@@hostName = (OMS::Common.get_hostname)
- @@kubeperfTag = "oms.api.KubePerf"
- @@kubeservicesTag = "oms.containerinsights.KubeServices"
+
def initialize
super
@@ -38,19 +39,25 @@ def initialize
@winContainerCount = 0
@controllerData = {}
@podInventoryE2EProcessingLatencyMs = 0
- @podsAPIE2ELatencyMs = 0
+ @podsAPIE2ELatencyMs = 0
+
+ @kubeperfTag = "oneagent.containerInsights.LINUX_PERF_BLOB"
+ @kubeservicesTag = "oneagent.containerInsights.KUBE_SERVICES_BLOB"
+ @containerInventoryTag = "oneagent.containerInsights.CONTAINER_INVENTORY_BLOB"
+ @insightsMetricsTag = "oneagent.containerInsights.INSIGHTS_METRICS_BLOB"
end
config_param :run_interval, :time, :default => 60
- config_param :tag, :string, :default => "oms.containerinsights.KubePodInventory"
+ config_param :tag, :string, :default => "oneagent.containerInsights.KUBE_POD_INVENTORY_BLOB"
def configure(conf)
super
@inventoryToMdmConvertor = Inventory2MdmConvertor.new()
end
- def start
+ def start
if @run_interval
+ super
if !ENV["PODS_CHUNK_SIZE"].nil? && !ENV["PODS_CHUNK_SIZE"].empty? && ENV["PODS_CHUNK_SIZE"].to_i > 0
@PODS_CHUNK_SIZE = ENV["PODS_CHUNK_SIZE"].to_i
else
@@ -58,7 +65,7 @@ def start
$log.warn("in_kube_podinventory::start: setting to default value since got PODS_CHUNK_SIZE nil or empty")
@PODS_CHUNK_SIZE = 1000
end
- $log.info("in_kube_podinventory::start : PODS_CHUNK_SIZE @ #{@PODS_CHUNK_SIZE}")
+ $log.info("in_kube_podinventory::start: PODS_CHUNK_SIZE @ #{@PODS_CHUNK_SIZE}")
if !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].nil? && !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].empty? && ENV["PODS_EMIT_STREAM_BATCH_SIZE"].to_i > 0
@PODS_EMIT_STREAM_BATCH_SIZE = ENV["PODS_EMIT_STREAM_BATCH_SIZE"].to_i
@@ -67,8 +74,7 @@ def start
$log.warn("in_kube_podinventory::start: setting to default value since got PODS_EMIT_STREAM_BATCH_SIZE nil or empty")
@PODS_EMIT_STREAM_BATCH_SIZE = 200
end
- $log.info("in_kube_podinventory::start : PODS_EMIT_STREAM_BATCH_SIZE @ #{@PODS_EMIT_STREAM_BATCH_SIZE}")
-
+ $log.info("in_kube_podinventory::start: PODS_EMIT_STREAM_BATCH_SIZE @ #{@PODS_EMIT_STREAM_BATCH_SIZE}")
@finished = false
@condition = ConditionVariable.new
@mutex = Mutex.new
@@ -84,6 +90,7 @@ def shutdown
@condition.signal
}
@thread.join
+ super # This super must be at the end of shutdown method
end
end
@@ -100,7 +107,8 @@ def enumerate(podList = nil)
batchTime = currentTime.utc.iso8601
serviceRecords = []
@podInventoryE2EProcessingLatencyMs = 0
- podInventoryStartTime = (Time.now.to_f * 1000).to_i
+ podInventoryStartTime = (Time.now.to_f * 1000).to_i
+
# Get services first so that we dont need to make a call for very chunk
$log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}")
serviceInfo = KubernetesApiClient.getKubeResourceInfo("services")
@@ -189,12 +197,13 @@ def enumerate(podList = nil)
end
def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batchTime = Time.utc.iso8601)
- currentTime = Time.now
- emitTime = currentTime.to_f
+ currentTime = Time.now
+ emitTime = Fluent::Engine.now
#batchTime = currentTime.utc.iso8601
- eventStream = MultiEventStream.new
- kubePerfEventStream = MultiEventStream.new
- insightsMetricsEventStream = MultiEventStream.new
+ eventStream = Fluent::MultiEventStream.new
+ containerInventoryStream = Fluent::MultiEventStream.new
+ kubePerfEventStream = Fluent::MultiEventStream.new
+ insightsMetricsEventStream = Fluent::MultiEventStream.new
@@istestvar = ENV["ISTEST"]
begin #begin block start
@@ -205,13 +214,8 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc
podInventoryRecords = getPodInventoryRecords(item, serviceRecords, batchTime)
podInventoryRecords.each do |record|
if !record.nil?
- wrapper = {
- "DataType" => "KUBE_POD_INVENTORY_BLOB",
- "IPName" => "ContainerInsights",
- "DataItems" => [record.each { |k, v| record[k] = v }],
- }
- eventStream.add(emitTime, wrapper) if wrapper
- @inventoryToMdmConvertor.process_pod_inventory_record(wrapper)
+ eventStream.add(emitTime, record) if record
+ @inventoryToMdmConvertor.process_pod_inventory_record(record)
end
end
# Setting this flag to true so that we can send ContainerInventory records for containers
@@ -228,13 +232,8 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc
# Send container inventory records for containers on windows nodes
@winContainerCount += containerInventoryRecords.length
containerInventoryRecords.each do |cirecord|
- if !cirecord.nil?
- ciwrapper = {
- "DataType" => "CONTAINER_INVENTORY_BLOB",
- "IPName" => "ContainerInsights",
- "DataItems" => [cirecord.each { |k, v| cirecord[k] = v }],
- }
- eventStream.add(emitTime, ciwrapper) if ciwrapper
+ if !cirecord.nil?
+ containerInventoryStream.add(emitTime, cirecord) if cirecord
end
end
end
@@ -246,7 +245,7 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc
$log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
end
router.emit_stream(@tag, eventStream) if eventStream
- eventStream = MultiEventStream.new
+ eventStream = Fluent::MultiEventStream.new
end
#container perf records
@@ -256,19 +255,17 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc
containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(item, "limits", "cpu", "cpuLimitNanoCores", batchTime))
containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(item, "limits", "memory", "memoryLimitBytes", batchTime))
- containerMetricDataItems.each do |record|
- record["DataType"] = "LINUX_PERF_BLOB"
- record["IPName"] = "LogManagement"
+ containerMetricDataItems.each do |record|
kubePerfEventStream.add(emitTime, record) if record
end
if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && kubePerfEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE
$log.info("in_kube_podinventory::parse_and_emit_records: number of container perf records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}")
- router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
+ router.emit_stream(@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
$log.info("kubeContainerPerfEventEmitStreamSuccess @ #{Time.now.utc.iso8601}")
end
- kubePerfEventStream = MultiEventStream.new
+ kubePerfEventStream = Fluent::MultiEventStream.new
end
# container GPU records
@@ -277,13 +274,8 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc
containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(item, "limits", "nvidia.com/gpu", "containerGpuLimits", batchTime))
containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(item, "requests", "amd.com/gpu", "containerGpuRequests", batchTime))
containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(item, "limits", "amd.com/gpu", "containerGpuLimits", batchTime))
- containerGPUInsightsMetricsDataItems.each do |insightsMetricsRecord|
- wrapper = {
- "DataType" => "INSIGHTS_METRICS_BLOB",
- "IPName" => "ContainerInsights",
- "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }],
- }
- insightsMetricsEventStream.add(emitTime, wrapper) if wrapper
+ containerGPUInsightsMetricsDataItems.each do |insightsMetricsRecord|
+ insightsMetricsEventStream.add(emitTime, insightsMetricsRecord) if insightsMetricsRecord
end
if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && insightsMetricsEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE
@@ -291,8 +283,8 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
$log.info("kubePodInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
end
- router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
- insightsMetricsEventStream = MultiEventStream.new
+ router.emit_stream(@insightsMetricsTag, insightsMetricsEventStream) if insightsMetricsEventStream
+ insightsMetricsEventStream = Fluent::MultiEventStream.new
end
end #podInventory block end
@@ -305,9 +297,18 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc
eventStream = nil
end
+ if containerInventoryStream.count > 0
+ $log.info("in_kube_podinventory::parse_and_emit_records: number of windows container inventory records emitted #{containerInventoryStream.count} @ #{Time.now.utc.iso8601}")
+ router.emit_stream(@containerInventoryTag, containerInventoryStream) if containerInventoryStream
+ if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
+ $log.info("kubeWindowsContainerInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
+ end
+ containerInventoryStream = nil
+ end
+
if kubePerfEventStream.count > 0
$log.info("in_kube_podinventory::parse_and_emit_records: number of perf records emitted #{kubePerfEventStream.count} @ #{Time.now.utc.iso8601}")
- router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
+ router.emit_stream(@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
kubePerfEventStream = nil
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
$log.info("kubeContainerPerfEventEmitStreamSuccess @ #{Time.now.utc.iso8601}")
@@ -316,7 +317,7 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc
if insightsMetricsEventStream.count > 0
$log.info("in_kube_podinventory::parse_and_emit_records: number of insights metrics records emitted #{insightsMetricsEventStream.count} @ #{Time.now.utc.iso8601}")
- router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
+ router.emit_stream(@insightsMetricsTag, insightsMetricsEventStream) if insightsMetricsEventStream
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
$log.info("kubePodInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
end
@@ -327,7 +328,7 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc
@log.info "Sending pod inventory mdm records to out_mdm"
pod_inventory_mdm_records = @inventoryToMdmConvertor.get_pod_inventory_mdm_records(batchTime)
@log.info "pod_inventory_mdm_records.size #{pod_inventory_mdm_records.size}"
- mdm_pod_inventory_es = MultiEventStream.new
+ mdm_pod_inventory_es = Fluent::MultiEventStream.new
pod_inventory_mdm_records.each { |pod_inventory_mdm_record|
mdm_pod_inventory_es.add(batchTime, pod_inventory_mdm_record) if pod_inventory_mdm_record
} if pod_inventory_mdm_records
@@ -335,22 +336,17 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc
end
if continuationToken.nil? # sending kube services inventory records
- kubeServicesEventStream = MultiEventStream.new
+ kubeServicesEventStream = Fluent::MultiEventStream.new
serviceRecords.each do |kubeServiceRecord|
if !kubeServiceRecord.nil?
# adding before emit to reduce memory foot print
kubeServiceRecord["ClusterId"] = KubernetesApiClient.getClusterId
- kubeServiceRecord["ClusterName"] = KubernetesApiClient.getClusterName
- kubeServicewrapper = {
- "DataType" => "KUBE_SERVICES_BLOB",
- "IPName" => "ContainerInsights",
- "DataItems" => [kubeServiceRecord.each { |k, v| kubeServiceRecord[k] = v }],
- }
- kubeServicesEventStream.add(emitTime, kubeServicewrapper) if kubeServicewrapper
+ kubeServiceRecord["ClusterName"] = KubernetesApiClient.getClusterName
+ kubeServicesEventStream.add(emitTime, kubeServiceRecord) if kubeServiceRecord
if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && kubeServicesEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE
$log.info("in_kube_podinventory::parse_and_emit_records: number of service records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}")
- router.emit_stream(@@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream
- kubeServicesEventStream = MultiEventStream.new
+ router.emit_stream(@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream
+ kubeServicesEventStream = Fluent::MultiEventStream.new
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
$log.info("kubeServicesEventEmitStreamSuccess @ #{Time.now.utc.iso8601}")
end
@@ -360,7 +356,7 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc
if kubeServicesEventStream.count > 0
$log.info("in_kube_podinventory::parse_and_emit_records : number of service records emitted #{kubeServicesEventStream.count} @ #{Time.now.utc.iso8601}")
- router.emit_stream(@@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream
+ router.emit_stream(@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
$log.info("kubeServicesEventEmitStreamSuccess @ #{Time.now.utc.iso8601}")
end
@@ -652,6 +648,6 @@ def getServiceNameFromLabels(namespace, labels, serviceRecords)
ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
end
return serviceName
- end
+ end
end # Kube_Pod_Input
end # module
diff --git a/source/plugins/ruby/in_kube_pvinventory.rb b/source/plugins/ruby/in_kube_pvinventory.rb
index 4efe86f61..40eebac8a 100644
--- a/source/plugins/ruby/in_kube_pvinventory.rb
+++ b/source/plugins/ruby/in_kube_pvinventory.rb
@@ -1,6 +1,11 @@
-module Fluent
+#!/usr/local/bin/ruby
+# frozen_string_literal: true
+
+require 'fluent/plugin/input'
+
+module Fluent::Plugin
class Kube_PVInventory_Input < Input
- Plugin.register_input("kubepvinventory", self)
+ Fluent::Plugin.register_input("kube_pvinventory", self)
@@hostName = (OMS::Common.get_hostname)
@@ -22,14 +27,15 @@ def initialize
end
config_param :run_interval, :time, :default => 60
- config_param :tag, :string, :default => "oms.containerinsights.KubePVInventory"
+ config_param :tag, :string, :default => "oneagent.containerInsights.KUBE_PV_INVENTORY_BLOB"
def configure(conf)
super
end
- def start
+ def start
if @run_interval
+ super
@finished = false
@condition = ConditionVariable.new
@mutex = Mutex.new
@@ -45,6 +51,7 @@ def shutdown
@condition.signal
}
@thread.join
+ super
end
end
@@ -54,7 +61,7 @@ def enumerate
telemetryFlush = false
@pvTypeToCountHash = {}
currentTime = Time.now
- batchTime = currentTime.utc.iso8601
+ batchTime = currentTime.utc.iso8601
continuationToken = nil
$log.info("in_kube_pvinventory::enumerate : Getting PVs from Kube API @ #{Time.now.utc.iso8601}")
@@ -103,9 +110,9 @@ def enumerate
end # end enumerate
def parse_and_emit_records(pvInventory, batchTime = Time.utc.iso8601)
- currentTime = Time.now
- emitTime = currentTime.to_f
- eventStream = MultiEventStream.new
+ currentTime = Time.now
+ emitTime = Fluent::Engine.now
+ eventStream = Fluent::MultiEventStream.new
@@istestvar = ENV["ISTEST"]
begin
records = []
@@ -145,13 +152,8 @@ def parse_and_emit_records(pvInventory, batchTime = Time.utc.iso8601)
end
records.each do |record|
- if !record.nil?
- wrapper = {
- "DataType" => "KUBE_PV_INVENTORY_BLOB",
- "IPName" => "ContainerInsights",
- "DataItems" => [record.each { |k, v| record[k] = v }],
- }
- eventStream.add(emitTime, wrapper) if wrapper
+ if !record.nil?
+ eventStream.add(emitTime, record)
end
end
@@ -250,7 +252,6 @@ def run_periodic
@mutex.lock
end
@mutex.unlock
- end
-
+ end
end # Kube_PVInventory_Input
-end # module
\ No newline at end of file
+end # module
diff --git a/source/plugins/ruby/in_kubestate_deployments.rb b/source/plugins/ruby/in_kubestate_deployments.rb
index 27e4709a2..182c3ffc1 100644
--- a/source/plugins/ruby/in_kubestate_deployments.rb
+++ b/source/plugins/ruby/in_kubestate_deployments.rb
@@ -1,9 +1,11 @@
#!/usr/local/bin/ruby
# frozen_string_literal: true
-module Fluent
+require 'fluent/plugin/input'
+
+module Fluent::Plugin
class Kube_Kubestate_Deployments_Input < Input
- Plugin.register_input("kubestatedeployments", self)
+ Fluent::Plugin.register_input("kubestate_deployments", self)
@@istestvar = ENV["ISTEST"]
# telemetry - To keep telemetry cost reasonable, we keep track of the max deployments over a period of 15m
@@deploymentsCount = 0
@@ -36,14 +38,15 @@ def initialize
end
config_param :run_interval, :time, :default => 60
- config_param :tag, :string, :default => Constants::INSIGHTSMETRICS_FLUENT_TAG
+ config_param :tag, :string, :default => "oneagent.containerInsights.INSIGHTS_METRICS_BLOB"
def configure(conf)
super
end
- def start
+ def start
if @run_interval
+ super
if !ENV["DEPLOYMENTS_CHUNK_SIZE"].nil? && !ENV["DEPLOYMENTS_CHUNK_SIZE"].empty? && ENV["DEPLOYMENTS_CHUNK_SIZE"].to_i > 0
@DEPLOYMENTS_CHUNK_SIZE = ENV["DEPLOYMENTS_CHUNK_SIZE"].to_i
else
@@ -52,11 +55,11 @@ def start
@DEPLOYMENTS_CHUNK_SIZE = 500
end
$log.info("in_kubestate_deployments::start : DEPLOYMENTS_CHUNK_SIZE @ #{@DEPLOYMENTS_CHUNK_SIZE}")
-
+
@finished = false
@condition = ConditionVariable.new
@mutex = Mutex.new
- @thread = Thread.new(&method(:run_periodic))
+ @thread = Thread.new(&method(:run_periodic))
end
end
@@ -67,6 +70,7 @@ def shutdown
@condition.signal
}
@thread.join
+ super # This super must be at the end of shutdown method
end
end
@@ -77,8 +81,8 @@ def enumerate
batchTime = currentTime.utc.iso8601
#set the running total for this batch to 0
- @deploymentsRunningTotal = 0
-
+ @deploymentsRunningTotal = 0
+
# Initializing continuation token to nil
continuationToken = nil
$log.info("in_kubestate_deployments::enumerate : Getting deployments from Kube API @ #{Time.now.utc.iso8601}")
@@ -126,7 +130,7 @@ def enumerate
def parse_and_emit_records(deployments, batchTime = Time.utc.iso8601)
metricItems = []
- insightsMetricsEventStream = MultiEventStream.new
+ insightsMetricsEventStream = Fluent::MultiEventStream.new
begin
metricInfo = deployments
metricInfo["items"].each do |deployment|
@@ -181,17 +185,12 @@ def parse_and_emit_records(deployments, batchTime = Time.utc.iso8601)
metricItems.push(metricItem)
end
- time = Time.now.to_f
- metricItems.each do |insightsMetricsRecord|
- wrapper = {
- "DataType" => "INSIGHTS_METRICS_BLOB",
- "IPName" => "ContainerInsights",
- "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }],
- }
- insightsMetricsEventStream.add(time, wrapper) if wrapper
+ time = Fluent::Engine.now
+ metricItems.each do |insightsMetricsRecord|
+ insightsMetricsEventStream.add(time, insightsMetricsRecord) if insightsMetricsRecord
end
- router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
+ router.emit_stream(@tag, insightsMetricsEventStream) if insightsMetricsEventStream
$log.info("successfully emitted #{metricItems.length()} kube_state_deployment metrics")
@deploymentsRunningTotal = @deploymentsRunningTotal + metricItems.length()
@@ -234,6 +233,6 @@ def run_periodic
@mutex.lock
end
@mutex.unlock
- end
+ end
end
end
diff --git a/source/plugins/ruby/in_kubestate_hpa.rb b/source/plugins/ruby/in_kubestate_hpa.rb
index afecf8e3b..8f60bfb72 100644
--- a/source/plugins/ruby/in_kubestate_hpa.rb
+++ b/source/plugins/ruby/in_kubestate_hpa.rb
@@ -1,9 +1,11 @@
#!/usr/local/bin/ruby
# frozen_string_literal: true
-module Fluent
+require 'fluent/plugin/input'
+
+module Fluent::Plugin
class Kube_Kubestate_HPA_Input < Input
- Plugin.register_input("kubestatehpa", self)
+ Fluent::Plugin.register_input("kubestate_hpa", self)
@@istestvar = ENV["ISTEST"]
def initialize
@@ -16,7 +18,7 @@ def initialize
require_relative "oms_common"
require_relative "omslog"
require_relative "ApplicationInsightsUtility"
- require_relative "constants"
+ require_relative "constants"
# refer tomlparser-agent-config for defaults
# this configurable via configmap
@@ -33,14 +35,15 @@ def initialize
end
config_param :run_interval, :time, :default => 60
- config_param :tag, :string, :default => Constants::INSIGHTSMETRICS_FLUENT_TAG
+ config_param :tag, :string, :default => "oneagent.containerInsights.INSIGHTS_METRICS_BLOB"
def configure(conf)
super
end
- def start
+ def start
if @run_interval
+ super
if !ENV["HPA_CHUNK_SIZE"].nil? && !ENV["HPA_CHUNK_SIZE"].empty? && ENV["HPA_CHUNK_SIZE"].to_i > 0
@HPA_CHUNK_SIZE = ENV["HPA_CHUNK_SIZE"].to_i
else
@@ -64,6 +67,7 @@ def shutdown
@condition.signal
}
@thread.join
+ super
end
end
@@ -74,7 +78,7 @@ def enumerate
batchTime = currentTime.utc.iso8601
@hpaCount = 0
-
+
# Initializing continuation token to nil
continuationToken = nil
$log.info("in_kubestate_hpa::enumerate : Getting HPAs from Kube API @ #{Time.now.utc.iso8601}")
@@ -113,7 +117,7 @@ def enumerate
def parse_and_emit_records(hpas, batchTime = Time.utc.iso8601)
metricItems = []
- insightsMetricsEventStream = MultiEventStream.new
+ insightsMetricsEventStream = Fluent::MultiEventStream.new
begin
metricInfo = hpas
metricInfo["items"].each do |hpa|
@@ -181,17 +185,12 @@ def parse_and_emit_records(hpas, batchTime = Time.utc.iso8601)
metricItems.push(metricItem)
end
- time = Time.now.to_f
- metricItems.each do |insightsMetricsRecord|
- wrapper = {
- "DataType" => "INSIGHTS_METRICS_BLOB",
- "IPName" => "ContainerInsights",
- "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }],
- }
- insightsMetricsEventStream.add(time, wrapper) if wrapper
+ time = Fluent::Engine.now
+ metricItems.each do |insightsMetricsRecord|
+ insightsMetricsEventStream.add(time, insightsMetricsRecord) if insightsMetricsRecord
end
- router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
+ router.emit_stream(@tag, insightsMetricsEventStream) if insightsMetricsEventStream
$log.info("successfully emitted #{metricItems.length()} kube_state_hpa metrics")
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0)
$log.info("kubestatehpaInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
@@ -232,6 +231,6 @@ def run_periodic
@mutex.lock
end
@mutex.unlock
- end
+ end
end
end
diff --git a/source/plugins/ruby/in_win_cadvisor_perf.rb b/source/plugins/ruby/in_win_cadvisor_perf.rb
index 9c267cf4f..61e823ea6 100644
--- a/source/plugins/ruby/in_win_cadvisor_perf.rb
+++ b/source/plugins/ruby/in_win_cadvisor_perf.rb
@@ -1,9 +1,11 @@
#!/usr/local/bin/ruby
# frozen_string_literal: true
-module Fluent
+require 'fluent/plugin/input'
+
+module Fluent::Plugin
class Win_CAdvisor_Perf_Input < Input
- Plugin.register_input("wincadvisorperf", self)
+ Fluent::Plugin.register_input("win_cadvisor_perf", self)
@@winNodes = []
@@ -18,10 +20,11 @@ def initialize
require_relative "oms_common"
require_relative "omslog"
require_relative "constants"
+ @insightsMetricsTag = "oneagent.containerInsights.INSIGHTS_METRICS_BLOB"
end
config_param :run_interval, :time, :default => 60
- config_param :tag, :string, :default => "oms.api.wincadvisorperf"
+ config_param :tag, :string, :default => "oneagent.containerInsights.LINUX_PERF_BLOB"
config_param :mdmtag, :string, :default => "mdm.cadvisorperf"
def configure(conf)
@@ -50,11 +53,11 @@ def shutdown
end
def enumerate()
- time = Time.now.to_f
+ time = Fluent::Engine.now
begin
timeDifference = (DateTime.now.to_time.to_i - @@winNodeQueryTimeTracker).abs
timeDifferenceInMinutes = timeDifference / 60
- @@istestvar = ENV["ISTEST"]
+ @@istestvar = ENV["ISTEST"]
#Resetting this cache so that it is populated with the current set of containers with every call
CAdvisorMetricsAPIClient.resetWinContainerIdCache()
@@ -68,12 +71,10 @@ def enumerate()
@@winNodeQueryTimeTracker = DateTime.now.to_time.to_i
end
@@winNodes.each do |winNode|
- eventStream = MultiEventStream.new
+ eventStream = Fluent::MultiEventStream.new
metricData = CAdvisorMetricsAPIClient.getMetrics(winNode: winNode, metricTime: Time.now.utc.iso8601)
metricData.each do |record|
if !record.empty?
- record["DataType"] = "LINUX_PERF_BLOB"
- record["IPName"] = "LogManagement"
eventStream.add(time, record) if record
end
end
@@ -88,18 +89,13 @@ def enumerate()
begin
containerGPUusageInsightsMetricsDataItems = []
containerGPUusageInsightsMetricsDataItems.concat(CAdvisorMetricsAPIClient.getInsightsMetrics(winNode: winNode, metricTime: Time.now.utc.iso8601))
- insightsMetricsEventStream = MultiEventStream.new
+ insightsMetricsEventStream = Fluent::MultiEventStream.new
containerGPUusageInsightsMetricsDataItems.each do |insightsMetricsRecord|
- wrapper = {
- "DataType" => "INSIGHTS_METRICS_BLOB",
- "IPName" => "ContainerInsights",
- "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }],
- }
- insightsMetricsEventStream.add(time, wrapper) if wrapper
+ insightsMetricsEventStream.add(time, insightsMetricsRecord) if insightsMetricsRecord
end
- router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
+ router.emit_stream(@insightsMetricsTag, insightsMetricsEventStream) if insightsMetricsEventStream
router.emit_stream(@mdmtag, insightsMetricsEventStream) if insightsMetricsEventStream
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0)
$log.info("winCAdvisorInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
diff --git a/source/plugins/ruby/out_health_forward.rb b/source/plugins/ruby/out_health_forward.rb
index 6fcfe368b..59eed97da 100644
--- a/source/plugins/ruby/out_health_forward.rb
+++ b/source/plugins/ruby/out_health_forward.rb
@@ -15,469 +15,593 @@
# limitations under the License.
#
-require 'base64'
-require 'socket'
-require 'fileutils'
-
-require 'cool.io'
-
require 'fluent/output'
require 'fluent/config/error'
+require 'fluent/clock'
+require 'fluent/tls'
+require 'base64'
+require 'forwardable'
-module Fluent
- class ForwardOutputError < StandardError
- end
-
- class ForwardOutputResponseError < ForwardOutputError
- end
+require 'fluent/compat/socket_util'
+require 'fluent/plugin/out_forward/handshake_protocol'
+require 'fluent/plugin/out_forward/load_balancer'
+require 'fluent/plugin/out_forward/socket_cache'
+require 'fluent/plugin/out_forward/failure_detector'
+require 'fluent/plugin/out_forward/error'
+require 'fluent/plugin/out_forward/connection_manager'
+require 'fluent/plugin/out_forward/ack_handler'
- class ForwardOutputConnectionClosedError < ForwardOutputError
- end
+module Fluent::Plugin
+ class HealthForwardOutput < Output
+ Fluent::Plugin.register_output('health_forward', self)
- class ForwardOutputACKTimeoutError < ForwardOutputResponseError
- end
+ helpers :socket, :server, :timer, :thread, :compat_parameters, :service_discovery
- class HealthForwardOutput < ObjectBufferedOutput
- Plugin.register_output('health_forward', self)
+ LISTEN_PORT = 25227
- def initialize
- super
- require 'fluent/plugin/socket_util'
- @nodes = [] #=> [Node]
- end
+ desc 'The transport protocol.'
+ config_param :transport, :enum, list: [:tcp, :tls], default: :tcp
+ # TODO: TLS session cache/tickets
desc 'The timeout time when sending event logs.'
config_param :send_timeout, :time, default: 60
- desc 'The transport protocol to use for heartbeats.(udp,tcp,none)'
- config_param :heartbeat_type, default: :udp do |val|
- case val.downcase
- when 'tcp'
- :tcp
- when 'udp'
- :udp
- when 'none'
- :none
- else
- raise ConfigError, "forward output heartbeat type should be 'tcp', 'udp', or 'none'"
- end
- end
+ desc 'The timeout time for socket connect'
+ config_param :connect_timeout, :time, default: nil
+ # TODO: add linger_timeout, recv_timeout
+
+ desc 'The protocol to use for heartbeats (default is the same with "transport").'
+ config_param :heartbeat_type, :enum, list: [:transport, :tcp, :udp, :none], default: :transport
desc 'The interval of the heartbeat packer.'
config_param :heartbeat_interval, :time, default: 1
desc 'The wait time before accepting a server fault recovery.'
config_param :recover_wait, :time, default: 10
desc 'The hard timeout used to detect server failure.'
config_param :hard_timeout, :time, default: 60
- desc 'Set TTL to expire DNS cache in seconds.'
- config_param :expire_dns_cache, :time, default: nil # 0 means disable cache
desc 'The threshold parameter used to detect server faults.'
config_param :phi_threshold, :integer, default: 16
desc 'Use the "Phi accrual failure detector" to detect server failure.'
config_param :phi_failure_detector, :bool, default: true
- # if any options added that requires extended forward api, fix @extend_internal_protocol
-
desc 'Change the protocol to at-least-once.'
config_param :require_ack_response, :bool, default: false # require in_forward to respond with ack
- desc 'This option is used when require_ack_response is true.'
- config_param :ack_response_timeout, :time, default: 190 # 0 means do not wait for ack responses
+
+ ## The reason of default value of :ack_response_timeout:
# Linux default tcp_syn_retries is 5 (in many environment)
# 3 + 6 + 12 + 24 + 48 + 96 -> 189 (sec)
+ desc 'This option is used when require_ack_response is true.'
+ config_param :ack_response_timeout, :time, default: 190
+
+ desc 'The interval while reading data from server'
+ config_param :read_interval_msec, :integer, default: 50 # 50ms
+ desc 'Reading data size from server'
+ config_param :read_length, :size, default: 512 # 512bytes
+
+ desc 'Set TTL to expire DNS cache in seconds.'
+ config_param :expire_dns_cache, :time, default: nil # 0 means disable cache
desc 'Enable client-side DNS round robin.'
config_param :dns_round_robin, :bool, default: false # heartbeat_type 'udp' is not available for this
+ desc 'Ignore DNS resolution and errors at startup time.'
+ config_param :ignore_network_errors_at_startup, :bool, default: false
+
+ desc 'Verify that a connection can be made with one of out_forward nodes at the time of startup.'
+ config_param :verify_connection_at_startup, :bool, default: false
+
+ desc 'Compress buffered data.'
+ config_param :compress, :enum, list: [:text, :gzip], default: :text
+
+ desc 'The default version of TLS transport.'
+ config_param :tls_version, :enum, list: Fluent::TLS::SUPPORTED_VERSIONS, default: Fluent::TLS::DEFAULT_VERSION
+ desc 'The cipher configuration of TLS transport.'
+ config_param :tls_ciphers, :string, default: Fluent::TLS::CIPHERS_DEFAULT
+ desc 'Skip all verification of certificates or not.'
+ config_param :tls_insecure_mode, :bool, default: false
+ desc 'Allow self signed certificates or not.'
+ config_param :tls_allow_self_signed_cert, :bool, default: false
+ desc 'Verify hostname of servers and certificates or not in TLS transport.'
+ config_param :tls_verify_hostname, :bool, default: true
+ desc 'The additional CA certificate path for TLS.'
+ config_param :tls_ca_cert_path, :array, value_type: :string, default: nil
+ desc 'The additional certificate path for TLS.'
+ config_param :tls_cert_path, :array, value_type: :string, default: nil
+ desc 'The client certificate path for TLS.'
+ config_param :tls_client_cert_path, :string, default: nil
+ desc 'The client private key path for TLS.'
+ config_param :tls_client_private_key_path, :string, default: nil
+ desc 'The client private key passphrase for TLS.'
+ config_param :tls_client_private_key_passphrase, :string, default: nil, secret: true
+ desc 'The certificate thumbprint for searching from Windows system certstore.'
+ config_param :tls_cert_thumbprint, :string, default: nil, secret: true
+ desc 'The certificate logical store name on Windows system certstore.'
+ config_param :tls_cert_logical_store_name, :string, default: nil
+ desc 'Enable to use certificate enterprise store on Windows system certstore.'
+ config_param :tls_cert_use_enterprise_store, :bool, default: true
+ desc "Enable keepalive connection."
+ config_param :keepalive, :bool, default: false
+ desc "Expired time of keepalive. Default value is nil, which means to keep connection as long as possible"
+ config_param :keepalive_timeout, :time, default: nil
+
+ config_section :security, required: false, multi: false do
+ desc 'The hostname'
+ config_param :self_hostname, :string
+ desc 'Shared key for authentication'
+ config_param :shared_key, :string, secret: true
+ end
+
+ config_section :server, param_name: :servers do
+ desc "The IP address or host name of the server."
+ config_param :host, :string
+ desc "The name of the server. Used for logging and certificate verification in TLS transport (when host is address)."
+ config_param :name, :string, default: nil
+ desc "The port number of the host."
+ config_param :port, :integer, default: LISTEN_PORT
+ desc "The shared key per server."
+ config_param :shared_key, :string, default: nil, secret: true
+ desc "The username for authentication."
+ config_param :username, :string, default: ''
+ desc "The password for authentication."
+ config_param :password, :string, default: '', secret: true
+ desc "Marks a node as the standby node for an Active-Standby model between Fluentd nodes."
+ config_param :standby, :bool, default: false
+ desc "The load balancing weight."
+ config_param :weight, :integer, default: 60
+ end
+
attr_reader :nodes
- config_param :port, :integer, default: DEFAULT_LISTEN_PORT, deprecated: "User host xxx instead."
- config_param :host, :string, default: nil, deprecated: "Use port xxx instead."
- desc 'Skip network related error, e.g. DNS error, during plugin setup'
- config_param :skip_network_error_at_init, :bool, :default => false
+ config_param :port, :integer, default: LISTEN_PORT, obsoleted: "User section instead."
+ config_param :host, :string, default: nil, obsoleted: "Use section instead."
+ config_section :buffer do
+ config_set_default :chunk_keys, ["tag"]
+ end
- attr_accessor :extend_internal_protocol
+ attr_reader :read_interval, :recover_sample_size
- def configure(conf)
+ def initialize
super
- # backward compatibility
- if host = conf['host']
- port = conf['port']
- port = port ? port.to_i : DEFAULT_LISTEN_PORT
- e = conf.add_element('server')
- e['host'] = host
- e['port'] = port.to_s
- end
+ @nodes = [] #=> [Node]
+ @loop = nil
+ @thread = nil
- recover_sample_size = @recover_wait / @heartbeat_interval
+ @usock = nil
+ @keep_alive_watcher_interval = 5 # TODO
+ @suspend_flush = false
+ end
- # add options here if any options addes which uses extended protocol
- @extend_internal_protocol = if @require_ack_response
- true
- else
- false
- end
+ def configure(conf)
+ compat_parameters_convert(conf, :buffer, default_chunk_key: 'tag')
- if @dns_round_robin
- if @heartbeat_type == :udp
- raise ConfigError, "forward output heartbeat type must be 'tcp' or 'none' to use dns_round_robin option"
- end
- end
+ super
- conf.elements.each {|e|
- next if e.name != "server"
+ unless @chunk_key_tag
+ raise Fluent::ConfigError, "buffer chunk key must include 'tag' for forward output"
+ end
- host = e['host']
- port = e['port']
- port = port ? port.to_i : DEFAULT_LISTEN_PORT
+ @read_interval = @read_interval_msec / 1000.0
+ @recover_sample_size = @recover_wait / @heartbeat_interval
- weight = e['weight']
- weight = weight ? weight.to_i : 60
+ if @heartbeat_type == :tcp
+ log.warn "'heartbeat_type tcp' is deprecated. use 'transport' instead."
+ @heartbeat_type = :transport
+ end
- standby = !!e['standby']
+ if @dns_round_robin && @heartbeat_type == :udp
+ raise Fluent::ConfigError, "forward output heartbeat type must be 'transport' or 'none' to use dns_round_robin option"
+ end
- name = e['name']
- unless name
- name = "#{host}:#{port}"
+ if @transport == :tls
+ # socket helper adds CA cert or signed certificate to same cert store internally so unify it in this place.
+ if @tls_cert_path && !@tls_cert_path.empty?
+ @tls_ca_cert_path = @tls_cert_path
+ end
+ if @tls_ca_cert_path && !@tls_ca_cert_path.empty?
+ @tls_ca_cert_path.each do |path|
+ raise Fluent::ConfigError, "specified cert path does not exist:#{path}" unless File.exist?(path)
+ raise Fluent::ConfigError, "specified cert path is not readable:#{path}" unless File.readable?(path)
+ end
end
- failure = FailureDetector.new(@heartbeat_interval, @hard_timeout, Time.now.to_i.to_f)
-
- node_conf = NodeConfig2.new(name, host, port, weight, standby, failure,
- @phi_threshold, recover_sample_size, @expire_dns_cache, @phi_failure_detector, @dns_round_robin, @skip_network_error_at_init)
+ if @tls_insecure_mode
+ log.warn "TLS transport is configured in insecure way"
+ @tls_verify_hostname = false
+ @tls_allow_self_signed_cert = true
+ end
- if @heartbeat_type == :none
- @nodes << NoneHeartbeatNode.new(log, node_conf)
+ if Fluent.windows?
+ if (@tls_cert_path || @tls_ca_cert_path) && @tls_cert_logical_store_name
+ raise Fluent::ConfigError, "specified both cert path and tls_cert_logical_store_name is not permitted"
+ end
else
- @nodes << Node.new(log, node_conf)
+ raise Fluent::ConfigError, "This parameter is for only Windows" if @tls_cert_logical_store_name
+ raise Fluent::ConfigError, "This parameter is for only Windows" if @tls_cert_thumbprint
end
- log.info "adding forwarding server '#{name}'", host: host, port: port, weight: weight, plugin_id: plugin_id
- }
+ end
+
+ @ack_handler = @require_ack_response ? AckHandler.new(timeout: @ack_response_timeout, log: @log, read_length: @read_length) : nil
+ socket_cache = @keepalive ? SocketCache.new(@keepalive_timeout, @log) : nil
+ @connection_manager = Fluent::Plugin::ForwardOutput::ConnectionManager.new(
+ log: @log,
+ secure: !!@security,
+ connection_factory: method(:create_transfer_socket),
+ socket_cache: socket_cache,
+ )
- if @nodes.empty?
- raise ConfigError, "forward output plugin requires at least one is required"
+ configs = []
+
+ # rewrite for using server as sd_static
+ conf.elements(name: 'server').each do |s|
+ s.name = 'service'
end
- end
- def start
- super
+ unless conf.elements(name: 'service').empty?
+ # To copy `services` element only
+ new_elem = Fluent::Config::Element.new('static_service_discovery', {}, {}, conf.elements(name: 'service'))
+ configs << { type: :static, conf: new_elem }
+ end
- @rand_seed = Random.new.seed
- rebuild_weight_array
- @rr = 0
+ conf.elements(name: 'service_discovery').each_with_index do |c, i|
+ configs << { type: @service_discovery[i][:@type], conf: c }
+ end
- unless @heartbeat_type == :none
- @loop = Coolio::Loop.new
+ service_discovery_create_manager(
+ :out_forward_service_discovery_watcher,
+ configurations: configs,
+ load_balancer: Fluent::Plugin::ForwardOutput::LoadBalancer.new(log),
+ custom_build_method: method(:build_node),
+ )
- if @heartbeat_type == :udp
- # assuming all hosts use udp
- @usock = SocketUtil.create_udp_socket(@nodes.first.host)
- @usock.fcntl(Fcntl::F_SETFL, Fcntl::O_NONBLOCK)
- @hb = HeartbeatHandler.new(@usock, method(:on_heartbeat))
- @loop.attach(@hb)
+ discovery_manager.services.each do |server|
+ # it's only for test
+ @nodes << server
+ unless @heartbeat_type == :none
+ begin
+ server.validate_host_resolution!
+ rescue => e
+ raise unless @ignore_network_errors_at_startup
+ log.warn "failed to resolve node name when configured", server: (server.name || server.host), error: e
+ server.disable!
+ end
end
+ end
- @timer = HeartbeatRequestTimer.new(@heartbeat_interval, method(:on_timer))
- @loop.attach(@timer)
+ unless @as_secondary
+ if @compress == :gzip && @buffer.compress == :text
+ @buffer.compress = :gzip
+ elsif @compress == :text && @buffer.compress == :gzip
+ log.info "buffer is compressed. If you also want to save the bandwidth of a network, Add `compress` configuration in "
+ end
+ end
- @thread = Thread.new(&method(:run))
+ if discovery_manager.services.empty?
+ raise Fluent::ConfigError, "forward output plugin requires at least one node is required. Add or "
end
- end
- def shutdown
- @finished = true
- if @loop
- @loop.watchers.each {|w| w.detach }
- @loop.stop
+ if !@keepalive && @keepalive_timeout
+ log.warn('The value of keepalive_timeout is ignored. if you want to use keepalive, please add `keepalive true` to your conf.')
end
- @thread.join if @thread
- @usock.close if @usock
+
+ raise Fluent::ConfigError, "ack_response_timeout must be a positive integer" if @ack_response_timeout < 1
end
- def run
- @loop.run if @loop
- rescue
- log.error "unexpected error", error: $!.to_s
- log.error_backtrace
+ def multi_workers_ready?
+ true
end
- def write_objects(tag, chunk)
- return if chunk.empty?
+ def prefer_delayed_commit
+ @require_ack_response
+ end
- error = nil
+ def overwrite_delayed_commit_timeout
+ # Output#start sets @delayed_commit_timeout by @buffer_config.delayed_commit_timeout
+ # But it should be overwritten by ack_response_timeout to rollback chunks after timeout
+ if @delayed_commit_timeout != @ack_response_timeout
+ log.info "delayed_commit_timeout is overwritten by ack_response_timeout"
+ @delayed_commit_timeout = @ack_response_timeout + 2 # minimum ack_reader IO.select interval is 1s
+ end
+ end
- wlen = @weight_array.length
- wlen.times do
- @rr = (@rr + 1) % wlen
- node = @weight_array[@rr]
+ def start
+ super
- if node.available?
+ unless @heartbeat_type == :none
+ if @heartbeat_type == :udp
+ @usock = socket_create_udp(discovery_manager.services.first.host, discovery_manager.services.first.port, nonblock: true)
+ server_create_udp(:out_forward_heartbeat_receiver, 0, socket: @usock, max_bytes: @read_length, &method(:on_udp_heatbeat_response_recv))
+ end
+ timer_execute(:out_forward_heartbeat_request, @heartbeat_interval, &method(:on_heartbeat_timer))
+ end
+
+ if @require_ack_response
+ overwrite_delayed_commit_timeout
+ thread_create(:out_forward_receiving_ack, &method(:ack_reader))
+ end
+
+ if @verify_connection_at_startup
+ discovery_manager.services.each do |node|
begin
- send_data(node, tag, chunk)
- return
- rescue
- # for load balancing during detecting crashed servers
- error = $! # use the latest error
+ node.verify_connection
+ rescue StandardError => e
+ log.fatal "forward's connection setting error: #{e.message}"
+ raise Fluent::UnrecoverableError, e.message
end
end
end
- if error
- raise error
- else
- raise "no nodes are available" # TODO message
+ if @keepalive
+ timer_execute(:out_forward_keep_alived_socket_watcher, @keep_alive_watcher_interval, &method(:on_purge_obsolete_socks))
end
end
- private
+ def close
+ if @usock
+ # close socket and ignore errors: this socket will not be used anyway.
+ @usock.close rescue nil
+ end
- def rebuild_weight_array
- standby_nodes, regular_nodes = @nodes.partition {|n|
- n.standby?
- }
+ super
+ end
- lost_weight = 0
- regular_nodes.each {|n|
- unless n.available?
- lost_weight += n.weight
- end
- }
- log.debug "rebuilding weight array", lost_weight: lost_weight
-
- if lost_weight > 0
- standby_nodes.each {|n|
- if n.available?
- regular_nodes << n
- log.warn "using standby node #{n.host}:#{n.port}", weight: n.weight
- lost_weight -= n.weight
- break if lost_weight <= 0
- end
- }
+ def stop
+ super
+
+ if @keepalive
+ @connection_manager.stop
end
+ end
+
+ def before_shutdown
+ super
+ @suspend_flush = true
+ end
+
+ def after_shutdown
+ last_ack if @require_ack_response
+ super
+ end
- weight_array = []
- gcd = regular_nodes.map {|n| n.weight }.inject(0) {|r,w| r.gcd(w) }
- regular_nodes.each {|n|
- (n.weight / gcd).times {
- weight_array << n
- }
- }
+ def try_flush
+ return if @require_ack_response && @suspend_flush
+ super
+ end
- # for load balancing during detecting crashed servers
- coe = (regular_nodes.size * 6) / weight_array.size
- weight_array *= coe if coe > 1
+ def last_ack
+ overwrite_delayed_commit_timeout
+ ack_check(ack_select_interval)
+ end
- r = Random.new(@rand_seed)
- weight_array.sort_by! { r.rand }
+ def write(chunk)
+ return if chunk.empty?
+ tag = chunk.metadata.tag
- @weight_array = weight_array
+ discovery_manager.select_service { |node| node.send_data(tag, chunk) }
end
- # MessagePack FixArray length = 3 (if @extend_internal_protocol)
- # = 2 (else)
- FORWARD_HEADER = [0x92].pack('C').freeze
- FORWARD_HEADER_EXT = [0x93].pack('C').freeze
- def forward_header
- if @extend_internal_protocol
- FORWARD_HEADER_EXT
- else
- FORWARD_HEADER
+ def try_write(chunk)
+ log.trace "writing a chunk to destination", chunk_id: dump_unique_id_hex(chunk.unique_id)
+ if chunk.empty?
+ commit_write(chunk.unique_id)
+ return
end
+ tag = chunk.metadata.tag
+ discovery_manager.select_service { |node| node.send_data(tag, chunk) }
+ last_ack if @require_ack_response && @suspend_flush
end
- #FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack
- def send_heartbeat_tcp(node)
- sock = connect(node)
- begin
- opt = [1, @send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
- sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
- opt = [@send_timeout.to_i, 0].pack('L!L!') # struct timeval
- # don't send any data to not cause a compatibility problem
- #sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
- #sock.write FORWARD_TCP_HEARTBEAT_DATA
- node.heartbeat(true)
- ensure
- sock.close
+ def create_transfer_socket(host, port, hostname, &block)
+ case @transport
+ when :tls
+ socket_create_tls(
+ host, port,
+ version: @tls_version,
+ ciphers: @tls_ciphers,
+ insecure: @tls_insecure_mode,
+ verify_fqdn: @tls_verify_hostname,
+ fqdn: hostname,
+ allow_self_signed_cert: @tls_allow_self_signed_cert,
+ cert_paths: @tls_ca_cert_path,
+ cert_path: @tls_client_cert_path,
+ private_key_path: @tls_client_private_key_path,
+ private_key_passphrase: @tls_client_private_key_passphrase,
+ cert_thumbprint: @tls_cert_thumbprint,
+ cert_logical_store_name: @tls_cert_logical_store_name,
+ cert_use_enterprise_store: @tls_cert_use_enterprise_store,
+
+ # Enabling SO_LINGER causes tcp port exhaustion on Windows.
+ # This is because dynamic ports are only 16384 (from 49152 to 65535) and
+ # expiring SO_LINGER enabled ports should wait 4 minutes
+ # where set by TcpTimeDelay. Its default value is 4 minutes.
+ # So, we should disable SO_LINGER on Windows to prevent flood of waiting ports.
+ linger_timeout: Fluent.windows? ? nil : @send_timeout,
+ send_timeout: @send_timeout,
+ recv_timeout: @ack_response_timeout,
+ connect_timeout: @connect_timeout,
+ &block
+ )
+ when :tcp
+ socket_create_tcp(
+ host, port,
+ linger_timeout: @send_timeout,
+ send_timeout: @send_timeout,
+ recv_timeout: @ack_response_timeout,
+ connect_timeout: @connect_timeout,
+ &block
+ )
+ else
+ raise "BUG: unknown transport protocol #{@transport}"
end
end
- def send_data(node, tag, chunk)
- sock = connect(node)
- begin
- opt = [1, @send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
- sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
-
- opt = [@send_timeout.to_i, 0].pack('L!L!') # struct timeval
- sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
-
- # beginArray(2)
- sock.write forward_header
-
- # writeRaw(tag)
- sock.write tag.to_msgpack # tag
-
- # beginRaw(size)
- sz = chunk.size
- #if sz < 32
- # # FixRaw
- # sock.write [0xa0 | sz].pack('C')
- #elsif sz < 65536
- # # raw 16
- # sock.write [0xda, sz].pack('Cn')
- #else
- # raw 32
- sock.write [0xdb, sz].pack('CN')
- #end
-
- # writeRawBody(packed_es)
- chunk.write_to(sock)
-
- if @extend_internal_protocol
- option = {}
- option['chunk'] = Base64.encode64(chunk.unique_id) if @require_ack_response
- sock.write option.to_msgpack
-
- if @require_ack_response && @ack_response_timeout > 0
- # Waiting for a response here results in a decrease of throughput because a chunk queue is locked.
- # To avoid a decrease of troughput, it is necessary to prepare a list of chunks that wait for responses
- # and process them asynchronously.
- if IO.select([sock], nil, nil, @ack_response_timeout)
- raw_data = sock.recv(1024)
-
- # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
- # If this happens we assume the data wasn't delivered and retry it.
- if raw_data.empty?
- @log.warn "node #{node.host}:#{node.port} closed the connection. regard it as unavailable."
- node.disable!
- raise ForwardOutputConnectionClosedError, "node #{node.host}:#{node.port} closed connection"
- else
- # Serialization type of the response is same as sent data.
- res = MessagePack.unpack(raw_data)
-
- if res['ack'] != option['chunk']
- # Some errors may have occured when ack and chunk id is different, so send the chunk again.
- raise ForwardOutputResponseError, "ack in response and chunk id in sent data are different"
- end
- end
-
- else
- # IO.select returns nil on timeout.
- # There are 2 types of cases when no response has been received:
- # (1) the node does not support sending responses
- # (2) the node does support sending response but responses have not arrived for some reasons.
- @log.warn "no response from #{node.host}:#{node.port}. regard it as unavailable."
- node.disable!
- raise ForwardOutputACKTimeoutError, "node #{node.host}:#{node.port} does not return ACK"
- end
- end
+ def statistics
+ stats = super
+ services = discovery_manager.services
+ healthy_nodes_count = 0
+ registed_nodes_count = services.size
+ services.each do |s|
+ if s.available?
+ healthy_nodes_count += 1
end
-
- node.heartbeat(false)
- return res # for test
- ensure
- sock.close
end
+
+ stats.merge(
+ 'healthy_nodes_count' => healthy_nodes_count,
+ 'registered_nodes_count' => registed_nodes_count,
+ )
end
- def connect(node)
- # TODO unix socket?
- TCPSocket.new(node.resolved_host, node.port)
+ # MessagePack FixArray length is 3
+ FORWARD_HEADER = [0x93].pack('C').freeze
+ def forward_header
+ FORWARD_HEADER
end
- class HeartbeatRequestTimer < Coolio::TimerWatcher
- def initialize(interval, callback)
- super(interval, true)
- @callback = callback
- end
+ private
- def on_timer
- @callback.call
- rescue
- # TODO log?
+ def build_node(server)
+ name = server.name || "#{server.host}:#{server.port}"
+ log.info "adding forwarding server '#{name}'", host: server.host, port: server.port, weight: server.weight, plugin_id: plugin_id
+
+ failure = Fluent::Plugin::ForwardOutput::FailureDetector.new(@heartbeat_interval, @hard_timeout, Time.now.to_i.to_f)
+ if @heartbeat_type == :none
+ NoneHeartbeatNode.new(self, server, failure: failure, connection_manager: @connection_manager, ack_handler: @ack_handler)
+ else
+ Node.new(self, server, failure: failure, connection_manager: @connection_manager, ack_handler: @ack_handler)
end
end
- def on_timer
- return if @finished
- @nodes.each {|n|
- if n.tick
- rebuild_weight_array
- end
+ def on_heartbeat_timer
+ need_rebuild = false
+ discovery_manager.services.each do |n|
begin
- #log.trace "sending heartbeat #{n.host}:#{n.port} on #{@heartbeat_type}"
- if @heartbeat_type == :tcp
- send_heartbeat_tcp(n)
- else
- @usock.send "\0", 0, Socket.pack_sockaddr_in(n.port, n.resolved_host)
- end
- rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR, Errno::ECONNREFUSED
- # TODO log
- log.debug "failed to send heartbeat packet to #{n.host}:#{n.port}", error: $!.to_s
+ log.trace "sending heartbeat", host: n.host, port: n.port, heartbeat_type: @heartbeat_type
+ n.usock = @usock if @usock
+ need_rebuild = n.send_heartbeat || need_rebuild
+ rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR, Errno::ECONNREFUSED, Errno::ETIMEDOUT => e
+ log.debug "failed to send heartbeat packet", host: n.host, port: n.port, heartbeat_type: @heartbeat_type, error: e
+ rescue => e
+ log.debug "unexpected error happen during heartbeat", host: n.host, port: n.port, heartbeat_type: @heartbeat_type, error: e
end
- }
- end
- class HeartbeatHandler < Coolio::IO
- def initialize(io, callback)
- super(io)
- @io = io
- @callback = callback
+ need_rebuild = n.tick || need_rebuild
end
- def on_readable
- begin
- msg, addr = @io.recvfrom(1024)
- rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR
- return
- end
- host = addr[3]
- port = addr[1]
- sockaddr = Socket.pack_sockaddr_in(port, host)
- @callback.call(sockaddr, msg)
- rescue
- # TODO log?
+ if need_rebuild
+ discovery_manager.rebalance
end
end
- def on_heartbeat(sockaddr, msg)
- port, host = Socket.unpack_sockaddr_in(sockaddr)
- if node = @nodes.find {|n| n.sockaddr == sockaddr }
- #log.trace "heartbeat from '#{node.name}'", :host=>node.host, :port=>node.port
+ def on_udp_heatbeat_response_recv(data, sock)
+ sockaddr = Socket.pack_sockaddr_in(sock.remote_port, sock.remote_host)
+ if node = discovery_manager.services.find { |n| n.sockaddr == sockaddr }
+ # log.trace "heartbeat arrived", name: node.name, host: node.host, port: node.port
if node.heartbeat
- rebuild_weight_array
+ discovery_manager.rebalance
end
+ else
+ log.warn("Unknown heartbeat response received from #{sock.remote_host}:#{sock.remote_port}. It may service out")
end
end
- NodeConfig2 = Struct.new("NodeConfig2", :name, :host, :port, :weight, :standby, :failure,
- :phi_threshold, :recover_sample_size, :expire_dns_cache, :phi_failure_detector, :dns_round_robin, :skip_network_error)
+ def on_purge_obsolete_socks
+ @connection_manager.purge_obsolete_socks
+ end
+
+ def ack_select_interval
+ if @delayed_commit_timeout > 3
+ 1
+ else
+ @delayed_commit_timeout / 3.0
+ end
+ end
+
+ def ack_reader
+ select_interval = ack_select_interval
+
+ while thread_current_running?
+ ack_check(select_interval)
+ end
+ end
+
+ def ack_check(select_interval)
+ @ack_handler.collect_response(select_interval) do |chunk_id, node, sock, result|
+ @connection_manager.close(sock)
+
+ case result
+ when AckHandler::Result::SUCCESS
+ commit_write(chunk_id)
+ when AckHandler::Result::FAILED
+ node.disable!
+ rollback_write(chunk_id, update_retry: false)
+ when AckHandler::Result::CHUNKID_UNMATCHED
+ rollback_write(chunk_id, update_retry: false)
+ else
+ log.warn("BUG: invalid status #{result} #{chunk_id}")
+
+ if chunk_id
+ rollback_write(chunk_id, update_retry: false)
+ end
+ end
+ end
+ end
class Node
- def initialize(log, conf)
- @log = log
- @conf = conf
- @name = @conf.name
- @host = @conf.host
- @port = @conf.port
- @weight = @conf.weight
- @failure = @conf.failure
+ extend Forwardable
+ def_delegators :@server, :discovery_id, :host, :port, :name, :weight, :standby
+
+ # @param connection_manager [Fluent::Plugin::ForwardOutput::ConnectionManager]
+ # @param ack_handler [Fluent::Plugin::ForwardOutput::AckHandler]
+ def initialize(sender, server, failure:, connection_manager:, ack_handler:)
+ @sender = sender
+ @log = sender.log
+ @compress = sender.compress
+ @server = server
+
+ @name = server.name
+ @host = server.host
+ @port = server.port
+ @weight = server.weight
+ @standby = server.standby
+ @failure = failure
@available = true
+ # @hostname is used for certificate verification & TLS SNI
+ host_is_hostname = !(IPAddr.new(@host) rescue false)
+ @hostname = case
+ when host_is_hostname then @host
+ when @name then @name
+ else nil
+ end
+
+ @usock = nil
+
+ @handshake = Fluent::Plugin::ForwardOutput::HandshakeProtocol.new(
+ log: @log,
+ hostname: sender.security && sender.security.self_hostname,
+ shared_key: server.shared_key || (sender.security && sender.security.shared_key) || '',
+ password: server.password || '',
+ username: server.username || '',
+ )
+
+ @unpacker = Fluent::MessagePackFactory.msgpack_unpacker
+
@resolved_host = nil
@resolved_time = 0
- begin
- resolved_host # check dns
- rescue => e
- if @conf.skip_network_error
- log.warn "#{@name} got network error during setup. Resolve host later", :error => e, :error_class => e.class
- else
- raise
- end
- end
- end
+ @resolved_once = false
+
+ @connection_manager = connection_manager
+ @ack_handler = ack_handler
+ end
+
+ attr_accessor :usock
- attr_reader :conf
- attr_reader :name, :host, :port, :weight
- attr_reader :sockaddr # used by on_heartbeat
- attr_reader :failure, :available # for test
+ attr_reader :state
+ attr_reader :sockaddr # used by on_udp_heatbeat_response_recv
+ attr_reader :failure # for test
+
+ def validate_host_resolution!
+ resolved_host
+ end
def available?
@available
@@ -488,41 +612,158 @@ def disable!
end
def standby?
- @conf.standby
+ @standby
+ end
+
+ def verify_connection
+ connect do |sock, ri|
+ ensure_established_connection(sock, ri)
+ end
+ end
+
+ def establish_connection(sock, ri)
+ while ri.state != :established
+ begin
+ # TODO: On Ruby 2.2 or earlier, read_nonblock doesn't work as expected.
+ # We need rewrite around here using new socket/server plugin helper.
+ buf = sock.read_nonblock(@sender.read_length)
+ if buf.empty?
+ sleep @sender.read_interval
+ next
+ end
+ @unpacker.feed_each(buf) do |data|
+ if @handshake.invoke(sock, ri, data) == :established
+ @log.debug "connection established", host: @host, port: @port
+ end
+ end
+ rescue IO::WaitReadable
+ # If the exception is Errno::EWOULDBLOCK or Errno::EAGAIN, it is extended by IO::WaitReadable.
+ # So IO::WaitReadable can be used to rescue the exceptions for retrying read_nonblock.
+ # https://docs.ruby-lang.org/en/2.3.0/IO.html#method-i-read_nonblock
+ sleep @sender.read_interval unless ri.state == :established
+ rescue SystemCallError => e
+ @log.warn "disconnected by error", host: @host, port: @port, error: e
+ disable!
+ break
+ rescue EOFError
+ @log.warn "disconnected", host: @host, port: @port
+ disable!
+ break
+ rescue HeloError => e
+ @log.warn "received invalid helo message from #{@name}"
+ disable!
+ break
+ rescue PingpongError => e
+ @log.warn "connection refused to #{@name || @host}: #{e.message}"
+ disable!
+ break
+ end
+ end
+ end
+
+ def send_data_actual(sock, tag, chunk)
+ option = { 'size' => chunk.size, 'compressed' => @compress }
+ option['chunk'] = Base64.encode64(chunk.unique_id) if @ack_handler
+
+ # https://github.com/fluent/fluentd/wiki/Forward-Protocol-Specification-v1#packedforward-mode
+ # out_forward always uses str32 type for entries.
+ # str16 can store only 64kbytes, and it should be much smaller than buffer chunk size.
+
+ tag = tag.dup.force_encoding(Encoding::UTF_8)
+
+ sock.write @sender.forward_header # array, size=3
+ sock.write tag.to_msgpack # 1. tag: String (str)
+ chunk.open(compressed: @compress) do |chunk_io|
+ entries = [0xdb, chunk_io.size].pack('CN')
+ sock.write entries.force_encoding(Encoding::UTF_8) # 2. entries: String (str32)
+ IO.copy_stream(chunk_io, sock) # writeRawBody(packed_es)
+ end
+ sock.write option.to_msgpack # 3. option: Hash(map)
+
+ # TODO: use bin32 for non-utf8 content(entries) when old msgpack-ruby (0.5.x or earlier) not supported
+ end
+
+ def send_data(tag, chunk)
+ ack = @ack_handler && @ack_handler.create_ack(chunk.unique_id, self)
+ connect(nil, ack: ack) do |sock, ri|
+ ensure_established_connection(sock, ri)
+ send_data_actual(sock, tag, chunk)
+ end
+
+ heartbeat(false)
+ nil
+ end
+
+ # FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack
+ #
+ # @return [Boolean] return true if it needs to rebuild nodes
+ def send_heartbeat
+ begin
+ dest_addr = resolved_host
+ @resolved_once = true
+ rescue ::SocketError => e
+ if !@resolved_once && @sender.ignore_network_errors_at_startup
+ @log.warn "failed to resolve node name in heartbeating", server: @name || @host, error: e
+ return false
+ end
+ raise
+ end
+
+ case @sender.heartbeat_type
+ when :transport
+ connect(dest_addr) do |sock, ri|
+ ensure_established_connection(sock, ri)
+
+ ## don't send any data to not cause a compatibility problem
+ # sock.write FORWARD_TCP_HEARTBEAT_DATA
+
+ # successful tcp connection establishment is considered as valid heartbeat.
+ # When heartbeat is succeeded after detached, return true. It rebuilds weight array.
+ heartbeat(true)
+ end
+ when :udp
+ @usock.send "\0", 0, Socket.pack_sockaddr_in(@port, dest_addr)
+ # response is going to be received at on_udp_heatbeat_response_recv
+ false
+ when :none # :none doesn't use this class
+ raise "BUG: heartbeat_type none must not use Node"
+ else
+ raise "BUG: unknown heartbeat_type '#{@sender.heartbeat_type}'"
+ end
end
def resolved_host
- case @conf.expire_dns_cache
+ case @sender.expire_dns_cache
when 0
# cache is disabled
- return resolve_dns!
+ resolve_dns!
when nil
# persistent cache
- return @resolved_host ||= resolve_dns!
+ @resolved_host ||= resolve_dns!
else
- now = Engine.now
+ now = Fluent::EventTime.now
rh = @resolved_host
- if !rh || now - @resolved_time >= @conf.expire_dns_cache
+ if !rh || now - @resolved_time >= @sender.expire_dns_cache
rh = @resolved_host = resolve_dns!
@resolved_time = now
end
- return rh
+ rh
end
end
def resolve_dns!
addrinfo_list = Socket.getaddrinfo(@host, @port, nil, Socket::SOCK_STREAM)
- addrinfo = @conf.dns_round_robin ? addrinfo_list.sample : addrinfo_list.first
- @sockaddr = Socket.pack_sockaddr_in(addrinfo[1], addrinfo[3]) # used by on_heartbeat
+ addrinfo = @sender.dns_round_robin ? addrinfo_list.sample : addrinfo_list.first
+ @sockaddr = Socket.pack_sockaddr_in(addrinfo[1], addrinfo[3]) # used by on_udp_heatbeat_response_recv
addrinfo[3]
end
private :resolve_dns!
def tick
now = Time.now.to_f
- if !@available
+ unless available?
if @failure.hard_timeout?(now)
@failure.clear
end
@@ -531,41 +772,51 @@ def tick
if @failure.hard_timeout?(now)
@log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, hard_timeout: true
- @available = false
+ disable!
@resolved_host = nil # expire cached host
@failure.clear
return true
end
- if @conf.phi_failure_detector
+ if @sender.phi_failure_detector
phi = @failure.phi(now)
- #$log.trace "phi '#{@name}'", :host=>@host, :port=>@port, :phi=>phi
- if phi > @conf.phi_threshold
- @log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, phi: phi
- @available = false
+ if phi > @sender.phi_threshold
+ @log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, phi: phi, phi_threshold: @sender.phi_threshold
+ disable!
@resolved_host = nil # expire cached host
@failure.clear
return true
end
end
- return false
+ false
end
def heartbeat(detect=true)
now = Time.now.to_f
@failure.add(now)
- #@log.trace "heartbeat from '#{@name}'", :host=>@host, :port=>@port, :available=>@available, :sample_size=>@failure.sample_size
- if detect && !@available && @failure.sample_size > @conf.recover_sample_size
+ if detect && !available? && @failure.sample_size > @sender.recover_sample_size
@available = true
@log.warn "recovered forwarding server '#{@name}'", host: @host, port: @port
- return true
+ true
else
- return nil
+ nil
end
end
- def to_msgpack(out = '')
- [@host, @port, @weight, @available].to_msgpack(out)
+ private
+
+ def ensure_established_connection(sock, request_info)
+ if request_info.state != :established
+ establish_connection(sock, request_info)
+
+ if request_info.state != :established
+ raise ConnectionClosedError, "failed to establish connection with node #{@name}"
+ end
+ end
+ end
+
+ def connect(host = nil, ack: false, &block)
+ @connection_manager.connect(host: host || resolved_host, port: port, hostname: @hostname, ack: ack, &block)
end
end
@@ -583,96 +834,5 @@ def heartbeat(detect=true)
true
end
end
-
- class FailureDetector
- PHI_FACTOR = 1.0 / Math.log(10.0)
- SAMPLE_SIZE = 1000
-
- def initialize(heartbeat_interval, hard_timeout, init_last)
- @heartbeat_interval = heartbeat_interval
- @last = init_last
- @hard_timeout = hard_timeout
-
- # microsec
- @init_gap = (heartbeat_interval * 1e6).to_i
- @window = [@init_gap]
- end
-
- def hard_timeout?(now)
- now - @last > @hard_timeout
- end
-
- def add(now)
- if @window.empty?
- @window << @init_gap
- @last = now
- else
- gap = now - @last
- @window << (gap * 1e6).to_i
- @window.shift if @window.length > SAMPLE_SIZE
- @last = now
- end
- end
-
- def phi(now)
- size = @window.size
- return 0.0 if size == 0
-
- # Calculate weighted moving average
- mean_usec = 0
- fact = 0
- @window.each_with_index {|gap,i|
- mean_usec += gap * (1+i)
- fact += (1+i)
- }
- mean_usec = mean_usec / fact
-
- # Normalize arrive intervals into 1sec
- mean = (mean_usec.to_f / 1e6) - @heartbeat_interval + 1
-
- # Calculate phi of the phi accrual failure detector
- t = now - @last - @heartbeat_interval + 1
- phi = PHI_FACTOR * t / mean
-
- return phi
- end
-
- def sample_size
- @window.size
- end
-
- def clear
- @window.clear
- @last = 0
- end
- end
-
- ## TODO
- #class RPC
- # def initialize(this)
- # @this = this
- # end
- #
- # def list_nodes
- # @this.nodes
- # end
- #
- # def list_fault_nodes
- # list_nodes.select {|n| !n.available? }
- # end
- #
- # def list_available_nodes
- # list_nodes.select {|n| n.available? }
- # end
- #
- # def add_node(name, host, port, weight)
- # end
- #
- # def recover_node(host, port)
- # end
- #
- # def remove_node(host, port)
- # end
- #end
end
end
diff --git a/source/plugins/ruby/out_mdm.rb b/source/plugins/ruby/out_mdm.rb
index 6238eb51a..8e80fb753 100644
--- a/source/plugins/ruby/out_mdm.rb
+++ b/source/plugins/ruby/out_mdm.rb
@@ -1,11 +1,12 @@
#!/usr/local/bin/ruby
# frozen_string_literal: true
-module Fluent
- class OutputMDM < BufferedOutput
- config_param :retry_mdm_post_wait_minutes, :integer
+require 'fluent/plugin/output'
- Plugin.register_output("out_mdm", self)
+module Fluent::Plugin
+ class OutputMDM < Output
+ config_param :retry_mdm_post_wait_minutes, :integer
+ Fluent::Plugin.register_output("mdm", self)
def initialize
super
@@ -57,8 +58,6 @@ def initialize
end
def configure(conf)
- s = conf.add_element("secondary")
- s["type"] = ChunkErrorHandler::SecondaryName
super
end
@@ -204,7 +203,7 @@ def get_access_token
end
def write_status_file(success, message)
- fn = "/var/opt/microsoft/omsagent/log/MDMIngestion.status"
+ fn = "/var/opt/microsoft/docker-cimprov/log/MDMIngestion.status"
status = '{ "operation": "MDMIngestion", "success": "%s", "message": "%s" }' % [success, message]
begin
File.open(fn, "w") { |file| file.write(status) }
@@ -270,6 +269,7 @@ def write(chunk)
flush_mdm_exception_telemetry
if (!@first_post_attempt_made || (Time.now > @last_post_attempt_time + retry_mdm_post_wait_minutes * 60)) && @can_send_data_to_mdm
post_body = []
+ chunk.extend Fluent::ChunkMessagePackEventStreamer
chunk.msgpack_each { |(tag, record)|
post_body.push(record.to_json)
}
@@ -320,7 +320,7 @@ def send_to_mdm(post_body)
ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMSendSuccessful", {})
@last_telemetry_sent_time = Time.now
end
- rescue Net::HTTPServerException => e
+ rescue Net::HTTPClientException => e # see https://docs.ruby-lang.org/en/2.6.0/NEWS.html about deprecating HTTPServerException and adding HTTPClientException
if !response.nil? && !response.body.nil? #body will have actual error
@log.info "Failed to Post Metrics to MDM : #{e} Response.body: #{response.body}"
else
@@ -334,7 +334,7 @@ def send_to_mdm(post_body)
# Not raising exception, as that will cause retries to happen
elsif !response.code.empty? && response.code.start_with?("4")
# Log 400 errors and continue
- @log.info "Non-retryable HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}"
+ @log.info "Non-retryable HTTPClientException when POSTing Metrics to MDM #{e} Response: #{response}"
else
# raise if the response code is non-400
@log.info "HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}"
@@ -352,72 +352,5 @@ def send_to_mdm(post_body)
raise e
end
end
-
- private
-
- class ChunkErrorHandler
- include Configurable
- include PluginId
- include PluginLoggerMixin
-
- SecondaryName = "__ChunkErrorHandler__"
-
- Plugin.register_output(SecondaryName, self)
-
- def initialize
- @router = nil
- end
-
- def secondary_init(primary)
- @error_handlers = create_error_handlers @router
- end
-
- def start
- # NOP
- end
-
- def shutdown
- # NOP
- end
-
- def router=(r)
- @router = r
- end
-
- def write(chunk)
- chunk.msgpack_each { |(tag, record)|
- @error_handlers[tag].emit(record)
- }
- end
-
- private
-
- def create_error_handlers(router)
- nop_handler = NopErrorHandler.new
- Hash.new() { |hash, tag|
- etag = OMS::Common.create_error_tag tag
- hash[tag] = router.match?(etag) ?
- ErrorHandler.new(router, etag) :
- nop_handler
- }
- end
-
- class ErrorHandler
- def initialize(router, etag)
- @router = router
- @etag = etag
- end
-
- def emit(record)
- @router.emit(@etag, Fluent::Engine.now, record)
- end
- end
-
- class NopErrorHandler
- def emit(record)
- # NOP
- end
- end
- end
end # class OutputMDM
end # module Fluent
diff --git a/source/plugins/ruby/podinventory_to_mdm.rb b/source/plugins/ruby/podinventory_to_mdm.rb
index d9cb71bd4..c24a91a87 100644
--- a/source/plugins/ruby/podinventory_to_mdm.rb
+++ b/source/plugins/ruby/podinventory_to_mdm.rb
@@ -279,16 +279,16 @@ def process_pod_inventory_record(record)
begin
records = []
- podUid = record["DataItems"][0]["PodUid"]
+ podUid = record["PodUid"]
if @pod_uids.key?(podUid)
return
end
@pod_uids[podUid] = true
- podPhaseDimValue = record["DataItems"][0]["PodStatus"]
- podNamespaceDimValue = record["DataItems"][0]["Namespace"]
- podControllerNameDimValue = record["DataItems"][0]["ControllerName"]
- podNodeDimValue = record["DataItems"][0]["Computer"]
+ podPhaseDimValue = record["PodStatus"]
+ podNamespaceDimValue = record["Namespace"]
+ podControllerNameDimValue = record["ControllerName"]
+ podNodeDimValue = record["Computer"]
if podControllerNameDimValue.nil? || podControllerNameDimValue.empty?
podControllerNameDimValue = "No Controller"
diff --git a/source/plugins/utils/oms_common.rb b/source/plugins/utils/oms_common.rb
new file mode 100644
index 000000000..c10cb8638
--- /dev/null
+++ b/source/plugins/utils/oms_common.rb
@@ -0,0 +1,143 @@
+module OMS
+
+ MSDockerCImprovHostnameFilePath = '/var/opt/microsoft/docker-cimprov/state/containerhostname'
+ IPV6_REGEX = '\h{4}:\h{4}:\h{4}:\h{4}:\h{4}:\h{4}:\h{4}:\h{4}'
+ IPV4_Approximate_REGEX = '\d+\.\d+\.\d+\.\d+'
+
+ class RetryRequestException < Exception
+ # Throw this exception to tell the fluentd engine to retry and
+ # inform the output plugin that it is indeed retryable
+ end
+
+ class Common
+ require 'socket'
+ require_relative 'omslog'
+
+ @@Hostname = nil
+ @@HostnameFilePath = MSDockerCImprovHostnameFilePath
+
+
+ class << self
+
+ # Internal methods
+ # (left public for easy testing, though protected may be better later)
+
+ def clean_hostname_string(hnBuffer)
+ return "" if hnBuffer.nil? # So give the rest of the program a string to deal with.
+ hostname_buffer = hnBuffer.strip
+ return hostname_buffer
+ end
+
+ def has_designated_hostnamefile?
+ return false if @@HostnameFilePath.nil?
+ return false unless @@HostnameFilePath =~ /\w/
+ return false unless File.exist?(@@HostnameFilePath)
+ return true
+ end
+
+ def is_dot_separated_string?(hnBuffer)
+ return true if /[^.]+\.[^.]+/ =~ hnBuffer
+ return false
+ end
+
+ def is_hostname_compliant?(hnBuffer)
+ # RFC 2181:
+ # Size limit is 1 to 63 octets, so probably bytesize is appropriate method.
+ return false if hnBuffer.nil?
+ return false if /\./ =~ hnBuffer # Hostname by definition may not contain a dot.
+ return false if /:/ =~ hnBuffer # Hostname by definition may not contain a colon.
+ return false unless 1 <= hnBuffer.bytesize && hnBuffer.bytesize <= 63
+ return true
+ end
+
+ def is_like_ipv4_string?(hnBuffer)
+ return false unless /\A#{IPV4_Approximate_REGEX}\z/ =~ hnBuffer
+ qwa = hnBuffer.split('.')
+ return false unless qwa.length == 4
+ return false if qwa[0].to_i == 0
+ qwa.each do |quadwordstring|
+ bi = quadwordstring.to_i
+ # This may need more detail if 255 octets are sometimes allowed, but I don't think so.
+ return false unless 0 <= bi and bi < 255
+ end
+ return true
+ end
+
+ def is_like_ipv6_string?(hnBuffer)
+ return true if /\A#{IPV6_REGEX}\z/ =~ hnBuffer
+ return false
+ end
+
+ def look_for_socket_class_host_address
+ hostname_buffer = nil
+
+ begin
+ hostname_buffer = Socket.gethostname
+ rescue => error
+ OMS::Log.error_once("Unable to get the Host Name using socket facility: #{error}")
+ return
+ end
+ @@Hostname = clean_hostname_string(hostname_buffer)
+
+ return # Thwart accidental return to force correct use.
+ end
+
+ def look_in_designated_hostnamefile
+ # Issue:
+ # When omsagent runs inside a container, gethostname returns the hostname of the container (random name)
+ # not the actual machine hostname.
+ # One way to solve this problem is to set the container hostname same as machine name, but this is not
+ # possible when host-machine is a private VM inside a cluster.
+ # Solution:
+ # Share/mount '/etc/hostname' as '/var/opt/microsoft/omsagent/state/containername' with container and
+ # omsagent will read hostname from shared file.
+ hostname_buffer = nil
+
+ unless File.readable?(@@HostnameFilePath)
+ OMS::Log.warn_once("File '#{@@HostnameFilePath}' exists but is not readable.")
+ return
+ end
+
+ begin
+ hostname_buffer = File.read(@@HostnameFilePath)
+ rescue => error
+ OMS::Log.warn_once("Unable to read the hostname from #{@@HostnameFilePath}: #{error}")
+ end
+ @@Hostname = clean_hostname_string(hostname_buffer)
+ return # Thwart accidental return to force correct use.
+ end
+
+ def validate_hostname_equivalent(hnBuffer)
+ # RFC 1123 and 2181
+ # Note that for now we are limiting the earlier maximum of 63 for fqdn labels and thus
+ # hostnames UNTIL we are assured azure will allow 255, as specified in RFC 1123, or
+ # we are otherwise instructed.
+ rfcl = "RFCs 1123, 2181 with hostname range of {1,63} octets for non-root item."
+ return if is_hostname_compliant?(hnBuffer)
+ return if is_like_ipv4_string?(hnBuffer)
+ return if is_like_ipv6_string?(hnBuffer)
+ msg = "Hostname '#{hnBuffer}' not compliant (#{rfcl}). Not IP Address Either."
+ OMS::Log.warn_once(msg)
+ raise NameError, msg
+ end
+
+ # End of Internal methods
+
+ def get_hostname(ignoreOldValue = false)
+ if not is_hostname_compliant?(@@Hostname) or ignoreOldValue then
+
+ look_in_designated_hostnamefile if has_designated_hostnamefile?
+
+ look_for_socket_class_host_address unless is_hostname_compliant?(@@Hostname)
+ end
+
+ begin
+ validate_hostname_equivalent(@@Hostname)
+ rescue => error
+ OMS::Log.warn_once("Hostname '#{@@Hostname}' found, but did NOT validate as compliant. #{error}. Using anyway.")
+ end
+ return @@Hostname
+ end
+ end # Class methods
+ end # class Common
+end # module OMS
diff --git a/source/plugins/utils/omslog.rb b/source/plugins/utils/omslog.rb
new file mode 100644
index 000000000..b65bf947c
--- /dev/null
+++ b/source/plugins/utils/omslog.rb
@@ -0,0 +1,50 @@
+module OMS
+ class Log
+ require 'set'
+ require 'digest'
+
+ @@error_proc = Proc.new {|message| $log.error message }
+ @@warn_proc = Proc.new {|message| $log.warn message }
+ @@info_proc = Proc.new {|message| $log.info message }
+ @@debug_proc = Proc.new {|message| $log.debug message }
+
+ @@logged_hashes = Set.new
+
+ class << self
+ def error_once(message, tag=nil)
+ log_once(@@error_proc, @@debug_proc, message, tag)
+ end
+
+ def warn_once(message, tag=nil)
+ log_once(@@warn_proc, @@debug_proc, message, tag)
+ end
+
+ def info_once(message, tag=nil)
+ log_once(@@info_proc, @@debug_proc, message, tag)
+ end
+
+ def log_once(first_loglevel_proc, next_loglevel_proc, message, tag=nil)
+ # Will log a message once with the first procedure and subsequently with the second
+ # This allows repeated messages to be ignored by having the second logging function at a lower log level
+ # An optional tag can be used as the message key
+
+ if tag == nil
+ tag = message
+ end
+
+ md5_digest = Digest::MD5.new
+ tag_hash = md5_digest.update(tag).base64digest
+ res = @@logged_hashes.add?(tag_hash)
+
+ if res == nil
+ # The hash was already in the set
+ next_loglevel_proc.call(message)
+ else
+ # First time we see this hash
+ first_loglevel_proc.call(message)
+ end
+ end
+ end # Class methods
+
+ end # Class Log
+end # Module OMS