Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions build/common/installer/scripts/tomlparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
@enrichContainerLogs = false
@containerLogSchemaVersion = ""
@collectAllKubeEvents = false
@containerLogsRoute = ""

@containerLogsRoute = "v2" # default for linux
if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0
@containerLogsRoute = "v1" # default is v1 for windows until windows agent integrates windows ama
end
# Use parser to parse the configmap toml file to a ruby structure
def parseConfigMap
begin
Expand Down Expand Up @@ -162,8 +164,12 @@ def populateSettingValuesFromConfigMap(parsedConfig)
#Get container logs route setting
begin
if !parsedConfig[:log_collection_settings][:route_container_logs].nil? && !parsedConfig[:log_collection_settings][:route_container_logs][:version].nil?
@containerLogsRoute = parsedConfig[:log_collection_settings][:route_container_logs][:version]
puts "config::Using config map setting for container logs route"
if !parsedConfig[:log_collection_settings][:route_container_logs][:version].empty?
@containerLogsRoute = parsedConfig[:log_collection_settings][:route_container_logs][:version]
puts "config::Using config map setting for container logs route: #{@containerLogsRoute}"
else
puts "config::Ignoring config map settings and using default value since provided container logs route value is empty"
end
end
rescue => errorStr
ConfigParseErrorLogger.logError("Exception while reading config map settings for container logs route - #{errorStr}, using defaults, please check config map for errors")
Expand Down Expand Up @@ -256,7 +262,7 @@ def get_command_windows(env_variable_name, env_variable_value)
file.write(commands)
commands = get_command_windows('AZMON_CLUSTER_COLLECT_ALL_KUBE_EVENTS', @collectAllKubeEvents)
file.write(commands)
commands = get_command_windows('AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE', @containerLogsRoute)
commands = get_command_windows('AZMON_CONTAINER_LOGS_ROUTE', @containerLogsRoute)
file.write(commands)
commands = get_command_windows('AZMON_CONTAINER_LOG_SCHEMA_VERSION', @containerLogSchemaVersion)
file.write(commands)
Expand Down
318 changes: 178 additions & 140 deletions build/linux/installer/conf/container.conf
Original file line number Diff line number Diff line change
@@ -1,141 +1,179 @@
# Fluentd config file for OMS Docker - container components (non kubeAPI)

# Forward port 25225 for container logs
<source>
type forward
port 25225
bind 127.0.0.1
</source>

# MDM metrics from telegraf
<source>
@type tcp
tag oms.mdm.container.perf.telegraf.*
bind 0.0.0.0
port 25228
format json
</source>

# Container inventory
<source>
type containerinventory
tag oms.containerinsights.containerinventory
run_interval 60
log_level debug
</source>

#cadvisor perf
<source>
type cadvisorperf
tag oms.api.cadvisorperf
run_interval 60
log_level debug
</source>

<filter kubehealth.DaemonSet.Node**>
type filter_cadvisor_health_node
log_level debug
</filter>

<filter kubehealth.DaemonSet.Container**>
type filter_cadvisor_health_container
log_level debug
</filter>

#custom_metrics_mdm filter plugin
<filter mdm.cadvisorperf**>
type filter_cadvisor2mdm
metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes,pvUsedBytes
log_level info
</filter>

<filter oms.mdm.container.perf.telegraf**>
type filter_telegraf2mdm
log_level debug
</filter>

<match oms.containerinsights.containerinventory**>
type out_oms
log_level debug
num_threads 5
buffer_type file
buffer_path %STATE_DIR_WS%/out_oms_containerinventory*.buffer
buffer_queue_full_action drop_oldest_chunk
buffer_chunk_limit 4m
flush_interval 20s
retry_limit 10
retry_wait 5s
max_retry_wait 5m
</match>

<match oms.api.cadvisorperf**>
type out_oms
log_level debug
num_threads 5
buffer_type file
buffer_path %STATE_DIR_WS%/out_oms_cadvisorperf*.buffer
buffer_queue_full_action drop_oldest_chunk
buffer_chunk_limit 4m
flush_interval 20s
retry_limit 10
retry_wait 5s
max_retry_wait 5m
</match>


<match kubehealth.DaemonSet**>
@type health_forward
send_timeout 60s
recover_wait 10s
hard_timeout 60s
heartbeat_type tcp
skip_network_error_at_init true
expire_dns_cache 600s
buffer_queue_full_action drop_oldest_chunk
buffer_type file
buffer_path %STATE_DIR_WS%/out_health_forward*.buffer
buffer_chunk_limit 3m
flush_interval 20s
retry_limit 10
retry_wait 5s
max_retry_wait 5m

<server>
host "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_HOST']}"
port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
</server>

<secondary>
# Fluentd config file for OMS Docker - container components (non kubeAPI)

# Forward port 25225 for container logs
# gangams - not currently used; remove once confirmed that it is safe to do so
<source>
@type forward
port 25225
bind 127.0.0.1
</source>

# MDM metrics from telegraf
<source>
@type tcp
tag oms.mdm.container.perf.telegraf.*
bind 0.0.0.0
port 25228
format json
</source>

# Container inventory
<source>
@type containerinventory
tag oneagent.containerInsights.CONTAINER_INVENTORY_BLOB
run_interval 60
@log_level debug
</source>

#cadvisor perf
<source>
@type cadvisor_perf
tag oneagent.containerInsights.LINUX_PERF_BLOB
run_interval 60
@log_level debug
</source>

<filter kubehealth.DaemonSet.Node**>
@type cadvisor_health_node
@log_level debug
</filter>

<filter kubehealth.DaemonSet.Container**>
@type cadvisor_health_container
@log_level debug
</filter>

#custom_metrics_mdm filter plugin
<filter mdm.cadvisorperf**>
@type cadvisor2mdm
metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes,pvUsedBytes
@log_level info
</filter>

<filter oms.mdm.container.perf.telegraf**>
@type telegraf2mdm
@log_level debug
</filter>

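# containerinventory: forward records whose tag matches **CONTAINER_INVENTORY_BLOB** to the port given by MDSD_FLUENT_SOCKET_PORT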
<match **CONTAINER_INVENTORY_BLOB**>
@type forward
@log_level debug
send_timeout 30
connect_timeout 30
heartbeat_type none
<server>
host 0.0.0.0
port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}"
</server>
<buffer>
@type file
path %STATE_DIR_WS%/fluent_forward_failed.buffer
</secondary>
</match>

<match mdm.cadvisorperf** oms.mdm.container.perf.telegraf**>
type out_mdm
log_level debug
num_threads 5
buffer_type file
buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer
buffer_queue_full_action drop_oldest_chunk
buffer_chunk_limit 4m
flush_interval 20s
retry_limit 10
retry_wait 5s
max_retry_wait 5m
retry_mdm_post_wait_minutes 30
</match>

<match oms.api.InsightsMetrics**>
type out_oms
log_level debug
num_threads 5
buffer_type file
buffer_path %STATE_DIR_WS%/out_oms_insightsmetrics*.buffer
buffer_queue_full_action drop_oldest_chunk
buffer_chunk_limit 4m
flush_interval 20s
retry_limit 10
retry_wait 5s
max_retry_wait 5m
</match>
path /var/opt/microsoft/docker-cimprov/state/containerinventory*.buffer
overflow_action drop_oldest_chunk
chunk_limit_size 4m
queue_limit_length 20
flush_interval 20s
retry_max_times 10
retry_wait 5s
retry_max_interval 5m
flush_thread_count 5
</buffer>
keepalive true
</match>

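# cadvisorperf: forward records whose tag matches **LINUX_PERF_BLOB** to the port given by MDSD_FLUENT_SOCKET_PORT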
<match **LINUX_PERF_BLOB**>
@type forward
@log_level debug
send_timeout 30
connect_timeout 30
heartbeat_type none
<server>
host 0.0.0.0
port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}"
</server>
<buffer>
@type file
path /var/opt/microsoft/docker-cimprov/state/cadvisorperf*.buffer
overflow_action drop_oldest_chunk
chunk_limit_size 4m
queue_limit_length 20
flush_interval 20s
retry_max_times 10
retry_wait 5s
retry_max_interval 5m
flush_thread_count 5
</buffer>
keepalive true
</match>

<match kubehealth.DaemonSet**>
@type health_forward
send_timeout 60s
recover_wait 10s
hard_timeout 60s
transport tcp
ignore_network_errors_at_startup true
expire_dns_cache 600s
<buffer>
@type file
overflow_action drop_oldest_chunk
path /var/opt/microsoft/docker-cimprov/state/out_health_forward*.buffer
chunk_limit_size 3m
flush_interval 20s
retry_max_times 10
retry_max_interval 5m
retry_wait 5s
</buffer>
<server>
host "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_HOST']}"
port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
</server>
<secondary>
@type file
path /var/opt/microsoft/docker-cimprov/state/fluent_forward_failed.buffer
</secondary>
</match>

<match mdm.cadvisorperf** oms.mdm.container.perf.telegraf**>
@type mdm
@log_level debug
<buffer>
@type file
path /var/opt/microsoft/docker-cimprov/state/out_mdm_cdvisorperf*.buffer
overflow_action drop_oldest_chunk
chunk_limit_size 4m
flush_interval 20s
retry_max_times 10
retry_wait 5s
retry_max_interval 5m
flush_thread_count 5
</buffer>
retry_mdm_post_wait_minutes 30
</match>

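# InsightsMetrics: forward records whose tag matches **INSIGHTS_METRICS_BLOB** to the port given by MDSD_FLUENT_SOCKET_PORT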
<match **INSIGHTS_METRICS_BLOB**>
@type forward
@log_level debug
send_timeout 30
connect_timeout 30
heartbeat_type none
<server>
host 0.0.0.0
port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}"
</server>
<buffer>
@type file
path /var/opt/microsoft/docker-cimprov/state/insightsmetrics*.buffer
overflow_action drop_oldest_chunk
chunk_limit_size 4m
queue_limit_length 20
flush_interval 20s
retry_max_times 10
retry_wait 5s
retry_max_interval 5m
flush_thread_count 5
</buffer>
keepalive true
</match>
Loading