diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf
index d4a49a385..2dee26234 100644
--- a/installer/conf/td-agent-bit.conf
+++ b/installer/conf/td-agent-bit.conf
@@ -1,5 +1,5 @@
 [SERVICE]
-    Flush 10
+    Flush 15
     Log_Level info
     Parsers_File /etc/td-agent-bit/parsers.conf
     Log_File /var/opt/microsoft/docker-cimprov/log/fluent-bit.log
@@ -11,7 +11,7 @@
     DB /var/log/omsagent-fblogs.db
     DB.Sync Off
     Parser docker
-    Mem_Buf_Limit 5m
+    Mem_Buf_Limit 10m
     Buffer_Chunk_Size 1m
     Buffer_Max_Size 1m
     Rotate_Wait 20
@@ -26,20 +26,22 @@
     Tag oms.container.log.flbplugin.*
     Path /var/log/containers/omsagent*.log
     DB /var/opt/microsoft/docker-cimprov/state/omsagent-ai.db
-    Mem_Buf_Limit 2m
+    DB.Sync Off
+    Mem_Buf_Limit 1m
     Path_Key filepath
     Skip_Long_Lines On
-    Ignore_Older 5m
+    Ignore_Older 2m
 
 [INPUT]
     Name tail
     Tag oms.container.log.telegraf.err.*
     Path /var/opt/microsoft/docker-cimprov/log/telegraf.log
     DB /var/opt/microsoft/docker-cimprov/state/telegraf-log-state.db
-    Mem_Buf_Limit 2m
+    DB.Sync Off
+    Mem_Buf_Limit 1m
     Path_Key filepath
     Skip_Long_Lines On
-    Ignore_Older 5m
+    Ignore_Older 2m
 
 [INPUT]
     Name tcp
diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf
index 6b3f44929..06b1c55eb 100644
--- a/installer/conf/telegraf.conf
+++ b/installer/conf/telegraf.conf
@@ -404,7 +404,7 @@
   # Below due to Bug - https://github.com/influxdata/telegraf/issues/5615
   # ORDER matters here!! - i.e the below should be the LAST modifier
   [inputs.disk.tagdrop]
-    path = ["/var/lib/kubelet*", "/dev/termination-log", "/var/log", "/etc/hosts", "/etc/resolv.conf", "/etc/hostname", "/etc/kubernetes/host", "/var/lib/docker/containers"]
+    path = ["/var/lib/kubelet*", "/dev/termination-log", "/var/log", "/etc/hosts", "/etc/resolv.conf", "/etc/hostname", "/etc/kubernetes/host", "/var/lib/docker/containers", "/etc/config/settings"]
 
 
 # Read metrics about memory usage
diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go
index b925e7145..4e6cd4d88 100644
--- a/source/code/go/src/plugins/oms.go
+++ b/source/code/go/src/plugins/oms.go
@@ -243,6 +243,82 @@ func populateExcludedStderrNamespaces() {
 			StderrIgnoreNsSet[strings.TrimSpace(ns)] = true
 		}
 	}
+
+	var metrics []laTelegrafMetric
+	var i int
+
+	for i = 0; i < len(laMetrics); i++ {
+		metrics = append(metrics, *laMetrics[i])
+	}
+
+	laTelegrafMetrics := InsightsMetricsBlob{
+		DataType:  InsightsMetricsDataType,
+		IPName:    IPName,
+		DataItems: metrics}
+
+	jsonBytes, err := json.Marshal(laTelegrafMetrics)
+
+	if err != nil {
+		message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:when marshalling json %q", err)
+		Log(message)
+		SendException(message)
+		return output.FLB_OK
+	}
+
+	//Post metrics data to LA
+	req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(jsonBytes))
+
+	//req.URL.Query().Add("api-version","2016-04-01")
+
+	//set headers
+	req.Header.Set("x-ms-date", time.Now().Format(time.RFC3339))
+
+	//expensive to do string len for every request, so use a flag
+	if ResourceCentric == true {
+		req.Header.Set("x-ms-AzureResourceId", ResourceID)
+	}
+
+	start := time.Now()
+	resp, err := HTTPClient.Do(req)
+	elapsed := time.Since(start)
+
+	if err != nil {
+		message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:(retriable) when sending %v metrics. duration:%v err:%q \n", len(laMetrics), elapsed, err.Error())
+		Log(message)
+		SendException(message)
+		UpdateNumTelegrafMetricsSentTelemetry(0, 1)
+		return output.FLB_RETRY
+	}
+
+	// Close the body on every exit path below, including the non-200 retry
+	// branch, so the response is not leaked.
+	if resp != nil {
+		defer resp.Body.Close()
+	}
+
+	if resp == nil || resp.StatusCode != 200 {
+		if resp != nil {
+			Log("PostTelegrafMetricsToLA::Error:(retriable) Response Status %v Status Code %v", resp.Status, resp.StatusCode)
+		}
+		UpdateNumTelegrafMetricsSentTelemetry(0, 1)
+		return output.FLB_RETRY
+	}
+
+	numMetrics := len(laMetrics)
+	UpdateNumTelegrafMetricsSentTelemetry(numMetrics, 0)
+	Log("PostTelegrafMetricsToLA::Info:Successfully flushed %v records in %v", numMetrics, elapsed)
+
+	return output.FLB_OK
+}
+
+// UpdateNumTelegrafMetricsSentTelemetry adds numMetricsSent to
+// TelegrafMetricsSentCount and numSendErrors to TelegrafMetricsSendErrorCount
+// while holding ContainerLogTelemetryMutex.
+func UpdateNumTelegrafMetricsSentTelemetry(numMetricsSent int, numSendErrors int) {
+	ContainerLogTelemetryMutex.Lock()
+	defer ContainerLogTelemetryMutex.Unlock()
+	TelegrafMetricsSentCount += float64(numMetricsSent)
+	TelegrafMetricsSendErrorCount += float64(numSendErrors)
 }
 
 //Azure loganalytics metric values have to be numeric, so string values are dropped