diff --git a/build/windows/installer/conf/fluent-cri-parser.conf b/build/windows/installer/conf/fluent-cri-parser.conf new file mode 100644 index 000000000..86f1572ca --- /dev/null +++ b/build/windows/installer/conf/fluent-cri-parser.conf @@ -0,0 +1,6 @@ + + @type regexp + expression ^(? diff --git a/build/windows/installer/conf/fluent-docker-parser.conf b/build/windows/installer/conf/fluent-docker-parser.conf new file mode 100644 index 000000000..9dc800aeb --- /dev/null +++ b/build/windows/installer/conf/fluent-docker-parser.conf @@ -0,0 +1,5 @@ + + @type json + time_format %Y-%m-%dT%H:%M:%S.%NZ + keep_time_key true + diff --git a/build/windows/installer/conf/fluent.conf b/build/windows/installer/conf/fluent.conf index a4cacbcf6..c96300b1e 100644 --- a/build/windows/installer/conf/fluent.conf +++ b/build/windows/installer/conf/fluent.conf @@ -12,11 +12,8 @@ @log_level trace path_key tailed_path limit_recently_modified 5m - - @type json - time_format %Y-%m-%dT%H:%M:%S.%NZ - keep_time_key true - + # if the container runtime is non docker then this will be updated to fluent-cri-parser.conf during container startup + @include fluent-docker-parser.conf @@ -27,11 +24,8 @@ @log_level trace path_key tailed_path read_from_head true - - @type json - time_format %Y-%m-%dT%H:%M:%S.%NZ - keep_time_key true - + # if the container runtime is non docker then this will be updated to fluent-cri-parser.conf during container startup + @include fluent-docker-parser.conf @@ -59,13 +53,13 @@ - overflow_action throw_exception - chunk_limit_size 32k - queued_chunks_limit_size 256 - flush_interval 1 - flush_thread_interval 0.5 - flush_thread_burst_interval 0.01 - flush_thread_count 4 - retry_forever true - + overflow_action throw_exception + chunk_limit_size 32k + queued_chunks_limit_size 256 + flush_interval 1 + flush_thread_interval 0.5 + flush_thread_burst_interval 0.01 + flush_thread_count 4 + retry_forever true + diff --git a/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml b/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml index 0ea7a9af6..b8e667398 100644 --- a/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml +++ b/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml @@ -53,6 +53,13 @@ spec: - name: CONTROLLER_TYPE value: "DaemonSet" - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP volumeMounts: - mountPath: C:\ProgramData\docker\containers name: docker-windows-containers diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 29533e678..db788a37e 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -660,6 +660,10 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName + - name: NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP volumeMounts: - mountPath: C:\ProgramData\docker\containers name: docker-windows-containers diff --git a/kubernetes/windows/Dockerfile b/kubernetes/windows/Dockerfile index c8162b539..06e11e73a 100644 --- a/kubernetes/windows/Dockerfile +++ b/kubernetes/windows/Dockerfile @@ -56,6 +56,9 @@ COPY ./omsagentwindows/out_oms.so /opt/omsagentwindows/out_oms.so # copy fluent, fluent-bit and out_oms conf files COPY ./omsagentwindows/installer/conf/fluent.conf /etc/fluent/ +# copy fluent docker and cri parser conf files +COPY ./omsagentwindows/installer/conf/fluent-cri-parser.conf /etc/fluent/ +COPY ./omsagentwindows/installer/conf/fluent-docker-parser.conf /etc/fluent/ COPY ./omsagentwindows/installer/conf/fluent-bit.conf /etc/fluent-bit COPY ./omsagentwindows/installer/conf/out_oms.conf /etc/omsagentwindows diff --git a/kubernetes/windows/main.ps1 b/kubernetes/windows/main.ps1 index b7ddfa8e7..de82722ad 100644 --- a/kubernetes/windows/main.ps1 +++ b/kubernetes/windows/main.ps1 @@ -1,34 +1,51 @@ -function Confirm-WindowsServiceExists($name) -{ - if (Get-Service $name -ErrorAction SilentlyContinue) +Add-Type @" + using System; + using System.Net; + using System.Net.Security; + using System.Security.Cryptography.X509Certificates; + public class ServerCertificateValidationCallback { + public static void Ignore() + { + ServicePointManager.ServerCertificateValidationCallback += + delegate + ( + Object obj, + X509Certificate certificate, + X509Chain chain, + SslPolicyErrors errors + ) + { + return true; + }; + } + } +"@ +function Confirm-WindowsServiceExists($name) { + if (Get-Service $name -ErrorAction SilentlyContinue) { return $true } return $false } -function Remove-WindowsServiceIfItExists($name) -{ +function Remove-WindowsServiceIfItExists($name) { $exists = Confirm-WindowsServiceExists $name - if ($exists) - { + if ($exists) { sc.exe \\server delete $name } } -function Start-FileSystemWatcher -{ +function Start-FileSystemWatcher { Start-Process powershell -NoNewWindow .\filesystemwatcher.ps1 } #register fluentd as a windows service -function Set-EnvironmentVariables -{ +function Set-EnvironmentVariables { $domain = "opinsights.azure.com" if (Test-Path /etc/omsagent-secret/DOMAIN) { # TODO: Change to omsagent-secret before merging - $domain = Get-Content /etc/omsagent-secret/DOMAIN + $domain = Get-Content /etc/omsagent-secret/DOMAIN } # Set DOMAIN @@ -38,7 +55,7 @@ function Set-EnvironmentVariables $wsID = "" if (Test-Path /etc/omsagent-secret/WSID) { # TODO: Change to omsagent-secret before merging - $wsID = Get-Content /etc/omsagent-secret/WSID + $wsID = Get-Content /etc/omsagent-secret/WSID } # Set DOMAIN @@ -48,7 +65,7 @@ function Set-EnvironmentVariables $wsKey = "" if (Test-Path /etc/omsagent-secret/KEY) { # TODO: Change to omsagent-secret before merging - $wsKey = Get-Content /etc/omsagent-secret/KEY + $wsKey = Get-Content /etc/omsagent-secret/KEY } # Set KEY @@ -58,7 +75,7 @@ function Set-EnvironmentVariables $proxy = "" if (Test-Path /etc/omsagent-secret/PROXY) { # TODO: Change to omsagent-secret before merging - $proxy = Get-Content /etc/omsagent-secret/PROXY + $proxy = Get-Content /etc/omsagent-secret/PROXY Write-Host "Validating the proxy configuration since proxy configuration provided" # valide the proxy endpoint configuration if (![string]::IsNullOrEmpty($proxy)) { @@ -66,26 +83,22 @@ function Set-EnvironmentVariables if (![string]::IsNullOrEmpty($proxy)) { $proxy = [string]$proxy.Trim(); $parts = $proxy -split "@" - if ($parts.Length -ne 2) - { + if ($parts.Length -ne 2) { Write-Host "Invalid ProxyConfiguration $($proxy). EXITING....." exit 1 } $subparts1 = $parts[0] -split "//" - if ($subparts1.Length -ne 2) - { + if ($subparts1.Length -ne 2) { Write-Host "Invalid ProxyConfiguration $($proxy). EXITING....." exit 1 } $protocol = $subparts1[0].ToLower().TrimEnd(":") - if (!($protocol -eq "http") -and !($protocol -eq "https")) - { + if (!($protocol -eq "http") -and !($protocol -eq "https")) { Write-Host "Unsupported protocol in ProxyConfiguration $($proxy). EXITING....." exit 1 } $subparts2 = $parts[1] -split ":" - if ($subparts2.Length -ne 2) - { + if ($subparts2.Length -ne 2) { Write-Host "Invalid ProxyConfiguration $($proxy). EXITING....." exit 1 } @@ -118,46 +131,154 @@ function Set-EnvironmentVariables .\setenv.ps1 } -function Start-Fluent -{ +function Get-ContainerRuntime { + # default container runtime and make default as containerd when containerd becomes default in AKS + $containerRuntime = "docker" + $response = "" + $NODE_IP = "" + try { + if (![string]::IsNullOrEmpty([System.Environment]::GetEnvironmentVariable("NODE_IP", "PROCESS"))) { + $NODE_IP = [System.Environment]::GetEnvironmentVariable("NODE_IP", "PROCESS") + } + elseif (![string]::IsNullOrEmpty([System.Environment]::GetEnvironmentVariable("NODE_IP", "USER"))) { + $NODE_IP = [System.Environment]::GetEnvironmentVariable("NODE_IP", "USER") + } + elseif (![string]::IsNullOrEmpty([System.Environment]::GetEnvironmentVariable("NODE_IP", "MACHINE"))) { + $NODE_IP = [System.Environment]::GetEnvironmentVariable("NODE_IP", "MACHINE") + } + + if (![string]::IsNullOrEmpty($NODE_IP)) { + $isPodsAPISuccess = $false + Write-Host "Value of NODE_IP environment variable : $($NODE_IP)" + try { + Write-Host "Making API call to http://$($NODE_IP):10255/pods" + $response = Invoke-WebRequest -uri http://$($NODE_IP):10255/pods -UseBasicParsing + Write-Host "Response status code of API call to http://$($NODE_IP):10255/pods : $($response.StatusCode)" + } + catch { + Write-Host "API call to http://$($NODE_IP):10255/pods failed" + } + + if (![string]::IsNullOrEmpty($response) -and $response.StatusCode -eq 200) { + Write-Host "API call to http://$($NODE_IP):10255/pods succeeded" + $isPodsAPISuccess = $true + } + else { + try { + Write-Host "Making API call to https://$($NODE_IP):10250/pods" + # ignore certificate validation since kubelet uses self-signed cert + [ServerCertificateValidationCallback]::Ignore() + $response = Invoke-WebRequest -Uri https://$($NODE_IP):10250/pods -Headers @{'Authorization' = "Bearer $(Get-Content /var/run/secrets/kubernetes.io/serviceaccount/token)" } -UseBasicParsing + Write-Host "Response status code of API call to https://$($NODE_IP):10250/pods : $($response.StatusCode)" + if (![string]::IsNullOrEmpty($response) -and $response.StatusCode -eq 200) { + Write-Host "API call to https://$($NODE_IP):10250/pods succeeded" + $isPodsAPISuccess = $true + } + } + catch { + Write-Host "API call to https://$($NODE_IP):10250/pods failed" + } + } + + if ($isPodsAPISuccess) { + if (![string]::IsNullOrEmpty($response.Content)) { + $podList = $response.Content | ConvertFrom-Json + if (![string]::IsNullOrEmpty($podList)) { + $podItems = $podList.Items + if ($podItems.Length -gt 0) { + Write-Host "found pod items: $($podItems.Length)" + for ($index = 0; $index -le $podItems.Length ; $index++) { + Write-Host "current podItem index : $($index)" + $pod = $podItems[$index] + if (![string]::IsNullOrEmpty($pod) -and + ![string]::IsNullOrEmpty($pod.status) -and + ![string]::IsNullOrEmpty($pod.status.phase) -and + $pod.status.phase -eq "Running" -and + $pod.status.ContainerStatuses.Length -gt 0) { + $containerID = $pod.status.ContainerStatuses[0].containerID + $detectedContainerRuntime = $containerID.split(":")[0].trim() + Write-Host "detected containerRuntime as : $($detectedContainerRuntime)" + if (![string]::IsNullOrEmpty($detectedContainerRuntime) -and [string]$detectedContainerRuntime.StartsWith('docker') -eq $false) { + $containerRuntime = $detectedContainerRuntime + } + Write-Host "using containerRuntime as : $($containerRuntime)" + break + } + } + } + else { + Write-Host "got podItems count is 0 hence using default container runtime: $($containerRuntime)" + } + + + } + else { + Write-Host "got podList null or empty hence using default container runtime: $($containerRuntime)" + } + } + else { + Write-Host "got empty response content for /Pods API call hence using default container runtime: $($containerRuntime)" + } + } + } + else { + Write-Host "got empty NODE_IP environment variable" + } + # set CONTAINER_RUNTIME env for debug and telemetry purpose + [System.Environment]::SetEnvironmentVariable("CONTAINER_RUNTIME", $containerRuntime, "Process") + [System.Environment]::SetEnvironmentVariable("CONTAINER_RUNTIME", $containerRuntime, "Machine") + } + catch { + $e = $_.Exception + Write-Host $e + Write-Host "exception occured on getting container runtime hence using default container runtime: $($containerRuntime)" + } + + return $containerRuntime +} + +function Start-Fluent { + # Run fluent-bit service first so that we do not miss any logs being forwarded by the fluentd service. # Run fluent-bit as a background job. Switch this to a windows service once fluent-bit supports natively running as a windows service Start-Job -ScriptBlock { Start-Process -NoNewWindow -FilePath "C:\opt\fluent-bit\bin\fluent-bit.exe" -ArgumentList @("-c", "C:\etc\fluent-bit\fluent-bit.conf", "-e", "C:\opt\omsagentwindows\out_oms.so") } + $containerRuntime = Get-ContainerRuntime + #register fluentd as a service and start # there is a known issues with win32-service https://github.com/chef/win32-service/issues/70 + if (![string]::IsNullOrEmpty($containerRuntime) -and [string]$containerRuntime.StartsWith('docker') -eq $false) { + # change parser from docker to cri if the container runtime is not docker + Write-Host "changing parser from Docker to CRI since container runtime : $($containerRuntime) and which is non-docker" + (Get-Content -Path C:/etc/fluent/fluent.conf -Raw) -replace 'fluent-docker-parser.conf','fluent-cri-parser.conf' | Set-Content C:/etc/fluent/fluent.conf + } + fluentd --reg-winsvc i --reg-winsvc-auto-start --winsvc-name fluentdwinaks --reg-winsvc-fluentdopt '-c C:/etc/fluent/fluent.conf -o C:/etc/fluent/fluent.log' Notepad.exe | Out-Null } -function Generate-Certificates -{ +function Generate-Certificates { Write-Host "Generating Certificates" C:\\opt\\omsagentwindows\\certgenerator\\certificategenerator.exe } -function Test-CertificatePath -{ +function Test-CertificatePath { $certLocation = $env:CI_CERT_LOCATION - $keyLocation = $env:CI_KEY_LOCATION - if (!(Test-Path $certLocation)) - { + $keyLocation = $env:CI_KEY_LOCATION + if (!(Test-Path $certLocation)) { Write-Host "Certificate file not found at $($certLocation). EXITING....." exit 1 } - else - { + else { Write-Host "Certificate file found at $($certLocation)" } - if (! (Test-Path $keyLocation)) - { + if (! (Test-Path $keyLocation)) { Write-Host "Key file not found at $($keyLocation). EXITING...." exit 1 } - else - { + else { Write-Host "Key file found at $($keyLocation)" } } @@ -172,7 +293,7 @@ Test-CertificatePath Start-Fluent # List all powershell processes running. This should have main.ps1 and filesystemwatcher.ps1 -Get-WmiObject Win32_process | Where-Object {$_.Name -match 'powershell'} | Format-Table -Property Name, CommandLine, ProcessId +Get-WmiObject Win32_process | Where-Object { $_.Name -match 'powershell' } | Format-Table -Property Name, CommandLine, ProcessId #check if fluentd service is running Get-Service fluentdwinaks