-
Notifications
You must be signed in to change notification settings - Fork 1
[SVLS-8757] Add instance enhanced metric in Azure Functions #114
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
9fb66fd
0bd9a30
0504362
25c1d5a
ccb697d
6c8537e
c2799f6
4c46f88
6e39d78
1c7cbde
77895c7
ba56366
e2adcff
4172461
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| [package] | ||
| name = "datadog-metrics-collector" | ||
| version = "0.1.0" | ||
| edition.workspace = true | ||
| license.workspace = true | ||
| description = "Collector to read, compute, and submit enhanced metrics in Serverless environments" | ||
|
|
||
| [dependencies] | ||
| dogstatsd = { path = "../dogstatsd", default-features = true } | ||
| tracing = { version = "0.1", default-features = false } | ||
| libdd-common = { git = "https://github.com/DataDog/libdatadog", rev = "8c88979985154d6d97c0fc2ca9039682981eacad", default-features = false } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,176 @@ | ||
| // Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| //! Instance identity metric collector for Azure Functions. | ||
| //! | ||
| //! Submits `azure.functions.enhanced.instance` with value 1.0 on each | ||
| //! collection tick, tagged with the instance identifier. | ||
|
|
||
| use dogstatsd::aggregator::AggregatorHandle; | ||
| use dogstatsd::metric::{Metric, MetricValue, SortedTags}; | ||
| use std::env; | ||
| use tracing::{error, warn}; | ||
|
|
||
| const INSTANCE_METRIC: &str = "azure.functions.enhanced.instance"; | ||
|
|
||
/// Picks the instance identifier out of explicitly supplied values.
///
/// Taking the candidates as arguments (instead of reading the environment
/// here) keeps the selection logic unit-testable.
///
/// Search order, chosen by `website_sku`:
/// - `FlexConsumption`, `Dynamic`, or no SKU at all: container name, then pod
///   name, then computer name.
/// - any other SKU: computer name, then container name, then pod name.
///
/// An empty string counts as missing. The first usable candidate is returned
/// lowercased; `None` is returned when every candidate is missing or empty.
fn resolve_instance_id_from(
    website_sku: Option<&str>,
    container_name: Option<&str>,
    website_pod_name: Option<&str>,
    computer_name: Option<&str>,
) -> Option<String> {
    // One ordered candidate list per hosting plan; the plan's preferred source
    // comes first and the remaining sources act as fallbacks.
    let search_order = match website_sku {
        Some("FlexConsumption" | "Dynamic") | None => {
            [container_name, website_pod_name, computer_name]
        }
        Some(_) => [computer_name, container_name, website_pod_name],
    };

    search_order
        .iter()
        .copied()
        .flatten()
        .find(|candidate| !candidate.is_empty())
        .map(str::to_lowercase)
}
|
|
||
| /// Resolves the instance ID from environment variables. | ||
| fn resolve_instance_id() -> Option<String> { | ||
| resolve_instance_id_from( | ||
| env::var("WEBSITE_SKU").ok().as_deref(), | ||
| env::var("CONTAINER_NAME").ok().as_deref(), | ||
| env::var("WEBSITE_POD_NAME").ok().as_deref(), | ||
| env::var("COMPUTERNAME").ok().as_deref(), | ||
| ) | ||
|
Comment on lines
+46
to
+53
|
||
| } | ||
|
|
||
| pub struct InstanceMetricsCollector { | ||
| aggregator: AggregatorHandle, | ||
| tags: Option<SortedTags>, | ||
| } | ||
|
|
||
| impl InstanceMetricsCollector { | ||
| /// Creates a new collector, returning `None` if no instance ID is found. | ||
| pub fn new(aggregator: AggregatorHandle, tags: Option<SortedTags>) -> Option<Self> { | ||
| let instance_id = resolve_instance_id(); | ||
| let Some(instance_id) = instance_id else { | ||
| warn!("No instance ID found, instance metric will not be submitted"); | ||
| return None; | ||
| }; | ||
|
|
||
| // Precompute tags: enhanced metrics tags + instance tag | ||
| let instance_tag = format!("instance:{}", instance_id); | ||
| let tags = match tags { | ||
| Some(mut existing) => { | ||
| if let Ok(id_tag) = SortedTags::parse(&instance_tag) { | ||
| existing.extend(&id_tag); | ||
| } | ||
| Some(existing) | ||
| } | ||
| None => SortedTags::parse(&instance_tag).ok(), | ||
| }; | ||
|
|
||
| Some(Self { aggregator, tags }) | ||
| } | ||
|
|
||
| pub fn collect_and_submit(&self) { | ||
| let metric = Metric::new( | ||
| INSTANCE_METRIC.into(), | ||
| MetricValue::gauge(1.0), | ||
| self.tags.clone(), | ||
| None, | ||
| ); | ||
|
|
||
| if let Err(e) = self.aggregator.insert_batch(vec![metric]) { | ||
| error!("Failed to insert instance metric: {}", e); | ||
| } | ||
| } | ||
| } | ||
|
|
||
#[cfg(test)]
mod tests {
    use super::*;

    // Flex Consumption / Consumption: container name is the preferred source.

    #[test]
    fn test_flex_consumption_uses_container_name() {
        let resolved = resolve_instance_id_from(
            Some("FlexConsumption"),
            Some("0--abc-DEF"),
            Some("0--abc-DEF"),
            None,
        );
        assert_eq!(resolved.as_deref(), Some("0--abc-def"));
    }

    #[test]
    fn test_flex_consumption_falls_back_to_pod_name_if_container_missing() {
        let resolved =
            resolve_instance_id_from(Some("FlexConsumption"), None, Some("pod-XYZ"), None);
        assert_eq!(resolved.as_deref(), Some("pod-xyz"));
    }

    #[test]
    fn test_consumption_uses_container_name() {
        let resolved = resolve_instance_id_from(
            Some("Dynamic"),
            Some("ABCD1234-111122223333444455"),
            None,
            None,
        );
        assert_eq!(resolved.as_deref(), Some("abcd1234-111122223333444455"));
    }

    // Every other plan: computer name is the preferred source.

    #[test]
    fn test_elastic_premium_uses_computer_name() {
        let resolved =
            resolve_instance_id_from(Some("ElasticPremium"), None, None, Some("ep0fakewk0000A1"));
        assert_eq!(resolved.as_deref(), Some("ep0fakewk0000a1"));
    }

    #[test]
    fn test_dedicated_uses_computer_name() {
        let resolved =
            resolve_instance_id_from(Some("PremiumV3"), None, None, Some("p3fakewk0000B2"));
        assert_eq!(resolved.as_deref(), Some("p3fakewk0000b2"));
    }

    // Fallback behaviour.

    #[test]
    fn test_empty_string_is_treated_as_missing() {
        let resolved =
            resolve_instance_id_from(Some("ElasticPremium"), Some(""), Some(""), Some("worker-1"));
        assert_eq!(resolved.as_deref(), Some("worker-1"));
    }

    #[test]
    fn test_unknown_sku_falls_back_to_search_order() {
        let resolved =
            resolve_instance_id_from(Some("SomeNewSku"), Some("container-1"), None, None);
        assert_eq!(resolved.as_deref(), Some("container-1"));
    }

    #[test]
    fn test_missing_sku_falls_back_to_search_order() {
        let resolved = resolve_instance_id_from(None, Some("container-1"), None, Some("worker-1"));
        assert_eq!(resolved.as_deref(), Some("container-1"));
    }

    #[test]
    fn test_no_env_vars_returns_none() {
        let resolved = resolve_instance_id_from(None, None, None, None);
        assert!(resolved.is_none());
    }

    // On Windows Consumption we've observed CONTAINER_NAME and WEBSITE_POD_NAME
    // unset but COMPUTERNAME set
    #[test]
    fn test_windows_consumption_falls_through_to_computer_name() {
        let resolved = resolve_instance_id_from(Some("Dynamic"), None, None, Some("10-20-30-40"));
        assert_eq!(resolved.as_deref(), Some("10-20-30-40"));
    }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| // Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| #![cfg_attr(not(test), deny(clippy::panic))] | ||
| #![cfg_attr(not(test), deny(clippy::unwrap_used))] | ||
| #![cfg_attr(not(test), deny(clippy::expect_used))] | ||
| #![cfg_attr(not(test), deny(clippy::todo))] | ||
| #![cfg_attr(not(test), deny(clippy::unimplemented))] | ||
|
|
||
| pub mod instance; | ||
| pub mod tags; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,123 @@ | ||
| // Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| //! Shared tag builder for enhanced metrics. | ||
| //! | ||
| //! Tags are attached to all enhanced metrics submitted by the metrics collector. | ||
|
|
||
| use dogstatsd::metric::SortedTags; | ||
| use libdd_common::{azure_app_services, tag::Tag}; | ||
| use std::env; | ||
| use tracing::warn; | ||
|
|
||
| /// `libdd_common::azure_app_services` returns this value when the corresponding Azure metadata isn't populated. | ||
| const AAS_UNKNOWN_VALUE: &str = "unknown"; | ||
|
|
||
| /// Builds the common tags for all enhanced metrics. | ||
| /// | ||
| /// Sources: | ||
| /// - Azure metadata (resource_group, subscription_id, name) from libdd_common | ||
| /// - Environment variables (region, plan_tier, service, env, version, serverless_compat_version) | ||
| /// | ||
| /// The DogStatsD origin tag (e.g. `origin:azurefunction`) is added by the metrics aggregator, | ||
| /// not here. | ||
| pub fn build_enhanced_metrics_tags() -> Option<SortedTags> { | ||
| let mut pairs: Vec<(&'static str, String)> = Vec::new(); | ||
|
|
||
| if let Some(aas_metadata) = &*azure_app_services::AAS_METADATA_FUNCTION { | ||
| for (name, value) in [ | ||
| ("resource_group", aas_metadata.get_resource_group()), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. "resource_group" vs "aas.resource.group" (used in common metadata)? Should we have both? Probably not, given the whole cardinality choice, but I'm wondering why we would decide one way or the other.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I had the same confusion initially - we want to use the same tags that the integration metrics use so that we can JOIN them, which is why we don't have the aas* prefix! |
||
| ("subscription_id", aas_metadata.get_subscription_id()), | ||
| ("name", aas_metadata.get_site_name()), | ||
| ] { | ||
| if value != AAS_UNKNOWN_VALUE { | ||
| pairs.push((name, value.to_string())); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| for (tag_name, env_var) in [ | ||
| ("region", "REGION_NAME"), | ||
| ("plan_tier", "WEBSITE_SKU"), | ||
| ("service", "DD_SERVICE"), | ||
| ("env", "DD_ENV"), | ||
| ("version", "DD_VERSION"), | ||
| ("serverless_compat_version", "DD_SERVERLESS_COMPAT_VERSION"), | ||
| ] { | ||
| if let Ok(val) = env::var(env_var) { | ||
| pairs.push((tag_name, val)); | ||
| } | ||
|
Comment on lines
+39
to
+49
|
||
| } | ||
|
|
||
| build_tags(pairs) | ||
| } | ||
|
|
||
| fn build_tags(pairs: impl IntoIterator<Item = (&'static str, String)>) -> Option<SortedTags> { | ||
| let mut tags: Vec<Tag> = Vec::new(); | ||
| for (key, value) in pairs { | ||
| if value.is_empty() { | ||
| continue; | ||
| } | ||
| // Tag::new validates the combined "key:value" string: it must be | ||
| // non-empty and not start or end with a colon | ||
| match Tag::new(key, &value) { | ||
| Ok(t) => tags.push(t), | ||
| Err(e) => warn!("Skipping invalid tag {key}:{value}: {e}"), | ||
| } | ||
| } | ||
| if tags.is_empty() { | ||
| return None; | ||
| } | ||
| let joined = tags | ||
| .iter() | ||
| .map(|t| t.as_ref()) | ||
| .collect::<Vec<&str>>() | ||
| .join(","); | ||
| SortedTags::parse(&joined).ok() | ||
| } | ||
|
|
||
#[cfg(test)]
mod tests {
    use super::*;

    // Inputs that leave nothing to tag must yield `None`.

    #[test]
    fn test_build_tags_returns_none_when_no_pairs() {
        let no_pairs: Vec<(&'static str, String)> = Vec::new();
        assert!(build_tags(no_pairs).is_none());
    }

    #[test]
    fn test_build_tags_returns_none_when_all_values_empty() {
        let all_empty = vec![("service", String::new()), ("env", String::new())];
        assert!(build_tags(all_empty).is_none());
    }

    // Usable pairs are kept; unusable ones are dropped individually.

    #[test]
    fn test_build_tags_skips_empty_values() {
        let input = vec![("service", String::new()), ("env", "dev".to_string())];
        let rendered = build_tags(input).unwrap().to_strings();
        assert_eq!(rendered, vec!["env:dev"]);
    }

    #[test]
    fn test_build_tags_includes_all_nonempty_pairs() {
        let input = vec![
            ("service", "svc-1".to_string()),
            ("env", "dev".to_string()),
            ("version", "1.2.3".to_string()),
        ];
        let mut rendered = build_tags(input).unwrap().to_strings();
        rendered.sort();
        assert_eq!(rendered, vec!["env:dev", "service:svc-1", "version:1.2.3"]);
    }

    #[test]
    fn test_build_tags_rejects_trailing_colon_values() {
        let input = vec![
            ("service", "svc-1:".to_string()),
            ("env", "dev".to_string()),
        ];
        let rendered = build_tags(input).unwrap().to_strings();
        assert_eq!(rendered, vec!["env:dev"]);
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we use https://github.com/DataDog/libdatadog/blob/main/libdd-common/src/azure_app_services.rs#L243 ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should libddcommon be thinking about website pod name / container name? Will there be potential inconsistencies?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point! I had created a ticket for this actually https://datadoghq.atlassian.net/browse/SVLS-8931 - but this led me to realize that the instance ID used in libddcommon / spans is different from the instance tag on integration metrics.
I compared the env var values to the instance tag on integration metrics across hosting plans and found that in Elastic Premium and Premium plans, the integration metrics actually match the
`COMPUTERNAME` env var rather than `WEBSITE_INSTANCE_ID`, which the spans use. And for Flex Consumption and Consumption, on spans the instance id is often unknown. I documented my env var investigations here as well as in the ticket above.