Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 57 additions & 13 deletions datadog_sync/model/synthetics_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Copyright 2019 Datadog, Inc.

from __future__ import annotations
from copy import deepcopy
from typing import TYPE_CHECKING, Optional, List, Dict, Tuple, cast

from datadog_sync.utils.base_resource import BaseResource, ResourceConfig, TaggingConfig
Expand Down Expand Up @@ -33,18 +34,19 @@ class SyntheticsTests(BaseResource):
},
base_path="/api/v1/synthetics/tests",
excluded_attributes=[
"deleted_at",
"org_id",
"public_id",
"monitor_id",
"modified_at",
"created_at",
"creator",
"created_by",
"deleted_at",
"mobileApplicationsVersions",
"modified_at",
"modified_by",
"monitor_id",
"org_id",
"public_id",
"overall_state",
"overall_state_modified",
"status", # Exclude status to prevent overwriting manual changes during sync
"stepCount",
"steps.public_id",
],
Expand All @@ -68,10 +70,14 @@ class SyntheticsTests(BaseResource):
# URL templates for fetching a single test of a given type; "{}" is filled
# with the test id via str.format by the code in this file.
browser_test_path: str = "/api/v1/synthetics/tests/browser/{}"
api_test_path: str = "/api/v1/synthetics/tests/api/{}"
mobile_test_path: str = "/api/v1/synthetics/tests/mobile/{}"
# Query parameters passed as `params=` on the GET requests issued in this file.
get_params = {"include_metadata": "true"}
# Reassigned by get_resources with the SyntheticsMobileApplicationsVersions resources.
versions: List = []

async def get_resources(self, client: CustomClient) -> List[Dict]:
    """Return the synthetics tests listed by the API behind ``client``.

    The list endpoint (``self.resource_config.base_path``) is queried exactly
    once, with ``self.get_params`` as query parameters.

    As a side effect, ``self.versions`` is replaced with the resources
    returned by ``SyntheticsMobileApplicationsVersions.get_resources`` for
    the same client; ``import_resource`` iterates over that list.

    Args:
        client: Client used for the GET request and for the versions lookup.

    Returns:
        The value stored under the ``"tests"`` key of the list response.
    """
    resp = await client.get(
        self.resource_config.base_path,
        params=self.get_params,
    )
    versions = SyntheticsMobileApplicationsVersions(self.config)
    self.versions = await versions.get_resources(client)
    return resp["tests"]
Expand All @@ -80,21 +86,39 @@ async def import_resource(self, _id: Optional[str] = None, resource: Optional[Di
source_client = self.config.source_client
if _id:
try:
resource = await source_client.get(self.browser_test_path.format(_id))
resource = await source_client.get(
self.browser_test_path.format(_id),
params=self.get_params,
)
except Exception:
try:
resource = await source_client.get(self.api_test_path.format(_id))
resource = await source_client.get(
self.api_test_path.format(_id),
params=self.get_params,
)
except Exception:
resource = await source_client.get(self.mobile_test_path.format(_id))
resource = await source_client.get(
self.mobile_test_path.format(_id),
params=self.get_params,
)

resource = cast(dict, resource)
_id = resource["public_id"]
if resource.get("type") == "browser":
resource = await source_client.get(self.browser_test_path.format(_id))
resource = await source_client.get(
self.browser_test_path.format(_id),
params=self.get_params,
)
elif resource.get("type") == "api":
resource = await source_client.get(self.api_test_path.format(_id))
resource = await source_client.get(
self.api_test_path.format(_id),
params=self.get_params,
)
elif resource.get("type") == "mobile":
resource = await source_client.get(self.mobile_test_path.format(_id))
resource = await source_client.get(
self.mobile_test_path.format(_id),
params=self.get_params,
)
versions = [
i["id"]
for i in self.versions
Expand All @@ -106,7 +130,15 @@ async def import_resource(self, _id: Optional[str] = None, resource: Optional[Di
return f"{resource['public_id']}#{resource['monitor_id']}", resource

async def pre_resource_action_hook(self, _id, resource: Dict) -> None:
    """Stamp ``resource`` with disaster-recovery metadata, in place.

    Sets ``resource["metadata"]["disaster_recovery"]`` to a dict holding the
    source test's ``public_id`` and ``status`` so that the diff/sync step can
    compare the source status with the value recorded on the destination and
    trigger an update when they differ.

    Args:
        _id: Key of the resource in ``self.config.state.source[self.resource_type]``.
        resource: Resource dict to annotate; mutated in place. Any other keys
            already present under ``"metadata"`` are preserved.
    """
    # Prefer the copy held in source state; fall back to the resource itself
    # when the id is not present there.
    source = self.config.state.source[self.resource_type].get(_id, resource)
    disaster_recovery = {
        "source_public_id": source.get("public_id", ""),
        # A missing or empty status is recorded as "live".
        "source_status": source.get("status") or "live",
    }

    # setdefault() would hand back an existing null "metadata" value and the
    # item assignment would then fail, so normalise to a dict first.
    metadata = resource.get("metadata")
    if not isinstance(metadata, dict):
        metadata = {}
        resource["metadata"] = metadata
    metadata["disaster_recovery"] = disaster_recovery

async def pre_apply_hook(self) -> None:
    """Do nothing; this resource needs no preparation before apply."""
    return None
Expand All @@ -115,16 +147,28 @@ async def create_resource(self, _id: str, resource: Dict) -> Tuple[str, Dict]:
destination_client = self.config.destination_client
test_type = resource["type"]
resource.pop("mobileApplicationsVersions", None)

# Force status to "paused" for new tests to prevent immediate execution
# on destination during failover scenarios. Status can be manually changed after creation.
resource["status"] = "paused"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know you're still working on this, but thought I'd chime in. Customers have brought up this problem with monitors: suppose they have monitors A, B, and C in R1, and B is muted in R1. If we copy A, B, and C to R2 and mute all three of them, how do they know which monitors to unmute? (This is solved by global downtime; monitors themselves keep their muted/unmuted status during a copy.)

I'm worried the same might happen with synthetics. Synthetics A, B, and C exist in R1 and B is paused. If we pause A, B, and C in R2, how does the customer know to only unpause A and C? My quick suggestion would be tags, if we can tag tests at all. If the R1 test is paused, just copy it over as is, but if the R1 test is running and sync-cli forcibly pauses it, we could add a tag like paused_by:datadog-sync-cli. We may also need to remove that tag in R2 if they end up pausing the test in R1.

Copy link
Member Author

@aletournel aletournel Feb 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👋 Hey Michael,

Thanks for taking a peek. Yes, it is a rough first draft, as I wanted a quick proof of concept showing that it works as expected, but I need to add unit tests at the very least.

And to get back to your questions: yes, you're definitely right that we need to make the distinction between paused tests in R1 and live tests in R1. By pausing all the synced tests in R2, we lose that distinction.

This is why, as you've already guessed, we need extra metadata as well. Tags are an option, as we do tag tests. But in the RFC, we think that for the future it's best to have both the source_status and the source_public_id. After discussing with the folks managing the Synthetics API and the data model suggested in the RFC, we have planned as of today to use a new dedicated field for this so that the metadata can be enriched for the Edge DCs.

This metadata will be set and updated by datasync but, unlike tags, will not be visible in the Synthetics UI out of the box. We could, however, add a banner saying "This test is managed by datadog-sync-cli and is a replication of test X in DC Y. It will be unpaused automatically during a DDR failover event".

Copy link
Member Author

@aletournel aletournel Feb 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been meaning to ask, though, whether we should put this behavior (pausing the s8s test on creation and keeping the metadata up to date) behind a flag like datadog-sync sync --ddr --config config or a dedicated command like datadog-sync ddr-sync --config config. If datadog-sync is used to migrate a customer from one DC to another, we may not want this behavior.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👋 Hey Michael,

Thanks for taking a peek. Yes, it is a rough first draft, as I wanted a quick proof of concept showing that it works as expected, but I need to add unit tests at the very least.

And to get back to your questions: yes, you're definitely right that we need to make the distinction between paused tests in R1 and live tests in R1. By pausing all the synced tests in R2, we lose that distinction.

This is why, as you've already guessed, we need extra metadata as well. Tags are an option, as we do tag tests. But in the RFC, we think that for the future it's best to have both the source_status and the source_public_id. After discussing with the folks managing the Synthetics API and the data model suggested in the RFC, we have planned as of today to use a new dedicated field for this so that the metadata can be enriched for the Edge DCs.

This metadata will be set and updated by datasync but, unlike tags, will not be visible in the Synthetics UI out of the box. We could, however, add a banner saying "This test is managed by datadog-sync-cli and is a replication of test X in DC Y. It will be unpaused automatically during a DDR failover event".

Yeah, anything for the users to know what was running/paused in R1 would be good.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been meaning to ask, though, whether we should put this behavior (pausing the s8s test on creation and keeping the metadata up to date) behind a flag like datadog-sync sync --ddr --config config or a dedicated command like datadog-sync ddr-sync --config config. If datadog-sync is used to migrate a customer from one DC to another, we may not want this behavior.

Our policy (while not overly popular) has been to treat sync-cli's default behavior as a tool for DDR, and build in flags to allow migration unrelated to DDR. So I'd say have the tests pause by default and provide a flag like --retain-synthetic-tests-state to stop that behavior.


resp = await destination_client.post(self.resource_config.base_path + f"/{test_type}", resource)
# Persist metadata in state so destination JSON has it and diffs compare correctly.
if resource.get("metadata"):
resp.setdefault("metadata", {}).update(deepcopy(resource["metadata"]))
return _id, resp

async def update_resource(self, _id: str, resource: Dict) -> Tuple[str, Dict]:
    """Update the destination copy of a test and return its new state.

    The destination test is addressed by the ``public_id`` recorded for
    ``_id`` in ``self.config.state.destination[self.resource_type]``.

    Args:
        _id: Key of the resource in destination state.
        resource: Payload to send. ``"mobileApplicationsVersions"`` is removed
            from it in place before the request.

    Returns:
        ``(_id, resp)`` where ``resp`` is the PUT response with the payload's
        ``"metadata"`` merged into it.
    """
    destination_client = self.config.destination_client
    resource.pop("mobileApplicationsVersions", None)

    public_id = self.config.state.destination[self.resource_type][_id]["public_id"]
    resp = await destination_client.put(
        f"{self.resource_config.base_path}/{public_id}",
        resource,
    )

    # Persist metadata in state so destination JSON has it and diffs compare correctly.
    if resource.get("metadata"):
        # The response may carry no "metadata" key or a null one; normalise to
        # a dict so the merge cannot fail after the update already succeeded.
        merged = resp.get("metadata")
        if not isinstance(merged, dict):
            merged = {}
            resp["metadata"] = merged
        # Deep-copy so later edits to `resource` do not leak into stored state.
        merged.update(deepcopy(resource["metadata"]))
    return _id, resp

async def delete_resource(self, _id: str) -> None:
Expand Down
Empty file added tests/integration/__init__.py
Empty file.
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ interactions:
Content-Type:
- application/json
method: GET
uri: https://api.datadoghq.eu/api/v1/synthetics/tests
uri: https://api.datadoghq.eu/api/v1/synthetics/tests?include_metadata=true
response:
body:
string: '{"tests": [{"public_id": "njk-avc-mjc", "name": "Test on www.datadoghq.com",
Expand Down Expand Up @@ -115,7 +115,7 @@ interactions:
Content-Type:
- application/json
method: GET
uri: https://api.datadoghq.eu/api/v1/synthetics/tests/api/njk-avc-mjc
uri: https://api.datadoghq.eu/api/v1/synthetics/tests/api/njk-avc-mjc?include_metadata=true
response:
body:
string: '{"public_id": "njk-avc-mjc", "name": "Test on www.datadoghq.com", "status":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ interactions:
Content-Type:
- application/json
method: GET
uri: https://api.datadoghq.eu/api/v1/synthetics/tests
uri: https://api.datadoghq.eu/api/v1/synthetics/tests?include_metadata=true
response:
body:
string: '{"tests": [{"public_id": "njk-avc-mjc", "name": "Test on www.datadoghq.com",
Expand Down Expand Up @@ -135,7 +135,7 @@ interactions:
Content-Type:
- application/json
method: GET
uri: https://api.datadoghq.eu/api/v1/synthetics/tests/api/njk-avc-mjc
uri: https://api.datadoghq.eu/api/v1/synthetics/tests/api/njk-avc-mjc?include_metadata=true
response:
body:
string: '{"public_id": "njk-avc-mjc", "name": "Test on www.datadoghq.com", "status":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ interactions:
Content-Type:
- application/json
method: GET
uri: https://api.datadoghq.eu/api/v1/synthetics/tests
uri: https://api.datadoghq.eu/api/v1/synthetics/tests?include_metadata=true
response:
body:
string: '{"tests": [{"public_id": "njk-avc-mjc", "name": "Test on www.datadoghq.com",
Expand Down Expand Up @@ -115,7 +115,7 @@ interactions:
Content-Type:
- application/json
method: GET
uri: https://api.datadoghq.eu/api/v1/synthetics/tests/api/njk-avc-mjc
uri: https://api.datadoghq.eu/api/v1/synthetics/tests/api/njk-avc-mjc?include_metadata=true
response:
body:
string: '{"public_id": "njk-avc-mjc", "name": "Test on www.datadoghq.com", "status":
Expand All @@ -142,7 +142,7 @@ interactions:
code: 200
message: OK
- request:
body: '{"name": "Test on www.datadoghq.com", "status": "live", "type": "api",
body: '{"name": "Test on www.datadoghq.com", "type": "api",
"subtype": "http", "tags": ["managed_by:datadog-sync"], "config": {"assertions":
[{"operator": "lessThan", "type": "responseTime", "target": 1000}], "request":
{"method": "GET", "url": "https://www.datadoghq.com"}}, "message": "", "options":
Expand All @@ -154,7 +154,7 @@ interactions:
"aws:eu-south-1", "aws:eu-west-1", "aws:eu-west-2", "aws:eu-west-3", "aws:me-south-1",
"aws:sa-east-1", "aws:us-east-1", "aws:us-east-2", "aws:us-west-1", "aws:us-west-2",
"azure:eastus", "gcp:asia-northeast1", "gcp:europe-west3", "gcp:us-east4", "gcp:us-south1",
"gcp:us-west1", "gcp:us-west2"]}'
"gcp:us-west1", "gcp:us-west2"], "metadata": {"disaster_recovery": {"source_public_id": "njk-avc-mjc", "source_status": "live"}}, "status": "paused"}'
headers:
Content-Type:
- application/json
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ interactions:
Content-Type:
- application/json
method: GET
uri: https://api.datadoghq.eu/api/v1/synthetics/tests
uri: https://api.datadoghq.eu/api/v1/synthetics/tests?include_metadata=true
response:
body:
string: '{"tests": [{"public_id": "njk-avc-mjc", "name": "Test on www.datadoghq.com",
Expand Down Expand Up @@ -135,7 +135,7 @@ interactions:
Content-Type:
- application/json
method: GET
uri: https://api.datadoghq.eu/api/v1/synthetics/tests/api/njk-avc-mjc
uri: https://api.datadoghq.eu/api/v1/synthetics/tests/api/njk-avc-mjc?include_metadata=true
response:
body:
string: '{"public_id": "njk-avc-mjc", "name": "Test on www.datadoghq.com", "status":
Expand Down Expand Up @@ -182,7 +182,7 @@ interactions:
code: 200
message: OK
- request:
body: '{"name": "Test on www.datadoghq.com", "status": "live", "type": "api",
body: '{"name": "Test on www.datadoghq.com", "type": "api",
"subtype": "http", "tags": ["managed_by:datadog-sync"], "config": {"assertions":
[{"operator": "lessThan", "type": "responseTime", "target": 1000}], "request":
{"method": "GET", "url": "https://www.datadoghq.com"}}, "message": "", "options":
Expand All @@ -194,7 +194,7 @@ interactions:
"aws:eu-south-1", "aws:eu-west-1", "aws:eu-west-2", "aws:eu-west-3", "aws:me-south-1",
"aws:sa-east-1", "aws:us-east-1", "aws:us-east-2", "aws:us-west-1", "aws:us-west-2",
"azure:eastus", "gcp:asia-northeast1", "gcp:europe-west3", "gcp:us-east4", "gcp:us-south1",
"gcp:us-west1", "gcp:us-west2"]}'
"gcp:us-west1", "gcp:us-west2"], "metadata": {"disaster_recovery": {"source_public_id": "njk-avc-mjc", "source_status": "live"}}, "status": "paused"}'
headers:
Content-Type:
- application/json
Expand Down
Loading
Loading